14b693a06a65e4985047f33ed440f9fa6773bd4e
2 #include <gsl/gsl_linalg.h>
5 // Compute pseudo-inverse of a square matrix
6 static Real
* pinv(const Real
* matrix
, int dim
)
8 gsl_matrix
* U
= gsl_matrix_alloc(dim
,dim
);
9 gsl_matrix
* V
= gsl_matrix_alloc(dim
,dim
);
10 gsl_vector
* S
= gsl_vector_alloc(dim
);
11 gsl_vector
* work
= gsl_vector_alloc(dim
);
12 Real EPS
= 1e-10; //threshold for singular value "== 0"
15 copyArray(matrix
, U
->data
, dim
*dim
);
17 //U,S,V = SVD of matrix
18 gsl_linalg_SV_decomp(U
, V
, S
, work
);
19 gsl_vector_free(work
);
21 // Obtain pseudo-inverse by V*S^{-1}*t(U)
22 Real
* inverse
= (Real
*)malloc(dim
*dim
*sizeof(Real
));
23 for (int i
=0; i
<dim
; i
++)
25 for (int ii
=0; ii
<dim
; ii
++)
27 Real dotProduct
= 0.0;
28 for (int j
=0; j
<dim
; j
++)
29 dotProduct
+= V
->data
[i
*dim
+j
] * (S
->data
[j
] > EPS
? 1.0/S
->data
[j
] : 0.0) * U
->data
[ii
*dim
+j
];
30 inverse
[i
*dim
+ii
] = dotProduct
;
40 // TODO: comment EMGrank purpose
43 const Real
* Pi
, // parametre de proportion
44 const Real
* Rho
, // parametre initial de variance renormalise
45 int mini
, // nombre minimal d'iterations dans l'algorithme EM
46 int maxi
, // nombre maximal d'iterations dans l'algorithme EM
47 const Real
* X
, // regresseurs
48 const Real
* Y
, // reponse
49 Real tau
, // seuil pour accepter la convergence
50 const int* rank
, // vecteur des rangs possibles
52 Real
* phi
, // parametre de moyenne renormalise, calcule par l'EM
53 Real
* LLF
, // log vraisemblance associe a cet echantillon, pour les valeurs estimees des parametres
54 // additional size parameters
55 int n
, // taille de l'echantillon
56 int p
, // nombre de covariables
57 int m
, // taille de Y (multivarie)
58 int k
) // nombre de composantes
60 // Allocations, initializations
61 Real
* Phi
= (Real
*)calloc(p
*m
*k
,sizeof(Real
));
62 Real
* hatBetaR
= (Real
*)malloc(p
*m
*sizeof(Real
));
65 int deltaPhiBufferSize
= 20;
66 Real
* deltaPhi
= (Real
*)malloc(deltaPhiBufferSize
*sizeof(Real
));
68 Real sumDeltaPhi
= 0.0;
69 Real
* YiRhoR
= (Real
*)malloc(m
*sizeof(Real
));
70 Real
* XiPhiR
= (Real
*)malloc(m
*sizeof(Real
));
71 Real
* Xr
= (Real
*)malloc(n
*p
*sizeof(Real
));
72 Real
* Yr
= (Real
*)malloc(n
*m
*sizeof(Real
));
73 Real
* tXrXr
= (Real
*)malloc(p
*p
*sizeof(Real
));
74 Real
* tXrYr
= (Real
*)malloc(p
*m
*sizeof(Real
));
75 gsl_matrix
* matrixM
= gsl_matrix_alloc(p
, m
);
76 gsl_matrix
* matrixE
= gsl_matrix_alloc(m
, m
);
77 gsl_permutation
* permutation
= gsl_permutation_alloc(m
);
78 gsl_matrix
* V
= gsl_matrix_alloc(m
,m
);
79 gsl_vector
* S
= gsl_vector_alloc(m
);
80 gsl_vector
* work
= gsl_vector_alloc(m
);
82 //Initialize class memberships (all elements in class 0; TODO: randomize ?)
83 int* Z
= (int*)calloc(n
, sizeof(int));
85 //Initialize phi to zero, because some M loops might exit before phi affectation
86 zeroArray(phi
, p
*m
*k
);
88 while (ite
<mini
|| (ite
<maxi
&& sumDeltaPhi
>tau
))
94 //M step: Mise a jour de Beta (et donc phi)
95 for (int r
=0; r
<k
; r
++)
97 //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
99 for (int i
=0; i
<n
; i
++)
103 for (int j
=0; j
<p
; j
++)
104 Xr
[mi(cardClustR
,j
,n
,p
)] = X
[mi(i
,j
,n
,p
)];
105 for (int j
=0; j
<m
; j
++)
106 Yr
[mi(cardClustR
,j
,n
,m
)] = Y
[mi(i
,j
,n
,m
)];
113 //Compute tXrXr = t(Xr) * Xr
114 for (int j
=0; j
<p
; j
++)
116 for (int jj
=0; jj
<p
; jj
++)
118 Real dotProduct
= 0.0;
119 for (int u
=0; u
<cardClustR
; u
++)
120 dotProduct
+= Xr
[mi(u
,j
,n
,p
)] * Xr
[mi(u
,jj
,n
,p
)];
121 tXrXr
[mi(j
,jj
,p
,p
)] = dotProduct
;
125 //Get pseudo inverse = (t(Xr)*Xr)^{-1}
126 Real
* invTXrXr
= pinv(tXrXr
, p
);
128 // Compute tXrYr = t(Xr) * Yr
129 for (int j
=0; j
<p
; j
++)
131 for (int jj
=0; jj
<m
; jj
++)
133 Real dotProduct
= 0.0;
134 for (int u
=0; u
<cardClustR
; u
++)
135 dotProduct
+= Xr
[mi(u
,j
,n
,p
)] * Yr
[mi(u
,jj
,n
,m
)];
136 tXrYr
[mi(j
,jj
,p
,m
)] = dotProduct
;
140 //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr
141 for (int j
=0; j
<p
; j
++)
143 for (int jj
=0; jj
<m
; jj
++)
145 Real dotProduct
= 0.0;
146 for (int u
=0; u
<p
; u
++)
147 dotProduct
+= invTXrXr
[mi(j
,u
,p
,p
)] * tXrYr
[mi(u
,jj
,p
,m
)];
148 matrixM
->data
[j
*m
+jj
] = dotProduct
;
153 //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
155 gsl_linalg_SV_decomp(matrixM
, V
, S
, work
);
158 gsl_matrix
* matrixM_
= gsl_matrix_alloc(m
, p
);
159 for (int j
=0; j
<m
; j
++)
161 for (int jj
=0; jj
<p
; jj
++)
162 matrixM_
->data
[jj
*m
+j
] = matrixM
->data
[j
*p
+jj
];
164 gsl_matrix
* V_
= gsl_matrix_alloc(p
,p
);
165 gsl_vector
* S_
= gsl_vector_alloc(p
);
166 gsl_vector
* work_
= gsl_vector_alloc(p
);
167 gsl_linalg_SV_decomp(matrixM_
, V_
, S_
, work_
);
168 gsl_vector_free(work_
);
169 for (int j
=0; j
<m
; j
++)
172 S
->data
[j
] = S_
->data
[j
];
175 for (int jj
=0; jj
<m
; jj
++)
178 V
->data
[jj
* m
+ j
] = V_
->data
[jj
* m
+ j
];
180 V
->data
[jj
* m
+ j
] = 0.0;
183 for (int j
=0; j
<m
; j
++)
185 for (int jj
=0; jj
<p
; jj
++)
186 matrixM
->data
[j
*p
+jj
] = matrixM_
->data
[jj
*m
+j
];
188 gsl_matrix_free(matrixM_
);
193 //Set m-rank(r) singular values to zero, and recompose
194 //best rank(r) approximation of the initial product
195 for (int j
=rank
[r
]; j
<m
; j
++)
198 //[intermediate step] Compute hatBetaR = U * S * t(V)
199 double* U
= matrixM
->data
; //GSL require double precision
200 for (int j
=0; j
<p
; j
++)
202 for (int jj
=0; jj
<m
; jj
++)
204 Real dotProduct
= 0.0;
205 for (int u
=0; u
<m
; u
++)
206 dotProduct
+= U
[j
*m
+u
] * S
->data
[u
] * V
->data
[jj
*m
+u
];
207 hatBetaR
[mi(j
,jj
,p
,m
)] = dotProduct
;
211 //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r)
212 for (int j
=0; j
<p
; j
++)
214 for (int jj
=0; jj
<m
; jj
++)
217 for (int u
=0; u
<m
; u
++)
218 dotProduct
+= hatBetaR
[mi(j
,u
,p
,m
)] * Rho
[ai(u
,jj
,r
,m
,m
,k
)];
219 phi
[ai(j
,jj
,r
,p
,m
,k
)] = dotProduct
;
228 Real sumLogLLF2
= 0.0;
229 for (int i
=0; i
<n
; i
++)
232 Real maxLogGamIR
= -INFINITY
;
233 for (int r
=0; r
<k
; r
++)
236 //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)...
237 //*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) );
238 //split in several sub-steps
240 //compute det(Rho(:,:,r)) [TODO: avoid re-computations]
241 for (int j
=0; j
<m
; j
++)
243 for (int jj
=0; jj
<m
; jj
++)
244 matrixE
->data
[j
*m
+jj
] = Rho
[ai(j
,jj
,r
,m
,m
,k
)];
246 gsl_linalg_LU_decomp(matrixE
, permutation
, &signum
);
247 Real detRhoR
= gsl_linalg_LU_det(matrixE
, signum
);
249 //compute Y(i,:)*Rho(:,:,r)
250 for (int j
=0; j
<m
; j
++)
253 for (int u
=0; u
<m
; u
++)
254 YiRhoR
[j
] += Y
[mi(i
,u
,n
,m
)] * Rho
[ai(u
,j
,r
,m
,m
,k
)];
257 //compute X(i,:)*phi(:,:,r)
258 for (int j
=0; j
<m
; j
++)
261 for (int u
=0; u
<p
; u
++)
262 XiPhiR
[j
] += X
[mi(i
,u
,n
,p
)] * phi
[ai(u
,j
,r
,p
,m
,k
)];
265 //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) >
266 Real dotProduct
= 0.0;
267 for (int u
=0; u
<m
; u
++)
268 dotProduct
+= (YiRhoR
[u
]-XiPhiR
[u
]) * (YiRhoR
[u
]-XiPhiR
[u
]);
269 Real logGamIR
= log(Pi
[r
]) + log(detRhoR
) - 0.5*dotProduct
;
271 //Z(i) = index of max (gam(i,:))
272 if (logGamIR
> maxLogGamIR
)
275 maxLogGamIR
= logGamIR
;
277 sumLLF1
+= exp(logGamIR
) / pow(2*M_PI
,m
/2.0);
280 sumLogLLF2
+= log(sumLLF1
);
283 // Assign output variable LLF
284 *LLF
= -invN
* sumLogLLF2
;
286 //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi))));
287 Real newDeltaPhi
= 0.0;
288 for (int j
=0; j
<p
; j
++)
290 for (int jj
=0; jj
<m
; jj
++)
292 for (int r
=0; r
<k
; r
++)
294 Real tmpDist
= fabs(phi
[ai(j
,jj
,r
,p
,m
,k
)]-Phi
[ai(j
,jj
,r
,p
,m
,k
)])
295 / (1.0+fabs(phi
[ai(j
,jj
,r
,p
,m
,k
)]));
296 if (tmpDist
> newDeltaPhi
)
297 newDeltaPhi
= tmpDist
;
302 //update distance parameter to check algorithm convergence (delta(phi, Phi))
303 //TODO: deltaPhi should be a linked list for perf.
304 if (ite
< deltaPhiBufferSize
)
305 deltaPhi
[ite
] = newDeltaPhi
;
308 sumDeltaPhi
-= deltaPhi
[0];
309 for (int u
=0; u
<deltaPhiBufferSize
-1; u
++)
310 deltaPhi
[u
] = deltaPhi
[u
+1];
311 deltaPhi
[deltaPhiBufferSize
-1] = newDeltaPhi
;
313 sumDeltaPhi
+= newDeltaPhi
;
315 // update other local variables
316 for (int j
=0; j
<m
; j
++)
318 for (int jj
=0; jj
<p
; jj
++)
320 for (int r
=0; r
<k
; r
++)
321 Phi
[ai(jj
,j
,r
,p
,m
,k
)] = phi
[ai(jj
,j
,r
,p
,m
,k
)];
331 gsl_matrix_free(matrixE
);
332 gsl_matrix_free(matrixM
);
333 gsl_permutation_free(permutation
);
334 gsl_vector_free(work
);