2e85ce6fd019f9c836cc26d82210b1fc7d3a1556
2 #include <gsl/gsl_linalg.h>
4 // Compute pseudo-inverse of a square matrix
5 static double* pinv(const double* matrix
, int dim
)
7 gsl_matrix
* U
= gsl_matrix_alloc(dim
,dim
);
8 gsl_matrix
* V
= gsl_matrix_alloc(dim
,dim
);
9 gsl_vector
* S
= gsl_vector_alloc(dim
);
10 gsl_vector
* work
= gsl_vector_alloc(dim
);
11 double EPS
= 1e-10; //threshold for singular value "== 0"
14 copyArray(matrix
, U
->data
, dim
*dim
);
16 //U,S,V = SVD of matrix
17 gsl_linalg_SV_decomp(U
, V
, S
, work
);
18 gsl_vector_free(work
);
20 // Obtain pseudo-inverse by V*S^{-1}*t(U)
21 double* inverse
= (double*)malloc(dim
*dim
*sizeof(double));
22 for (int i
=0; i
<dim
; i
++)
24 for (int ii
=0; ii
<dim
; ii
++)
26 double dotProduct
= 0.0;
27 for (int j
=0; j
<dim
; j
++)
28 dotProduct
+= V
->data
[i
*dim
+j
] * (S
->data
[j
] > EPS
? 1.0/S
->data
[j
] : 0.0) * U
->data
[ii
*dim
+j
];
29 inverse
[i
*dim
+ii
] = dotProduct
;
39 // TODO: comment EMGrank purpose
42 const double* Pi
, // parametre de proportion
43 const double* Rho
, // parametre initial de variance renormalisé
44 int mini
, // nombre minimal d'itérations dans l'algorithme EM
45 int maxi
, // nombre maximal d'itérations dans l'algorithme EM
46 const double* X
, // régresseurs
47 const double* Y
, // réponse
48 double tau
, // seuil pour accepter la convergence
49 const int* rank
, // vecteur des rangs possibles
51 double* phi
, // parametre de moyenne renormalisé, calculé par l'EM
52 double* LLF
, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres
53 // additional size parameters
54 int n
, // taille de l'echantillon
55 int p
, // nombre de covariables
56 int m
, // taille de Y (multivarié)
57 int k
) // nombre de composantes
59 // Allocations, initializations
60 double* Phi
= (double*)calloc(p
*m
*k
,sizeof(double));
61 double* hatBetaR
= (double*)malloc(p
*m
*sizeof(double));
64 int deltaPhiBufferSize
= 20;
65 double* deltaPhi
= (double*)malloc(deltaPhiBufferSize
*sizeof(double));
67 double sumDeltaPhi
= 0.0;
68 double* YiRhoR
= (double*)malloc(m
*sizeof(double));
69 double* XiPhiR
= (double*)malloc(m
*sizeof(double));
70 double* Xr
= (double*)malloc(n
*p
*sizeof(double));
71 double* Yr
= (double*)malloc(n
*m
*sizeof(double));
72 double* tXrXr
= (double*)malloc(p
*p
*sizeof(double));
73 double* tXrYr
= (double*)malloc(p
*m
*sizeof(double));
74 gsl_matrix
* matrixM
= gsl_matrix_alloc(p
, m
);
75 gsl_matrix
* matrixE
= gsl_matrix_alloc(m
, m
);
76 gsl_permutation
* permutation
= gsl_permutation_alloc(m
);
77 gsl_matrix
* V
= gsl_matrix_alloc(m
,m
);
78 gsl_vector
* S
= gsl_vector_alloc(m
);
79 gsl_vector
* work
= gsl_vector_alloc(m
);
81 //Initialize class memberships (all elements in class 0; TODO: randomize ?)
82 int* Z
= (int*)calloc(n
, sizeof(int));
84 //Initialize phi to zero, because some M loops might exit before phi affectation
85 for (int i
=0; i
<p
*m
*k
; i
++)
88 while (ite
<mini
|| (ite
<maxi
&& sumDeltaPhi
>tau
))
94 //M step: Mise à jour de Beta (et donc phi)
95 for (int r
=0; r
<k
; r
++)
97 //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
99 for (int i
=0; i
<n
; i
++)
103 for (int j
=0; j
<p
; j
++)
104 Xr
[mi(cardClustR
,j
,n
,p
)] = X
[mi(i
,j
,n
,p
)];
105 for (int j
=0; j
<m
; j
++)
106 Yr
[mi(cardClustR
,j
,n
,m
)] = Y
[mi(i
,j
,n
,m
)];
113 //Compute tXrXr = t(Xr) * Xr
114 for (int j
=0; j
<p
; j
++)
116 for (int jj
=0; jj
<p
; jj
++)
118 double dotProduct
= 0.0;
119 for (int u
=0; u
<cardClustR
; u
++)
120 dotProduct
+= Xr
[mi(u
,j
,n
,p
)] * Xr
[mi(u
,jj
,n
,p
)];
121 tXrXr
[mi(j
,jj
,p
,p
)] = dotProduct
;
125 //Get pseudo inverse = (t(Xr)*Xr)^{-1}
126 double* invTXrXr
= pinv(tXrXr
, p
);
128 // Compute tXrYr = t(Xr) * Yr
129 for (int j
=0; j
<p
; j
++)
131 for (int jj
=0; jj
<m
; jj
++)
133 double dotProduct
= 0.0;
134 for (int u
=0; u
<cardClustR
; u
++)
135 dotProduct
+= Xr
[mi(u
,j
,n
,p
)] * Yr
[mi(u
,j
,n
,m
)];
136 tXrYr
[mi(j
,jj
,p
,m
)] = dotProduct
;
140 //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr
141 for (int j
=0; j
<p
; j
++)
143 for (int jj
=0; jj
<m
; jj
++)
145 double dotProduct
= 0.0;
146 for (int u
=0; u
<p
; u
++)
147 dotProduct
+= invTXrXr
[mi(j
,u
,p
,p
)] * tXrYr
[mi(u
,jj
,p
,m
)];
148 matrixM
->data
[j
*m
+jj
] = dotProduct
;
153 //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
154 gsl_linalg_SV_decomp(matrixM
, V
, S
, work
);
156 //Set m-rank(r) singular values to zero, and recompose
157 //best rank(r) approximation of the initial product
158 for (int j
=rank
[r
]; j
<m
; j
++)
161 //[intermediate step] Compute hatBetaR = U * S * t(V)
162 double* U
= matrixM
->data
;
163 for (int j
=0; j
<p
; j
++)
165 for (int jj
=0; jj
<m
; jj
++)
167 double dotProduct
= 0.0;
168 for (int u
=0; u
<m
; u
++)
169 dotProduct
+= U
[j
*m
+u
] * S
->data
[u
] * V
->data
[jj
*m
+u
];
170 hatBetaR
[mi(j
,jj
,p
,m
)] = dotProduct
;
174 //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r)
175 for (int j
=0; j
<p
; j
++)
177 for (int jj
=0; jj
<m
; jj
++)
179 double dotProduct
=0.0;
180 for (int u
=0; u
<m
; u
++)
181 dotProduct
+= hatBetaR
[mi(j
,u
,p
,m
)] * Rho
[ai(u
,jj
,r
,m
,m
,k
)];
182 phi
[ai(j
,jj
,r
,p
,m
,k
)] = dotProduct
;
191 double sumLogLLF2
= 0.0;
192 for (int i
=0; i
<n
; i
++)
194 double sumLLF1
= 0.0;
195 double maxLogGamIR
= -INFINITY
;
196 for (int r
=0; r
<k
; r
++)
199 //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)...
200 //*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) );
201 //split in several sub-steps
203 //compute det(Rho(:,:,r)) [TODO: avoid re-computations]
204 for (int j
=0; j
<m
; j
++)
206 for (int jj
=0; jj
<m
; jj
++)
207 matrixE
->data
[j
*m
+jj
] = Rho
[ai(j
,jj
,r
,m
,m
,k
)];
209 gsl_linalg_LU_decomp(matrixE
, permutation
, &signum
);
210 double detRhoR
= gsl_linalg_LU_det(matrixE
, signum
);
212 //compute Y(i,:)*Rho(:,:,r)
213 for (int j
=0; j
<m
; j
++)
216 for (int u
=0; u
<m
; u
++)
217 YiRhoR
[j
] += Y
[mi(i
,u
,n
,m
)] * Rho
[ai(u
,j
,r
,m
,m
,k
)];
220 //compute X(i,:)*phi(:,:,r)
221 for (int j
=0; j
<m
; j
++)
224 for (int u
=0; u
<p
; u
++)
225 XiPhiR
[j
] += X
[mi(i
,u
,n
,p
)] * phi
[ai(u
,j
,r
,p
,m
,k
)];
228 //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) >
229 double dotProduct
= 0.0;
230 for (int u
=0; u
<m
; u
++)
231 dotProduct
+= (YiRhoR
[u
]-XiPhiR
[u
]) * (YiRhoR
[u
]-XiPhiR
[u
]);
232 double logGamIR
= log(Pi
[r
]) + log(detRhoR
) - 0.5*dotProduct
;
234 //Z(i) = index of max (gam(i,:))
235 if (logGamIR
> maxLogGamIR
)
238 maxLogGamIR
= logGamIR
;
240 sumLLF1
+= exp(logGamIR
) / pow(2*M_PI
,m
/2.0);
243 sumLogLLF2
+= log(sumLLF1
);
246 // Assign output variable LLF
247 *LLF
= -invN
* sumLogLLF2
;
249 //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi))));
250 double newDeltaPhi
= 0.0;
251 for (int j
=0; j
<p
; j
++)
253 for (int jj
=0; jj
<m
; jj
++)
255 for (int r
=0; r
<k
; r
++)
257 double tmpDist
= fabs(phi
[ai(j
,jj
,r
,p
,m
,k
)]-Phi
[ai(j
,jj
,r
,p
,m
,k
)])
258 / (1.0+fabs(phi
[ai(j
,jj
,r
,p
,m
,k
)]));
259 if (tmpDist
> newDeltaPhi
)
260 newDeltaPhi
= tmpDist
;
265 //update distance parameter to check algorithm convergence (delta(phi, Phi))
266 //TODO: deltaPhi should be a linked list for perf.
267 if (ite
< deltaPhiBufferSize
)
268 deltaPhi
[ite
] = newDeltaPhi
;
271 sumDeltaPhi
-= deltaPhi
[0];
272 for (int u
=0; u
<deltaPhiBufferSize
-1; u
++)
273 deltaPhi
[u
] = deltaPhi
[u
+1];
274 deltaPhi
[deltaPhiBufferSize
-1] = newDeltaPhi
;
276 sumDeltaPhi
+= newDeltaPhi
;
278 // update other local variables
279 for (int j
=0; j
<m
; j
++)
281 for (int jj
=0; jj
<p
; jj
++)
283 for (int r
=0; r
<k
; r
++)
284 Phi
[ai(j
,jj
,r
,p
,m
,k
)] = phi
[ai(j
,jj
,r
,p
,m
,k
)];
294 gsl_matrix_free(matrixE
);
295 gsl_matrix_free(matrixM
);
296 gsl_permutation_free(permutation
);
297 gsl_vector_free(work
);