e41fe3c3f5c3f76a2f7bec23a757904a285e0d2c
3 #include <gsl/gsl_linalg.h>
5 // TODO: don't recompute indexes ai(...) and mi(...) when possible
8 const Real
* phiInit
, // parametre initial de moyenne renormalisé
9 const Real
* rhoInit
, // parametre initial de variance renormalisé
10 const Real
* piInit
, // parametre initial des proportions
11 const Real
* gamInit
, // paramètre initial des probabilités a posteriori de chaque échantillon
12 int mini
, // nombre minimal d'itérations dans l'algorithme EM
13 int maxi
, // nombre maximal d'itérations dans l'algorithme EM
14 Real gamma
, // puissance des proportions dans la pénalisation pour un Lasso adaptatif
15 Real lambda
, // valeur du paramètre de régularisation du Lasso
16 const Real
* X
, // régresseurs
17 const Real
* Y
, // réponse
18 Real tau
, // seuil pour accepter la convergence
19 // OUT parameters (all pointers, to be modified)
20 Real
* phi
, // parametre de moyenne renormalisé, calculé par l'EM
21 Real
* rho
, // parametre de variance renormalisé, calculé par l'EM
22 Real
* pi
, // parametre des proportions renormalisé, calculé par l'EM
23 Real
* LLF
, // log vraisemblance associée à cet échantillon, pour les valeurs estimées des paramètres
25 // additional size parameters
26 int n
, // nombre d'echantillons
27 int p
, // nombre de covariables
28 int m
, // taille de Y (multivarié)
29 int k
) // nombre de composantes dans le mélange
32 copyArray(phiInit
, phi
, p
*m
*k
);
33 copyArray(rhoInit
, rho
, m
*m
*k
);
34 copyArray(piInit
, pi
, k
);
36 //S is already allocated, and doesn't need to be 'zeroed'
38 //Other local variables: same as in R
39 Real
* gam
= (Real
*)malloc(n
*k
*sizeof(Real
));
40 copyArray(gamInit
, gam
, n
*k
);
41 Real
* Gram2
= (Real
*)malloc(p
*p
*k
*sizeof(Real
));
42 Real
* ps2
= (Real
*)malloc(p
*m
*k
*sizeof(Real
));
43 Real
* b
= (Real
*)malloc(k
*sizeof(Real
));
44 Real
* X2
= (Real
*)malloc(n
*p
*k
*sizeof(Real
));
45 Real
* Y2
= (Real
*)malloc(n
*m
*k
*sizeof(Real
));
49 Real
* pi2
= (Real
*)malloc(k
*sizeof(Real
));
50 Real
* ps
= (Real
*)malloc(m
*k
*sizeof(Real
));
51 Real
* nY2
= (Real
*)malloc(m
*k
*sizeof(Real
));
52 Real
* ps1
= (Real
*)malloc(n
*m
*k
*sizeof(Real
));
53 Real
* Gam
= (Real
*)malloc(n
*k
*sizeof(Real
));
54 const Real EPS
= 1e-15;
55 // Additional (not at this place, in R file)
56 Real
* gam2
= (Real
*)malloc(k
*sizeof(Real
));
57 Real
* nY21
= (Real
*)malloc(n
*m
*k
*sizeof(Real
));
58 Real
* sqNorm2
= (Real
*)malloc(k
*sizeof(Real
));
59 gsl_matrix
* matrix
= gsl_matrix_alloc(m
, m
);
60 gsl_permutation
* permutation
= gsl_permutation_alloc(m
);
61 Real
* YiRhoR
= (Real
*)malloc(m
*sizeof(Real
));
62 Real
* XiPhiR
= (Real
*)malloc(m
*sizeof(Real
));
63 const Real gaussConstM
= pow(2.*M_PI
,m
/2.);
64 Real
* Phi
= (Real
*)malloc(p
*m
*k
*sizeof(Real
));
65 Real
* Rho
= (Real
*)malloc(m
*m
*k
*sizeof(Real
));
66 Real
* Pi
= (Real
*)malloc(k
*sizeof(Real
));
68 while (ite
< mini
|| (ite
< maxi
&& (dist
>= tau
|| dist2
>= sqrt(tau
))))
70 copyArray(phi
, Phi
, p
*m
*k
);
71 copyArray(rho
, Rho
, m
*m
*k
);
74 // Calculs associés a Y et X
75 for (int r
=0; r
<k
; r
++)
77 for (int mm
=0; mm
<m
; mm
++)
79 //Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
80 for (int u
=0; u
<n
; u
++)
81 Y2
[ai(u
,mm
,r
,n
,m
,k
)] = sqrt(gam
[mi(u
,r
,n
,k
)]) * Y
[mi(u
,mm
,m
,n
)];
83 for (int i
=0; i
<n
; i
++)
85 //X2[i,,r] = sqrt(gam[i,r]) * X[i,]
86 for (int u
=0; u
<p
; u
++)
87 X2
[ai(i
,u
,r
,n
,p
,k
)] = sqrt(gam
[mi(i
,r
,n
,k
)]) * X
[mi(i
,u
,n
,p
)];
89 for (int mm
=0; mm
<m
; mm
++)
91 //ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
92 for (int u
=0; u
<p
; u
++)
95 for (int v
=0; v
<n
; v
++)
96 dotProduct
+= X2
[ai(v
,u
,r
,n
,p
,k
)] * Y2
[ai(v
,mm
,r
,n
,m
,k
)];
97 ps2
[ai(u
,mm
,r
,p
,m
,k
)] = dotProduct
;
100 for (int j
=0; j
<p
; j
++)
102 for (int s
=0; s
<p
; s
++)
104 //Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
105 Real dotProduct
= 0.;
106 for (int u
=0; u
<n
; u
++)
107 dotProduct
+= X2
[ai(u
,j
,r
,n
,p
,k
)] * X2
[ai(u
,s
,r
,n
,p
,k
)];
108 Gram2
[ai(j
,s
,r
,p
,p
,k
)] = dotProduct
;
118 for (int r
=0; r
<k
; r
++)
120 //b[r] = sum(abs(phi[,,r]))
122 for (int u
=0; u
<p
; u
++)
123 for (int v
=0; v
<m
; v
++)
124 sumAbsPhi
+= fabs(phi
[ai(u
,v
,r
,p
,m
,k
)]);
127 //gam2 = colSums(gam)
128 for (int u
=0; u
<k
; u
++)
130 Real sumOnColumn
= 0.;
131 for (int v
=0; v
<n
; v
++)
132 sumOnColumn
+= gam
[mi(v
,u
,n
,k
)];
133 gam2
[u
] = sumOnColumn
;
135 //a = sum(gam %*% log(pi))
137 for (int u
=0; u
<n
; u
++)
139 Real dotProduct
= 0.;
140 for (int v
=0; v
<k
; v
++)
141 dotProduct
+= gam
[mi(u
,v
,n
,k
)] * log(pi
[v
]);
145 //tant que les proportions sont negatives
147 int pi2AllPositive
= 0;
149 while (!pi2AllPositive
)
151 //pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
152 Real pow_01_kk
= pow(0.1,kk
);
153 for (int r
=0; r
<k
; r
++)
154 pi2
[r
] = pi
[r
] + pow_01_kk
* (invN
*gam2
[r
] - pi
[r
]);
155 //pi2AllPositive = all(pi2 >= 0)
157 for (int r
=0; r
<k
; r
++)
169 Real piPowGammaDotB
= 0.;
170 for (int v
=0; v
<k
; v
++)
171 piPowGammaDotB
+= pow(pi
[v
],gamma
) * b
[v
];
173 Real pi2PowGammaDotB
= 0.;
174 for (int v
=0; v
<k
; v
++)
175 pi2PowGammaDotB
+= pow(pi2
[v
],gamma
) * b
[v
];
176 //transpose(gam2)*log(pi2)
177 Real prodGam2logPi2
= 0.;
178 for (int v
=0; v
<k
; v
++)
179 prodGam2logPi2
+= gam2
[v
] * log(pi2
[v
]);
180 //t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
181 while (-invN
*a
+ lambda
*piPowGammaDotB
< -invN
*prodGam2logPi2
+ lambda
*pi2PowGammaDotB
184 Real pow_01_kk
= pow(0.1,kk
);
185 //pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
186 for (int v
=0; v
<k
; v
++)
187 pi2
[v
] = pi
[v
] + pow_01_kk
* (invN
*gam2
[v
] - pi
[v
]);
188 //pi2 was updated, so we recompute pi2PowGammaDotB and prodGam2logPi2
189 pi2PowGammaDotB
= 0.;
190 for (int v
=0; v
<k
; v
++)
191 pi2PowGammaDotB
+= pow(pi2
[v
],gamma
) * b
[v
];
193 for (int v
=0; v
<k
; v
++)
194 prodGam2logPi2
+= gam2
[v
] * log(pi2
[v
]);
197 Real t
= pow(0.1,kk
);
198 //sum(pi + t*(pi2-pi))
199 Real sumPiPlusTbyDiff
= 0.;
200 for (int v
=0; v
<k
; v
++)
201 sumPiPlusTbyDiff
+= (pi
[v
] + t
*(pi2
[v
] - pi
[v
]));
202 //pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
203 for (int v
=0; v
<k
; v
++)
204 pi
[v
] = (pi
[v
] + t
*(pi2
[v
] - pi
[v
])) / sumPiPlusTbyDiff
;
207 for (int r
=0; r
<k
; r
++)
209 for (int mm
=0; mm
<m
; mm
++)
211 for (int i
=0; i
<n
; i
++)
213 //< X2(i,:,r) , phi(:,mm,r) >
214 Real dotProduct
= 0.;
215 for (int u
=0; u
<p
; u
++)
216 dotProduct
+= X2
[ai(i
,u
,r
,n
,p
,k
)] * phi
[ai(u
,mm
,r
,p
,m
,k
)];
217 //ps1[i,mm,r] = Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
218 ps1
[ai(i
,mm
,r
,n
,m
,k
)] = Y2
[ai(i
,mm
,r
,n
,m
,k
)] * dotProduct
;
219 nY21
[ai(i
,mm
,r
,n
,m
,k
)] = Y2
[ai(i
,mm
,r
,n
,m
,k
)] * Y2
[ai(i
,mm
,r
,n
,m
,k
)];
221 //ps[mm,r] = sum(ps1[,mm,r])
223 for (int u
=0; u
<n
; u
++)
224 sumPs1
+= ps1
[ai(u
,mm
,r
,n
,m
,k
)];
225 ps
[mi(mm
,r
,m
,k
)] = sumPs1
;
226 //nY2[mm,r] = sum(nY21[,mm,r])
228 for (int u
=0; u
<n
; u
++)
229 sumNy21
+= nY21
[ai(u
,mm
,r
,n
,m
,k
)];
230 nY2
[mi(mm
,r
,m
,k
)] = sumNy21
;
231 //rho[mm,mm,r] = (ps[mm,r]+sqrt(ps[mm,r]^2+4*nY2[mm,r]*(gam2[r]))) / (2*nY2[mm,r])
232 rho
[ai(mm
,mm
,r
,m
,m
,k
)] = ( ps
[mi(mm
,r
,m
,k
)] + sqrt( ps
[mi(mm
,r
,m
,k
)]*ps
[mi(mm
,r
,m
,k
)]
233 + 4*nY2
[mi(mm
,r
,m
,k
)] * gam2
[r
] ) ) / (2*nY2
[mi(mm
,r
,m
,k
)]);
236 for (int r
=0; r
<k
; r
++)
238 for (int j
=0; j
<p
; j
++)
240 for (int mm
=0; mm
<m
; mm
++)
242 //sum(phi[-j,mm,r] * Gram2[j, setdiff(1:p,j),r])
243 Real dotPhiGram2
= 0.0;
244 for (int u
=0; u
<p
; u
++)
247 dotPhiGram2
+= phi
[ai(u
,mm
,r
,p
,m
,k
)] * Gram2
[ai(j
,u
,r
,p
,p
,k
)];
249 //S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j, setdiff(1:p,j),r])
250 S
[ai(j
,mm
,r
,p
,m
,k
)] = -rho
[ai(mm
,mm
,r
,m
,m
,k
)] * ps2
[ai(j
,mm
,r
,p
,m
,k
)] + dotPhiGram2
;
251 Real pow_pir_gamma
= pow(pi
[r
],gamma
);
252 if (fabs(S
[ai(j
,mm
,r
,p
,m
,k
)]) <= n
*lambda
*pow_pir_gamma
)
253 phi
[ai(j
,mm
,r
,p
,m
,k
)] = 0;
254 else if (S
[ai(j
,mm
,r
,p
,m
,k
)] > n
*lambda
*pow_pir_gamma
)
256 phi
[ai(j
,mm
,r
,p
,m
,k
)] = (n
*lambda
*pow_pir_gamma
- S
[ai(j
,mm
,r
,p
,m
,k
)])
257 / Gram2
[ai(j
,j
,r
,p
,p
,k
)];
261 phi
[ai(j
,mm
,r
,p
,m
,k
)] = -(n
*lambda
*pow_pir_gamma
+ S
[ai(j
,mm
,r
,p
,m
,k
)])
262 / Gram2
[ai(j
,j
,r
,p
,p
,k
)];
273 Real sumLogLLF2
= 0.;
274 for (int i
=0; i
<n
; i
++)
276 for (int r
=0; r
<k
; r
++)
278 //compute Y[i,]%*%rho[,,r]
279 for (int u
=0; u
<m
; u
++)
282 for (int v
=0; v
<m
; v
++)
283 YiRhoR
[u
] += Y
[mi(i
,v
,n
,m
)] * rho
[ai(v
,u
,r
,m
,m
,k
)];
286 //compute X(i,:)*phi(:,:,r)
287 for (int u
=0; u
<m
; u
++)
290 for (int v
=0; v
<p
; v
++)
291 XiPhiR
[u
] += X
[mi(i
,v
,n
,p
)] * phi
[ai(v
,u
,r
,p
,m
,k
)];
294 //compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2
296 for (int u
=0; u
<m
; u
++)
297 sqNorm2
[r
] += (YiRhoR
[u
]-XiPhiR
[u
]) * (YiRhoR
[u
]-XiPhiR
[u
]);
302 for (int r
=0; r
<k
; r
++)
304 //compute det(rho[,,r]) [TODO: avoid re-computations]
305 for (int u
=0; u
<m
; u
++)
307 for (int v
=0; v
<m
; v
++)
308 matrix
->data
[u
*m
+v
] = rho
[ai(u
,v
,r
,m
,m
,k
)];
310 gsl_linalg_LU_decomp(matrix
, permutation
, &signum
);
311 Real detRhoR
= gsl_linalg_LU_det(matrix
, signum
);
312 Gam
[mi(i
,r
,n
,k
)] = pi
[r
] * exp(-.5*sqNorm2
[r
]) * detRhoR
;
313 sumLLF1
+= Gam
[mi(i
,r
,n
,k
)] / gaussConstM
;
314 sumGamI
+= Gam
[mi(i
,r
,n
,k
)];
316 sumLogLLF2
+= log(sumLLF1
);
317 for (int r
=0; r
<k
; r
++)
319 //gam[i,] = Gam[i,] / sumGamI
320 gam
[mi(i
,r
,n
,k
)] = sumGamI
> EPS
? Gam
[mi(i
,r
,n
,k
)] / sumGamI
: 0.;
324 //sumPen = sum(pi^gamma * b)
326 for (int r
=0; r
<k
; r
++)
327 sumPen
+= pow(pi
[r
],gamma
) * b
[r
];
328 //LLF[ite] = -sumLogLLF2/n + lambda*sumPen
329 LLF
[ite
] = -invN
* sumLogLLF2
+ lambda
* sumPen
;
330 dist
= ite
==0 ? LLF
[ite
] : (LLF
[ite
] - LLF
[ite
-1]) / (1. + fabs(LLF
[ite
]));
332 //Dist1 = max( abs(phi-Phi) / (1+abs(phi)) )
334 for (int u
=0; u
<p
; u
++)
336 for (int v
=0; v
<m
; v
++)
338 for (int w
=0; w
<k
; w
++)
340 Real tmpDist
= fabs(phi
[ai(u
,v
,w
,p
,m
,k
)]-Phi
[ai(u
,v
,w
,p
,m
,k
)])
341 / (1.+fabs(phi
[ai(u
,v
,w
,p
,m
,k
)]));
347 //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
349 for (int u
=0; u
<m
; u
++)
351 for (int v
=0; v
<m
; v
++)
353 for (int w
=0; w
<k
; w
++)
355 Real tmpDist
= fabs(rho
[ai(u
,v
,w
,m
,m
,k
)]-Rho
[ai(u
,v
,w
,m
,m
,k
)])
356 / (1.+fabs(rho
[ai(u
,v
,w
,m
,m
,k
)]));
362 //Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)))
364 for (int u
=0; u
<n
; u
++)
366 for (int v
=0; v
<k
; v
++)
368 Real tmpDist
= fabs(pi
[v
]-Pi
[v
]) / (1.+fabs(pi
[v
]));
373 //dist2=max([max(Dist1),max(Dist2),max(Dist3)]);
396 gsl_matrix_free(matrix
);
397 gsl_permutation_free(permutation
);