From: Benjamin Auder Date: Fri, 5 Jun 2020 15:36:43 +0000 (+0200) Subject: Fix SVD error when p < m in EMGrank.c X-Git-Url: https://git.auder.net/variants/current/css/doc/img/scripts/config.php?a=commitdiff_plain;h=93dcdea364307b9d5ebe71958deb5df2a48547b3;p=valse.git Fix SVD error when p < m in EMGrank.c --- diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index ac46366..edb3356 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -28,7 +28,7 @@ Suggests: roxygen2 URL: http://git.auder.net/?p=valse.git License: MIT + file LICENSE -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 Collate: 'plot_valse.R' 'main.R' diff --git a/pkg/src/EMGrank.c b/pkg/src/EMGrank.c index a98ff91..14b693a 100644 --- a/pkg/src/EMGrank.c +++ b/pkg/src/EMGrank.c @@ -5,303 +5,340 @@ // Compute pseudo-inverse of a square matrix static Real* pinv(const Real* matrix, int dim) { - gsl_matrix* U = gsl_matrix_alloc(dim,dim); - gsl_matrix* V = gsl_matrix_alloc(dim,dim); - gsl_vector* S = gsl_vector_alloc(dim); - gsl_vector* work = gsl_vector_alloc(dim); - Real EPS = 1e-10; //threshold for singular value "== 0" - - //copy matrix into U - copyArray(matrix, U->data, dim*dim); + gsl_matrix* U = gsl_matrix_alloc(dim,dim); + gsl_matrix* V = gsl_matrix_alloc(dim,dim); + gsl_vector* S = gsl_vector_alloc(dim); + gsl_vector* work = gsl_vector_alloc(dim); + Real EPS = 1e-10; //threshold for singular value "== 0" + + //copy matrix into U + copyArray(matrix, U->data, dim*dim); - //U,S,V = SVD of matrix - gsl_linalg_SV_decomp(U, V, S, work); - gsl_vector_free(work); + //U,S,V = SVD of matrix + gsl_linalg_SV_decomp(U, V, S, work); + gsl_vector_free(work); - // Obtain pseudo-inverse by V*S^{-1}*t(U) - Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); - for (int i=0; idata[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; - inverse[i*dim+ii] = dotProduct; - } - } - - gsl_matrix_free(U); - gsl_matrix_free(V); - gsl_vector_free(S); - return inverse; + // Obtain pseudo-inverse by V*S^{-1}*t(U) + Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); + for (int i=0; idata[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; + inverse[i*dim+ii] = dotProduct; + } + } + + gsl_matrix_free(U); + gsl_matrix_free(V); + gsl_vector_free(S); + return inverse; } // TODO: comment EMGrank purpose void EMGrank_core( - // IN parameters - const Real* Pi, // parametre de proportion - const Real* Rho, // parametre initial de variance renormalise - int mini, // nombre minimal d'iterations dans l'algorithme EM - int maxi, // nombre maximal d'iterations dans l'algorithme EM - const Real* X, // regresseurs - const Real* Y, // reponse - Real tau, // seuil pour accepter la convergence - const int* rank, // vecteur des rangs possibles - // OUT parameters - Real* phi, // parametre de moyenne renormalise, calcule par l'EM - Real* LLF, // log vraisemblance associe a cet echantillon, pour les valeurs estimees des parametres - // additional size parameters - int n, // taille de l'echantillon - int p, // nombre de covariables - int m, // taille de Y (multivarie) - int k) // nombre de composantes + // IN parameters + const Real* Pi, // parametre de proportion + const Real* Rho, // parametre initial de variance renormalise + int mini, // nombre minimal d'iterations dans l'algorithme EM + int maxi, // nombre maximal d'iterations dans l'algorithme EM + const Real* X, // regresseurs + const Real* Y, // reponse + Real tau, // seuil pour accepter la convergence + const int* rank, // vecteur des rangs possibles + // OUT parameters + Real* phi, // parametre de moyenne renormalise, calcule par l'EM + Real* LLF, // log vraisemblance associe a cet echantillon, pour les valeurs estimees des parametres + // additional size parameters + int n, // taille de l'echantillon + int p, // nombre de covariables + int m, // taille de Y (multivarie) + int k) // nombre de composantes { - // Allocations, initializations - Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); - Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); - int signum; - Real invN = 1.0/n; - int deltaPhiBufferSize = 20; - Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); - int ite = 0; - Real sumDeltaPhi = 0.0; - Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); - Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); - Real* Xr = (Real*)malloc(n*p*sizeof(Real)); - Real* Yr = (Real*)malloc(n*m*sizeof(Real)); - Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); - Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); - gsl_matrix* matrixM = gsl_matrix_alloc(p, m); - gsl_matrix* matrixE = gsl_matrix_alloc(m, m); - gsl_permutation* permutation = gsl_permutation_alloc(m); - gsl_matrix* V = gsl_matrix_alloc(m,m); - gsl_vector* S = gsl_vector_alloc(m); - gsl_vector* work = gsl_vector_alloc(m); + // Allocations, initializations + Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); + Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); + int signum; + Real invN = 1.0/n; + int deltaPhiBufferSize = 20; + Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); + int ite = 0; + Real sumDeltaPhi = 0.0; + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real* Xr = (Real*)malloc(n*p*sizeof(Real)); + Real* Yr = (Real*)malloc(n*m*sizeof(Real)); + Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); + Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); + gsl_matrix* matrixM = gsl_matrix_alloc(p, m); + gsl_matrix* matrixE = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + gsl_matrix* V = gsl_matrix_alloc(m,m); + gsl_vector* S = gsl_vector_alloc(m); + gsl_vector* work = gsl_vector_alloc(m); - //Initialize class memberships (all elements in class 0; TODO: randomize ?) - int* Z = (int*)calloc(n, sizeof(int)); + //Initialize class memberships (all elements in class 0; TODO: randomize ?) + int* Z = (int*)calloc(n, sizeof(int)); - //Initialize phi to zero, because some M loops might exit before phi affectation - zeroArray(phi, p*m*k); + //Initialize phi to zero, because some M loops might exit before phi affectation + zeroArray(phi, p*m*k); - while (itetau)) - { - ///////////// - // Etape M // - ///////////// - - //M step: Mise a jour de Beta (et donc phi) - for (int r=0; rtau)) + { + ///////////// + // Etape M // + ///////////// + + //M step: Mise a jour de Beta (et donc phi) + for (int r=0; rdata[j*m+jj] = dotProduct; - } - } - free(invTXrXr); + // Compute tXrYr = t(Xr) * Yr + for (int j=0; jdata[j*m+jj] = dotProduct; + } + } + free(invTXrXr); - //Set m-rank(r) singular values to zero, and recompose - //best rank(r) approximation of the initial product - for (int j=rank[r]; jdata[j] = 0.0; - - //[intermediate step] Compute hatBetaR = U * S * t(V) - double* U = matrixM->data; //GSL require double precision - for (int j=0; jdata[u] * V->data[jj*m+u]; - hatBetaR[mi(j,jj,p,m)] = dotProduct; - } - } + //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + if (p >= m) + gsl_linalg_SV_decomp(matrixM, V, S, work); + else + { + gsl_matrix* matrixM_ = gsl_matrix_alloc(m, p); + for (int j=0; jdata[jj*m+j] = matrixM->data[j*p +jj]; + } + gsl_matrix* V_ = gsl_matrix_alloc(p,p); + gsl_vector* S_ = gsl_vector_alloc(p); + gsl_vector* work_ = gsl_vector_alloc(p); + gsl_linalg_SV_decomp(matrixM_, V_, S_, work_); + gsl_vector_free(work_); + for (int j=0; jdata[j] = S_->data[j]; + else + S->data[j] = 0.0; + for (int jj=0; jjdata[jj * m + j] = V_->data[jj * m + j]; + else + V->data[jj * m + j] = 0.0; + } + } + for (int j=0; jdata[j*p+jj] = matrixM_->data[jj*m +j]; + } + gsl_matrix_free(matrixM_); + gsl_matrix_free(V_); + gsl_vector_free(S_); + } - //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) - for (int j=0; jdata[j] = 0.0; + + //[intermediate step] Compute hatBetaR = U * S * t(V) + double* U = matrixM->data; //GSL require double precision + for (int j=0; jdata[u] * V->data[jj*m+u]; + hatBetaR[mi(j,jj,p,m)] = dotProduct; + } + } - ///////////// - // Etape E // - ///////////// - - Real sumLogLLF2 = 0.0; - for (int i=0; idata[j*m+jj] = Rho[ai(j,jj,r,m,m,k)]; - } - gsl_linalg_LU_decomp(matrixE, permutation, &signum); - Real detRhoR = gsl_linalg_LU_det(matrixE, signum); + //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) + for (int j=0; jdata[j*m+jj] = Rho[ai(j,jj,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrixE, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrixE, signum); - //compute X(i,:)*phi(:,:,r) - for (int j=0; j - Real dotProduct = 0.0; - for (int u=0; u maxLogGamIR) - { - Z[i] = r; - maxLogGamIR = logGamIR; - } - sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); - } + //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) > + Real dotProduct = 0.0; + for (int u=0; u maxLogGamIR) + { + Z[i] = r; + maxLogGamIR = logGamIR; + } + sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); + } - // Assign output variable LLF - *LLF = -invN * sumLogLLF2; + sumLogLLF2 += log(sumLLF1); + } - //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi)))); - Real newDeltaPhi = 0.0; - for (int j=0; j newDeltaPhi) - newDeltaPhi = tmpDist; - } - } - } + // Assign output variable LLF + *LLF = -invN * sumLogLLF2; - //update distance parameter to check algorithm convergence (delta(phi, Phi)) - //TODO: deltaPhi should be a linked list for perf. - if (ite < deltaPhiBufferSize) - deltaPhi[ite] = newDeltaPhi; - else - { - sumDeltaPhi -= deltaPhi[0]; - for (int u=0; u newDeltaPhi) + newDeltaPhi = tmpDist; + } + } + } - // update other local variables - for (int j=0; j