From: Benjamin Auder Date: Wed, 16 Nov 2016 13:31:39 +0000 (+0100) Subject: initial commit X-Git-Url: https://git.auder.net/doc/html/packages.html?a=commitdiff_plain;h=1d3c1faaef57d906a7f12490040398b252ff049d;p=valse.git initial commit --- 1d3c1faaef57d906a7f12490040398b252ff049d diff --git a/InputParameters/basicInitParameters.m b/InputParameters/basicInitParameters.m new file mode 100644 index 0000000..50410f5 --- /dev/null +++ b/InputParameters/basicInitParameters.m @@ -0,0 +1,19 @@ +function[phiInit,rhoInit,piInit,gamInit] = basicInitParameters(n,p,m,k) + + phiInit = zeros(p,m,k); + + piInit = (1.0/k) * ones(1,k); + + rhoInit = zeros(m,m,k); + for r=1:k + rhoInit(:,:,r) = eye(m,m); + end + + gamInit = 0.1 * ones(n,k); + R = random('unid',k,n,1); + for i=1:n + gamInit(i,R(i)) = 0.9; + end + gamInit = gamInit / (sum(gamInit(1,:))); + +end diff --git a/InputParameters/compileMex.m b/InputParameters/compileMex.m new file mode 100644 index 0000000..9e5f64b --- /dev/null +++ b/InputParameters/compileMex.m @@ -0,0 +1,7 @@ +%compile C code (for MATLAB or Octave) +if exist('octave_config_info') + setenv('CFLAGS','-O2 -std=gnu99 -fopenmp') + mkoctfile --mex -DOctave -I../Util -I../ProcLassoMLE selectiontotale.c selectiontotale_interface.c ../Util/ioutils.c ../ProcLassoMLE/EMGLLF.c -o selectiontotale -lm -lgsl -lgslcblas -lgomp +else + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util -I../ProcLassoMLE selectiontotale.c selectiontotale_interface.c ../Util/ioutils.c ../ProcLassoMLE/EMGLLF.c -output selectiontotale -lm -lgsl -lgslcblas -lgomp +end diff --git a/InputParameters/generateIO.m b/InputParameters/generateIO.m new file mode 100644 index 0000000..c677fd2 --- /dev/null +++ b/InputParameters/generateIO.m @@ -0,0 +1,37 @@ +%X is generated following a gaussian mixture \sum pi_r N(meanX_k, covX_r) +%Y is generated then, with Y_i ~ \sum pi_r N(Beta_r.X_i, covY_r) +function[X,Y,Z] = generateIO(meanX, covX, covY, pi, beta, n) + + [p, ~, k] = size(covX); + [m, ~, ~] = size(covY); + if exist('octave_config_info') + %Octave statistics package doesn't have gmdistribution() + X = zeros(n, p); + Z = zeros(n); + cs = cumsum(pi); + for i=1:n + %TODO: vectorize ? http://stackoverflow.com/questions/2977497/weighted-random-numbers-in-matlab + tmpRand01 = rand(); + [~,Z(i)] = min(cs - tmpRand01 >= 0); + X(i,:) = mvnrnd(meanX(Z(i),:), covX(:,:,Z(i)), 1); + end + else + gmDistX = gmdistribution(meanX, covX, pi); + [X, Z] = random(gmDistX, n); + end + + Y = zeros(n, m); + BX = zeros(n,m,k); + for i=1:n + for r=1:k + %compute beta_r . X_i + BXir = zeros(1, m); + for mm=1:m + BXir(mm) = dot(X(i,:), beta(:,mm,r)); + end + %add pi(r) * N(beta_r . X_i, covY) to Y_i + Y(i,:) = Y(i,:) + pi(r) * mvnrnd(BXir, covY(:,:,r), 1); + end + end + +end diff --git a/InputParameters/generateIOdefault.m b/InputParameters/generateIOdefault.m new file mode 100644 index 0000000..f4c3c1f --- /dev/null +++ b/InputParameters/generateIOdefault.m @@ -0,0 +1,23 @@ +%call generateIO with default parameters (random means, covariances = identity, equirepartition) +function[X,Y,Z] = generateIOdefault(n, p, m, k) + + rangeX = 100; + meanX = rangeX * (1 - 2*rand(k, p)); + covX = zeros(p,p,k); + covY = zeros(m,m,k); + for r=1:k + covX(:,:,r) = eye(p); + covY(:,:,r) = eye(m); + end + pi = (1/k) * ones(1,k); + + %initialize beta to a random number of non-zero random value + beta = zeros(p,m,k); + for j=1:p + nonZeroCount = ceil(m*rand(1)); + beta(j,1:nonZeroCount,:) = rand(nonZeroCount, k); + end + + [X,Y,Z] = generateIO(meanX, covX, covY, pi, beta, n); + +end diff --git a/InputParameters/grillelambda.m b/InputParameters/grillelambda.m new file mode 100644 index 0000000..8d90665 --- /dev/null +++ b/InputParameters/grillelambda.m @@ -0,0 +1,17 @@ +function[gridLambda] = grillelambda(phiInit,rhoInit,piInit,gamInit,X,Y,gamma,mini,maxi,tau) + + n = size(X, 1); + [p,m,k] = size(phiInit); + [phi,rho,pi,~,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,1,0,X,Y,tau); + + gridLambda = zeros(p,m,k); + for j=1:p + for mm=1:m + gridLambda(j,mm,:) = abs(reshape(S(j,mm,:),[k,1])) ./ (n*pi.^gamma); + end + end + + gridLambda = unique(gridLambda); + gridLambda(gridLambda()>1) = []; + +end diff --git a/InputParameters/initSmallEM.m b/InputParameters/initSmallEM.m new file mode 100644 index 0000000..6a2ccc9 --- /dev/null +++ b/InputParameters/initSmallEM.m @@ -0,0 +1,33 @@ +function[phiInit,rhoInit,piInit,gamInit] = initSmallEM(k,x,y,tau) +[n,m]=size(y); +gamInit1=zeros(n,k,20); +for repet=1:20 + Zinit1(:,repet)=clusterdata(y,k); + for r=1:k + betaInit1(:,:,r,repet)=pinv(transpose(x(Zinit1(:,repet)==r,:))*x(Zinit1(:,repet)==r,:))*transpose(x(Zinit1(:,repet)==r,:))*y(Zinit1(:,repet)==r,:); + sigmaInit1(:,:,r,repet)=eye(m); + phiInit1(:,:,r,repet)=betaInit1(:,:,r,repet)/sigmaInit1(:,:,r,repet); + rhoInit1(:,:,r,repet)=inv(sigmaInit1(:,:,r,repet)); + piInit1(repet,r)=sum(Zinit1(:,repet)==r)/n; + end + for i=1:n + for r=1:k + dotProduct = (y(i,:)*rhoInit1(:,:,r,repet)-x(i,:)*phiInit1(:,:,r,repet)) * transpose(y(i,:)*rhoInit1(:,:,r,repet)-x(i,:)*phiInit1(:,:,r,repet)); + Gam(i,r) = piInit1(repet,r)*det(rhoInit1(:,:,r,repet))*exp(-0.5*dotProduct); + end + sumGamI = sum(Gam(i,:)); + gamInit1(i,:,repet) = Gam(i,:) / sumGamI; + end + miniInit=int64(10); + maxiInit=int64(11); + + [~,~,~,LLFEssai,~] = EMGLLF(phiInit1(:,:,:,repet),rhoInit1(:,:,:,repet),piInit1(repet,:),gamInit1(:,:,repet),miniInit,maxiInit,1,0,x,y,tau); + LLFinit1(repet)=LLFEssai(end); +end +[~,b]=max(LLFinit1); + +phiInit=phiInit1(:,:,:,b); +rhoInit=rhoInit1(:,:,:,b); +piInit=piInit1(b,:); +gamInit=gamInit1(:,:,b); +end \ No newline at end of file diff --git a/InputParameters/selectiontotale.c b/InputParameters/selectiontotale.c new file mode 100644 index 0000000..f3ed95b --- /dev/null +++ b/InputParameters/selectiontotale.c @@ -0,0 +1,142 @@ +#include "selectiontotale.h" +#include "EMGLLF.h" +#include +#include "omp_num_threads.h" + +// Main job on raw inputs (after transformation from mxArray) +void selectiontotale( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + Int mini, // nombre minimal d'itérations dans lambdaIndex'algorithme EM + Int maxi, // nombre maximal d'itérations dans lambdaIndex'algorithme EM + Real gamma, // valeur de gamma : puissance des proportions dans la pénalisation pour un Lasso adaptatif + const Real* glambda, // valeur des paramètres de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real seuil, // seuil pour prendre en compte une variable + Real tau, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Int* A1, // matrice des coefficients des parametres selectionnes + Int* A2, // matrice des coefficients des parametres non selectionnes + Real* Rho, // estimateur ainsi calculé par le Lasso + Real* Pi, // estimateur ainsi calculé par le Lasso + // additional size parameters + mwSize n, // taille de lambdaIndex'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k, // nombre de composantes + mwSize L) // taille de glambda +{ + // Fill outputs with zeros: they might not be assigned + for (int u=0; u maxPhi) + maxPhi = fabs(phi[j*m*k+jj*k+r]); + } + if (maxPhi > seuil) + { + selectedVariables[j*m+cpt] = jj+1; + atLeastOneSelectedVariable = 1; + cpt++; + } + else + { + discardedVariables[j*m+cpt2] = jj+1; + cpt2++; + } + } + } + free(phi); + + if (atLeastOneSelectedVariable) + { + Int* vec = (Int*)malloc(p*sizeof(Int)); + mwSize vecSize = 0; + for (mwSize j=0; j seuil + selectedVariables(j,cpt) = mm; + cpt = cpt+1; + atLeastOneSelectedVariable = true; + else + discardedVariables(j,cpt2) = mm; + cpt2 = cpt2+1; + end + end + end + + %Si aucun des coefficients n'a été gardé on renvoit la matrice nulle + %Et si on enlevait ces colonnes de zéro ??? Indices des colonnes vides + if atLeastOneSelectedVariable + vec = []; + for j=1:p + if selectedVariables(j,1) ~= 0 + vec = [vec;j]; + end + end + + %Sinon on renvoit les numéros des coefficients utiles + A1(:,1,lambdaIndex) = [vec;zeros(p-length(vec),1)]; + A1(1:length(vec),2:m+1,lambdaIndex) = selectedVariables(vec,:); + A2(:,1,lambdaIndex) = 1:p; + A2(:,2:m+1,lambdaIndex) = discardedVariables; + Rho(:,:,:,lambdaIndex) = rho; + Pi(:,lambdaIndex) = pi; + end + + end + +end diff --git a/InputParameters/selectiontotale_interface.c b/InputParameters/selectiontotale_interface.c new file mode 100644 index 0000000..b4b8f4c --- /dev/null +++ b/InputParameters/selectiontotale_interface.c @@ -0,0 +1,127 @@ +#include "ioutils.h" +#include "selectiontotale.h" +#include "EMGLLF.h" +#include + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction(int nlhs, mxArray* plhs[], + int nrhs, const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=12) + mexErrMsgIdAndTxt("select:selectiontotale:nrhs","12 inputs required."); + if (nlhs!=4) + mexErrMsgIdAndTxt("select:selectiontotale:nlhs","4 outputs required."); + + // Get matrices dimensions, to be given to main routine above + const mwSize n = mxGetDimensions(prhs[8])[0]; + const mwSize p = mxGetDimensions(prhs[8])[1]; + const mwSize m = mxGetDimensions(prhs[1])[0]; + const mwSize k = mxGetDimensions(prhs[1])[2]; + const mwSize L = mxGetNumberOfElements(prhs[7]); + + //////////// + // INPUTS // + //////////// + + // phiInit + const mwSize* dimPhiInit = mxGetDimensions(prhs[0]); + Real* brPhiInit = matlabToBrArray_real(mxGetPr(prhs[0]), dimPhiInit, 3); + + // rhoInit + const mwSize* dimRhoInit = mxGetDimensions(prhs[1]); + Real* brRhoInit = matlabToBrArray_real(mxGetPr(prhs[1]), dimRhoInit, 3); + + // piInit + Real* piInit = mxGetPr(prhs[2]); + + // gamInit + const mwSize* dimGamInit = mxGetDimensions(prhs[3]); + Real* brGamInit = matlabToBrArray_real(mxGetPr(prhs[3]), dimGamInit, 2); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[4]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[5]))[0]; + + // gamma + Real gamma = mxGetScalar(prhs[6]); + + // glambda + Real* glambda = mxGetPr(prhs[7]); + + // X + const mwSize* dimX = mxGetDimensions(prhs[8]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[8]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[9]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[9]), dimY, 2); + + //seuil + Real seuil = mxGetScalar(prhs[10]); + + // tau + Real tau = mxGetScalar(prhs[11]); + + ///////////// + // OUTPUTS // + ///////////// + + // A1 + mwSize dimA[] = {p,m+1,L}; + plhs[0] = mxCreateNumericArray(3,dimA,mxGetClassID(prhs[4]),mxREAL); + Int* A1 = (Int*)mxGetData(plhs[0]); + + // A2 + plhs[1] = mxCreateNumericArray(3,dimA,mxGetClassID(prhs[4]),mxREAL); + Int* A2 = (Int*)mxGetData(plhs[1]); + + // rho + const mwSize dimRho[] = {dimRhoInit[0], dimRhoInit[1], dimRhoInit[2], L}; + plhs[2] = mxCreateNumericArray(4,dimRho,mxDOUBLE_CLASS,mxREAL); + Real* Rho = mxGetPr(plhs[2]); + + // pi + const mwSize dimPi[] = {k, L}; + plhs[3] = mxCreateNumericMatrix(dimPi[0],dimPi[1],mxDOUBLE_CLASS,mxREAL); + double* Pi = mxGetPr(plhs[3]); + + ///////////////////////////// + // Call to selectiontotale // + ///////////////////////////// + + selectiontotale(brPhiInit,brRhoInit,piInit,brGamInit,mini,maxi,gamma,glambda,brX,brY,seuil,tau, + A1,A2,Rho,Pi, + n,p,m,k,L); + + free(brPhiInit); + free(brRhoInit); + free(brGamInit); + free(brX); + free(brY); + + //post-processing: convert by-rows outputs to MATLAB matrices + Int* mlA1 = brToMatlabArray_int(A1,dimA,3); + copyArray(mlA1,A1,dimA[0]*dimA[1]*dimA[2]); + free(mlA1); + + Int* mlA2 = brToMatlabArray_int(A2,dimA,3); + copyArray(mlA2,A2,dimA[0]*dimA[1]*dimA[2]); + free(mlA2); + + Real* mlRho = brToMatlabArray_real(Rho, dimRho, 4); + copyArray(mlRho, Rho, dimRho[0]*dimRho[1]*dimRho[2]*dimRho[3]); + free(mlRho); + + Real* mlPi = brToMatlabArray_real(Pi, dimPi, 2); + copyArray(mlPi, Pi, dimPi[0]*dimPi[1]); + free(mlPi); +} diff --git a/ProcLassoMLE/EMGLLF.c b/ProcLassoMLE/EMGLLF.c new file mode 100644 index 0000000..96b81b3 --- /dev/null +++ b/ProcLassoMLE/EMGLLF.c @@ -0,0 +1,409 @@ +#include "EMGLLF.h" +#include + +// TODO: comment on EMGLLF purpose +void EMGLLF( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // valeur de gamma : puissance des proportions dans la pénalisation pour un Lasso adaptatif + Real lambda, // valeur du paramètre de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* rho, // parametre de variance renormalisé, calculé par l'EM + Real* pi, // parametre des proportions renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + Real* S, + // additional size parameters + mwSize n, // nombre d'echantillons + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k) // nombre de composantes dans le mélange +{ + //Initialize outputs + copyArray(phiInit, phi, p*m*k); + copyArray(rhoInit, rho, m*m*k); + copyArray(piInit, pi, k); + zeroArray(LLF, maxi); + //S is already allocated, and doesn't need to be 'zeroed' + + //Other local variables + //NOTE: variables order is always [maxi],n,p,m,k + Real* gam = (Real*)malloc(n*k*sizeof(Real)); + copyArray(gamInit, gam, n*k); + Real* b = (Real*)malloc(k*sizeof(Real)); + Real* Phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* Rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* Pi = (Real*)malloc(k*sizeof(Real)); + Real* gam2 = (Real*)malloc(k*sizeof(Real)); + Real* pi2 = (Real*)malloc(k*sizeof(Real)); + Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real)); + Real* ps = (Real*)malloc(m*k*sizeof(Real)); + Real* nY2 = (Real*)malloc(m*k*sizeof(Real)); + Real* ps1 = (Real*)malloc(n*m*k*sizeof(Real)); + Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real)); + Real* nY21 = (Real*)malloc(n*m*k*sizeof(Real)); + Real* Gam = (Real*)malloc(n*k*sizeof(Real)); + Real* X2 = (Real*)malloc(n*p*k*sizeof(Real)); + Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real)); + gsl_matrix* matrix = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real dist = 0.0; + Real dist2 = 0.0; + Int ite = 0; + Real EPS = 1e-15; + Real* dotProducts = (Real*)malloc(k*sizeof(Real)); + + while (ite < mini || (ite < maxi && (dist >= tau || dist2 >= sqrt(tau)))) + { + copyArray(phi, Phi, p*m*k); + copyArray(rho, Rho, m*m*k); + copyArray(pi, Pi, k); + + // Calculs associes a Y et X + for (mwSize r=0; r + Real dotProduct = 0.0; + for (mwSize u=0; u n*lambda*pow(pi[r],gamma)) + phi[j*m*k+mm*k+r] = (n*lambda*pow(pi[r],gamma) - S[j*m*k+mm*k+r]) + / Gram2[j*p*k+j*k+r]; + else + phi[j*m*k+mm*k+r] = -(n*lambda*pow(pi[r],gamma) + S[j*m*k+mm*k+r]) + / Gram2[j*p*k+j*k+r]; + } + } + } + + ///////////// + // Etape E // + ///////////// + + int signum; + Real sumLogLLF2 = 0.0; + for (mwSize i=0; i + dotProducts[r] = 0.0; + for (mwSize u=0; udata[u*m+v] = rho[u*m*k+v*k+r]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrix, signum); + + Gam[i*k+r] = pi[r] * detRhoR * exp(-0.5*dotProducts[r] + shift); + sumLLF1 += Gam[i*k+r] / pow(2*M_PI,m/2.0); + sumGamI += Gam[i*k+r]; + } + sumLogLLF2 += log(sumLLF1); + for (mwSize r=0; r EPS + ? Gam[i*k+r] / sumGamI + : 0.0; + } + } + + //sum(pen(ite,:)) + Real sumPen = 0.0; + for (mwSize r=0; r Dist1) + Dist1 = tmpDist; + } + } + } + //Dist2=max(max((abs(rho-Rho))./(1+abs(rho)))); + Real Dist2 = 0.0; + for (mwSize u=0; u Dist2) + Dist2 = tmpDist; + } + } + } + //Dist3=max(max((abs(pi-Pi))./(1+abs(Pi)))); + Real Dist3 = 0.0; + for (mwSize u=0; u Dist3) + Dist3 = tmpDist; + } + } + //dist2=max([max(Dist1),max(Dist2),max(Dist3)]); + dist2 = Dist1; + if (Dist2 > dist2) + dist2 = Dist2; + if (Dist3 > dist2) + dist2 = Dist3; + + ite++; + } + + //free memory + free(b); + free(gam); + free(Gam); + free(Phi); + free(Rho); + free(Pi); + free(ps); + free(nY2); + free(ps1); + free(nY21); + free(Gram2); + free(ps2); + gsl_matrix_free(matrix); + gsl_permutation_free(permutation); + free(XiPhiR); + free(YiRhoR); + free(gam2); + free(pi2); + free(X2); + free(Y2); + free(dotProducts); +} diff --git a/ProcLassoMLE/EMGLLF.h b/ProcLassoMLE/EMGLLF.h new file mode 100644 index 0000000..c75b89d --- /dev/null +++ b/ProcLassoMLE/EMGLLF.h @@ -0,0 +1,31 @@ +#ifndef select_EMGLLF_H +#define select_EMGLLF_H + +#include "ioutils.h" + +void EMGLLF( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + Int mini, + Int maxi, + Real gamma, + Real lambda, + const Real* X, + const Real* Y, + Real tau, + // OUT parameters + Real* phi, + Real* rho, + Real* pi, + Real* LLF, + Real* S, + // additional size parameters + mwSize n, + mwSize p, + mwSize m, + mwSize k); + +#endif diff --git a/ProcLassoMLE/EMGLLF.m b/ProcLassoMLE/EMGLLF.m new file mode 100644 index 0000000..1be6ba0 --- /dev/null +++ b/ProcLassoMLE/EMGLLF.m @@ -0,0 +1,174 @@ +function[phi,rho,pi,LLF,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau) + + %Get matrices dimensions + PI = 4.0 * atan(1.0); + n = size(X, 1); + [p,m,k] = size(phiInit); + + %Initialize outputs + phi = phiInit; + rho = rhoInit; + pi = piInit; + LLF = zeros(maxi,1); + S = zeros(p,m,k); + + %Other local variables + %NOTE: variables order is always n,p,m,k + gam = gamInit; + Gram2 = zeros(p,p,k); + ps2 = zeros(p,m,k); + b = zeros(k,1); + pen = zeros(maxi,k); + X2 = zeros(n,p,k); + Y2 = zeros(n,m,k); + dist = 0; + dist2 = 0; + ite = 1; + pi2 = zeros(k,1); + ps = zeros(m,k); + nY2 = zeros(m,k); + ps1 = zeros(n,m,k); + nY21 = zeros(n,m,k); + Gam = zeros(n,k); + EPS = 1e-15; + + while ite<=mini || (ite<=maxi && (dist>=tau || dist2>=sqrt(tau))) + + Phi = phi; + Rho = rho; + Pi = pi; + + %Calculs associés à Y et X + for r=1:k + for mm=1:m + Y2(:,mm,r) = sqrt(gam(:,r)) .* Y(:,mm); + end + for i=1:n + X2(i,:,r) = X(i,:) .* sqrt(gam(i,r)); + end + for mm=1:m + ps2(:,mm,r) = transpose(X2(:,:,r)) * Y2(:,mm,r); + end + for j=1:p + for s=1:p + Gram2(j,s,r) = dot(X2(:,j,r), X2(:,s,r)); + end + end + end + + %%%%%%%%%% + %Etape M % + %%%%%%%%%% + + %Pour pi + for r=1:k + b(r) = sum(sum(abs(phi(:,:,r)))); + end + gam2 = sum(gam,1); + a = sum(gam*transpose(log(pi))); + + %tant que les proportions sont negatives + kk = 0; + pi2AllPositive = false; + while ~pi2AllPositive + pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi); + pi2AllPositive = true; + for r=1:k + if pi2(r) < 0 + pi2AllPositive = false; + break; + end + end + kk = kk+1; + end + + %t(m) la plus grande valeur dans la grille O.1^k tel que ce soit + %décroissante ou constante + while (-1/n*a+lambda*((pi.^gamma)*b))<(-1/n*gam2*transpose(log(pi2))+lambda.*(pi2.^gamma)*b) && kk<1000 + pi2 = pi+0.1^kk*(1/n*gam2-pi); + kk = kk+1; + end + t = 0.1^(kk); + pi = (pi+t*(pi2-pi)) / sum(pi+t*(pi2-pi)); + + %Pour phi et rho + for r=1:k + for mm=1:m + for i=1:n + ps1(i,mm,r) = Y2(i,mm,r) * dot(X2(i,:,r), phi(:,mm,r)); + nY21(i,mm,r) = (Y2(i,mm,r))^2; + end + ps(mm,r) = sum(ps1(:,mm,r)); + nY2(mm,r) = sum(nY21(:,mm,r)); + rho(mm,mm,r) = ((ps(mm,r)+sqrt(ps(mm,r)^2+4*nY2(mm,r)*(gam2(r))))/(2*nY2(mm,r))); + end + end + for r=1:k + for j=1:p + for mm=1:m + S(j,mm,r) = -rho(mm,mm,r)*ps2(j,mm,r) + dot(phi(1:j-1,mm,r),Gram2(j,1:j-1,r)')... + + dot(phi(j+1:p,mm,r),Gram2(j,j+1:p,r)'); + if abs(S(j,mm,r)) <= n*lambda*(pi(r)^gamma) + phi(j,mm,r)=0; + else + if S(j,mm,r)> n*lambda*(pi(r)^gamma) + phi(j,mm,r)=(n*lambda*(pi(r)^gamma)-S(j,mm,r))/Gram2(j,j,r); + else + phi(j,mm,r)=-(n*lambda*(pi(r)^gamma)+S(j,mm,r))/Gram2(j,j,r); + end + end + end + end + end + + %%%%%%%%%% + %Etape E % + %%%%%%%%%% + + sumLogLLF2 = 0.0; + for i=1:n + %precompute dot products to numerically adjust their values + dotProducts = zeros(k,1); + for r=1:k + dotProducts(r)= (Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)); + end + shift = 0.5*min(dotProducts); + + %compute Gam(:,:) using shift determined above + sumLLF1 = 0.0; + for r=1:k + Gam(i,r) = pi(r)*det(rho(:,:,r))*exp(-0.5*dotProducts(r) + shift); + sumLLF1 = sumLLF1 + Gam(i,r)/(2*PI)^(m/2); + end + sumLogLLF2 = sumLogLLF2 + log(sumLLF1); + sumGamI = sum(Gam(i,:)); + if sumGamI > EPS + gam(i,:) = Gam(i,:) / sumGamI; + else + gam(i,:) = zeros(k,1); + end + end + + sumPen = 0.0; + for r=1:k + sumPen = sumPen + pi(r).^gamma .* b(r); + end + LLF(ite) = -(1/n)*sumLogLLF2 + lambda*sumPen; + + if ite == 1 + dist = LLF(ite); + else + dist = (LLF(ite)-LLF(ite-1))/(1+abs(LLF(ite))); + end + + Dist1=max(max(max((abs(phi-Phi))./(1+abs(phi))))); + Dist2=max(max(max((abs(rho-Rho))./(1+abs(rho))))); + Dist3=max(max((abs(pi-Pi))./(1+abs(Pi)))); + dist2=max([Dist1,Dist2,Dist3]); + + ite=ite+1; + end + + pi = transpose(pi); + +end diff --git a/ProcLassoMLE/EMGLLF_interface.c b/ProcLassoMLE/EMGLLF_interface.c new file mode 100644 index 0000000..470e845 --- /dev/null +++ b/ProcLassoMLE/EMGLLF_interface.c @@ -0,0 +1,125 @@ +#include "ioutils.h" +#include "EMGLLF.h" +#include + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction( + int nlhs, + mxArray* plhs[], + int nrhs, + const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=11) + mexErrMsgIdAndTxt("select:EMGLLF:nrhs","11 inputs required."); + if (nlhs!=5) + mexErrMsgIdAndTxt("select:EMGLLF:nlhs","5 outputs required."); + + // Get matrices dimensions + const mwSize n = mxGetDimensions(prhs[8])[0]; + const mwSize p = mxGetDimensions(prhs[0])[0]; + const mwSize m = mxGetDimensions(prhs[0])[1]; + const mwSize k = mxGetDimensions(prhs[0])[2]; + + //////////// + // INPUTS // + //////////// + + // phiInit + const mwSize* dimPhiInit = mxGetDimensions(prhs[0]); + Real* brPhiInit = matlabToBrArray_real(mxGetPr(prhs[0]), dimPhiInit, 3); + + // rhoInit + const mwSize* dimRhoInit = mxGetDimensions(prhs[1]); + Real* brRhoInit = matlabToBrArray_real(mxGetPr(prhs[1]), dimRhoInit, 3); + + // piInit + Real* piInit = mxGetPr(prhs[2]); + + // gamInit + const mwSize* dimGamInit = mxGetDimensions(prhs[3]); + Real* brGamInit = matlabToBrArray_real(mxGetPr(prhs[3]), dimGamInit, 2); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[4]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[5]))[0]; + + // gamma + Real gamma = mxGetScalar(prhs[6]); + + // lambda + Real lambda = mxGetScalar(prhs[7]); + + // X + const mwSize* dimX = mxGetDimensions(prhs[8]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[8]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[9]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[9]), dimY, 2); + + // tau + Real tau = mxGetScalar(prhs[10]); + + ///////////// + // OUTPUTS // + ///////////// + + // phi + const mwSize dimPhi[] = {dimPhiInit[0], dimPhiInit[1], dimPhiInit[2]}; + plhs[0] = mxCreateNumericArray(3,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // rho + const mwSize dimRho[] = {dimRhoInit[0], dimRhoInit[1], dimRhoInit[2]}; + plhs[1] = mxCreateNumericArray(3,dimRho,mxDOUBLE_CLASS,mxREAL); + Real* rho = mxGetPr(plhs[1]); + + // pi + plhs[2] = mxCreateNumericMatrix(k,1,mxDOUBLE_CLASS,mxREAL); + Real* pi = mxGetPr(plhs[2]); + + // LLF + plhs[3] = mxCreateNumericMatrix(maxi,1,mxDOUBLE_CLASS,mxREAL); + Real* LLF = mxGetPr(plhs[3]); + + // S + const mwSize dimS[] = {p, m, k}; + plhs[4] = mxCreateNumericArray(3,dimS,mxDOUBLE_CLASS,mxREAL); + Real* S = mxGetPr(plhs[4]); + + //////////////////// + // Call to EMGLLF // + //////////////////// + + EMGLLF(brPhiInit,brRhoInit,piInit,brGamInit,mini,maxi,gamma,lambda,brX,brY,tau, + phi,rho,pi,LLF,S, + n,p,m,k); + + free(brPhiInit); + free(brRhoInit); + free(brGamInit); + free(brX); + free(brY); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 3); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]); + free(mlPhi); + + Real* mlRho = brToMatlabArray_real(rho, dimRho, 3); + copyArray(mlRho, rho, dimRho[0]*dimRho[1]*dimRho[2]); + free(mlRho); + + Real* mlS = brToMatlabArray_real(S, dimS, 3); + copyArray(mlS, S, dimS[0]*dimS[1]*dimS[2]); + free(mlS); +} diff --git a/ProcLassoMLE/compileMex.m b/ProcLassoMLE/compileMex.m new file mode 100644 index 0000000..e595409 --- /dev/null +++ b/ProcLassoMLE/compileMex.m @@ -0,0 +1,9 @@ +%compile C code (for MATLAB or Octave) +if exist('octave_config_info') + setenv('CFLAGS','-O2 -std=gnu99 -fopenmp') + mkoctfile --mex -DOctave -I../Util EMGLLF.c EMGLLF_interface.c ../Util/ioutils.c -o EMGLLF -lm -lgsl -lgslcblas -lgomp + mkoctfile --mex -DOctave -I../Util constructionModelesLassoMLE.c constructionModelesLassoMLE_interface.c EMGLLF.c ../Util/ioutils.c -o constructionModelesLassoMLE -lm -lgsl -lgslcblas -lgomp +else + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util EMGLLF.c EMGLLF_interface.c ../Util/ioutils.c -output EMGLLF -lm -lgsl -lgslcblas -lgomp + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util constructionModelesLassoMLE.c constructionModelesLassoMLE_interface.c EMGLLF.c ../Util/ioutils.c -output constructionModelesLassoMLE -lm -lgsl -lgslcblas -lgomp +end diff --git a/ProcLassoMLE/constructionModelesLassoMLE.c b/ProcLassoMLE/constructionModelesLassoMLE.c new file mode 100644 index 0000000..bcbfd3c --- /dev/null +++ b/ProcLassoMLE/constructionModelesLassoMLE.c @@ -0,0 +1,218 @@ +#include "EMGLLF.h" +#include "constructionModelesLassoMLE.h" +#include +#include +#include "omp_num_threads.h" + +// TODO: comment on constructionModelesLassoMLE purpose +void constructionModelesLassoMLE( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // valeur de gamma : puissance des proportions dans la pénalisation pour un Lasso adaptatif + const Real* glambda, // valeur des paramètres de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real seuil, // seuil pour prendre en compte une variable + Real tau, // seuil pour accepter la convergence + const Int* A1, // matrice des coefficients des parametres selectionnes + const Int* A2, // matrice des coefficients des parametres non selectionnes + // OUT parameters + Real* phi, // estimateur ainsi calculé par le Lasso + Real* rho, // estimateur ainsi calculé par le Lasso + Real* pi, // estimateur ainsi calculé par le Lasso + Real* lvraisemblance, // estimateur ainsi calculé par le Lasso + // additional size parameters + mwSize n, // taille de l'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k, // nombre de composantes + mwSize L) // taille de glambda +{ + //preparation: phi = 0 + for (mwSize u=0; u 0 + //~ phi(A2(j,1,lambdaIndex),b,:,lambdaIndex) = 0.0; + //~ end + if (lengthB > 0) + { + for (mwSize mm=0; mmdata[u*m+v] = rho[u*m*k*L+v*k*L+r*L+lambdaIndex]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrix, signum); + + //compute Y(i,:)*rho(:,:,r,lambdaIndex) + for (mwSize u=0; u + Real dotProduct = 0.0; + for (mwSize u=0; u 0 + phi(A2(j,1,lambdaIndex),b,:,lambdaIndex) = 0.0; + end + c = A1(j,2:end,lambdaIndex); + c(c==0) = []; + dimension = dimension + length(c); + end + + %on veut calculer l'EMV avec toutes nos estimations + densite = zeros(n,L); + for i=1:n + for r=1:k + delta = Y(i,:)*rho(:,:,r,lambdaIndex) - (X(i,a)*(phi(a,:,r,lambdaIndex))); + densite(i,lambdaIndex) = densite(i,lambdaIndex) +... + pi(r,lambdaIndex)*det(rho(:,:,r,lambdaIndex))/(sqrt(2*PI))^m*exp(-dot(delta,delta)/2.0); + end + end + lvraisemblance(lambdaIndex,1) = sum(log(densite(:,lambdaIndex))); + lvraisemblance(lambdaIndex,2) = (dimension+m+1)*k-1; + end + +end diff --git a/ProcLassoMLE/constructionModelesLassoMLE_interface.c b/ProcLassoMLE/constructionModelesLassoMLE_interface.c new file mode 100644 index 0000000..a331c67 --- /dev/null +++ b/ProcLassoMLE/constructionModelesLassoMLE_interface.c @@ -0,0 +1,142 @@ +#include "ioutils.h" +#include "constructionModelesLassoMLE.h" +#include + +#include + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction( + int nlhs, + mxArray* plhs[], + int nrhs, + const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=14) + mexErrMsgIdAndTxt("select:constructionModelesLassoMLE:nrhs","14 inputs required."); + if (nlhs!=4) + mexErrMsgIdAndTxt("select:constructionModelesLassoMLE:nlhs","4 outputs required."); + + // Get matrices dimensions + const mwSize n = mxGetDimensions(prhs[8])[0]; + const mwSize p = mxGetDimensions(prhs[0])[0]; + const mwSize m = mxGetDimensions(prhs[0])[1]; + const mwSize k = mxGetDimensions(prhs[0])[2]; + const mwSize L = mxGetNumberOfElements(prhs[7]); + + //////////// + // INPUTS // + //////////// + + // phiInit + const mwSize* dimPhiInit = mxGetDimensions(prhs[0]); + Real* brPhiInit = matlabToBrArray_real(mxGetPr(prhs[0]), dimPhiInit, 3); + + // rhoInit + const mwSize* dimRhoInit = mxGetDimensions(prhs[1]); + Real* brRhoInit = matlabToBrArray_real(mxGetPr(prhs[1]), dimRhoInit, 3); + + // piInit + Real* piInit = mxGetPr(prhs[2]); + + // gamInit + const mwSize* dimGamInit = mxGetDimensions(prhs[3]); + Real* brGamInit = matlabToBrArray_real(mxGetPr(prhs[3]), dimGamInit, 2); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[4]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[5]))[0]; + + // gamma + Real gamma = mxGetScalar(prhs[6]); + + // glambda + Real* glambda = mxGetPr(prhs[7]); + + // X + const mwSize* dimX = mxGetDimensions(prhs[8]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[8]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[9]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[9]), dimY, 2); + + //seuil + Real seuil = mxGetScalar(prhs[10]); + + // tau + Real tau = mxGetScalar(prhs[11]); + + // A1 + const mwSize* dimA = mxGetDimensions(prhs[12]); + Int* brA1 = matlabToBrArray_int(mxGetData(prhs[12]), dimA, 3); + + // A2 + Int* brA2 = matlabToBrArray_int(mxGetData(prhs[13]), dimA, 3); + + ///////////// + // OUTPUTS // + ///////////// + + // phi + const mwSize dimPhi[] = {dimPhiInit[0], dimPhiInit[1], dimPhiInit[2], L}; + plhs[0] = mxCreateNumericArray(4,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // rho + const mwSize dimRho[] = {dimRhoInit[0], dimRhoInit[1], dimRhoInit[2], L}; + plhs[1] = mxCreateNumericArray(4,dimRho,mxDOUBLE_CLASS,mxREAL); + Real* rho = mxGetPr(plhs[1]); + + // pi + const mwSize dimPi[] = {k, L}; + plhs[2] = mxCreateNumericMatrix(dimPi[0],dimPi[1],mxDOUBLE_CLASS,mxREAL); + Real* pi = mxGetPr(plhs[2]); + + // lvraisemblance + const mwSize dimLvraisemblance[] = {L, 2}; + plhs[3] = mxCreateNumericMatrix(L, 2, mxDOUBLE_CLASS,mxREAL); + Real* lvraisemblance = mxGetPr(plhs[3]); + + ///////////////////////////////////////// + // Call to constructionModelesLassoMLE // + ///////////////////////////////////////// + + constructionModelesLassoMLE( + brPhiInit,brRhoInit,piInit,brGamInit,mini,maxi,gamma,glambda,brX,brY,seuil,tau,brA1,brA2, + phi,rho,pi,lvraisemblance, + n,p,m,k,L); + + free(brPhiInit); + free(brRhoInit); + free(brGamInit); + free(brX); + free(brY); + free(brA1); + free(brA2); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 4); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]*dimPhi[3]); + free(mlPhi); + + Real* mlRho = brToMatlabArray_real(rho, dimRho, 4); + copyArray(mlRho, rho, dimRho[0]*dimRho[1]*dimRho[2]*dimRho[3]); + free(mlRho); + + Real* mlPi = brToMatlabArray_real(pi, dimPi, 2); + copyArray(mlPi, pi, dimPi[0]*dimPi[1]); + free(mlPi); + + Real* mlLvraisemblance = brToMatlabArray_real(lvraisemblance, dimLvraisemblance, 2); + copyArray(mlLvraisemblance, lvraisemblance, dimLvraisemblance[0]*dimLvraisemblance[1]); + free(mlLvraisemblance); +} diff --git a/ProcLassoRank/EMGrank.c b/ProcLassoRank/EMGrank.c new file mode 100644 index 0000000..721c0c2 --- /dev/null +++ b/ProcLassoRank/EMGrank.c @@ -0,0 +1,308 @@ +#include "EMGrank.h" +#include + +// Compute pseudo-inverse of a square matrix +static Real* pinv(const Real* matrix, mwSize dim) +{ + gsl_matrix* U = gsl_matrix_alloc(dim,dim); + gsl_matrix* V = gsl_matrix_alloc(dim,dim); + gsl_vector* S = gsl_vector_alloc(dim); + gsl_vector* work = gsl_vector_alloc(dim); + Real EPS = 1e-10; //threshold for singular value "== 0" + + //copy matrix into U + for (mwSize i=0; idata[i] = matrix[i]; + + //U,S,V = SVD of matrix + gsl_linalg_SV_decomp(U, V, S, work); + gsl_vector_free(work); + + // Obtain pseudo-inverse by V*S^{-1}*t(U) + Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); + for (mwSize i=0; idata[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; + inverse[i*dim+ii] = dotProduct; + } + } + + gsl_matrix_free(U); + gsl_matrix_free(V); + gsl_vector_free(S); + return inverse; +} + +// TODO: comment EMGrank purpose +void EMGrank( + // IN parameters + const Real* Pi, // parametre de proportion + const Real* Rho, // parametre initial de variance renormalisé + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const Int* rank, // vecteur des rangs possibles + // OUT parameters + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + // additional size parameters + mwSize n, // taille de l'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k) // nombre de composantes +{ + // Allocations, initializations + Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); + Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); + int signum; + Real invN = 1.0/n; + int deltaPhiBufferSize = 20; + Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); + mwSize ite = 0; + Real sumDeltaPhi = 0.0; + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real* Xr = (Real*)malloc(n*p*sizeof(Real)); + Real* Yr = (Real*)malloc(n*m*sizeof(Real)); + Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); + Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); + gsl_matrix* matrixM = gsl_matrix_alloc(p, m); + gsl_matrix* matrixE = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + gsl_matrix* V = gsl_matrix_alloc(m,m); + gsl_vector* S = gsl_vector_alloc(m); + gsl_vector* work = gsl_vector_alloc(m); + + //Initialize class memberships (all elements in class 0; TODO: randomize ?) + Int* Z = (Int*)calloc(n, sizeof(Int)); + + //Initialize phi to zero, because some M loops might exit before phi affectation + for (mwSize i=0; itau)) + { + ///////////// + // Etape M // + ///////////// + + //M step: Mise à jour de Beta (et donc phi) + for (mwSize r=0; rdata[j*m+jj] = dotProduct; + } + } + free(invTXrXr); + + //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + gsl_linalg_SV_decomp(matrixM, V, S, work); + + //Set m-rank(r) singular values to zero, and recompose + //best rank(r) approximation of the initial product + for (mwSize j=rank[r]; jdata[j] = 0.0; + + //[intermediate step] Compute hatBetaR = U * S * t(V) + Real* U = matrixM->data; + for (mwSize j=0; jdata[u] * V->data[jj*m+u]; + hatBetaR[j*m+jj] = dotProduct; + } + } + + //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) + for (mwSize j=0; jdata[j*m+jj] = Rho[j*m*k+jj*k+r]; + } + gsl_linalg_LU_decomp(matrixE, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrixE, signum); + + //compute Y(i,:)*Rho(:,:,r) + for (mwSize j=0; j + Real dotProduct = 0.0; + for (mwSize u=0; u maxLogGamIR) + { + Z[i] = r; + maxLogGamIR = logGamIR; + } + sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); + } + + sumLogLLF2 += log(sumLLF1); + } + + // Assign output variable LLF + *LLF = -invN * sumLogLLF2; + + //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi)))); + Real newDeltaPhi = 0.0; + for (mwSize j=0; j newDeltaPhi) + newDeltaPhi = tmpDist; + } + } + } + + //update distance parameter to check algorithm convergence (delta(phi, Phi)) + //TODO: deltaPhi should be a linked list for perf. + if (ite < deltaPhiBufferSize) + deltaPhi[ite] = newDeltaPhi; + else + { + sumDeltaPhi -= deltaPhi[0]; + for (int u=0; utau) + + %M step: Mise à jour de Beta (et donc phi) + for r=1:k + if (sum(Z==r) == 0) + continue; + end + %U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + [U,S,V] = svd(pinv(transpose(X(Z==r,:))*X(Z==r,:))*transpose(X(Z==r,:))*Y(Z==r,:)); + %Set m-rank(r) singular values to zero, and recompose + %best rank(r) approximation of the initial product + S(rank(r)+1:end,:) = 0; + phi(:,:,r) = U * S * transpose(V) * Rho(:,:,r); + end + + %Etape E et calcul de LLF + sumLogLLF2 = 0.0; + for i=1:n + sumLLF1 = 0.0; + maxLogGamIR = -Inf; + for r=1:k + dotProduct = (Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)); + logGamIR = log(Pi(r)) + log(det(Rho(:,:,r))) - 0.5*dotProduct; + %Z(i) = index of max (gam(i,:)) + if logGamIR > maxLogGamIR + Z(i) = r; + maxLogGamIR = logGamIR; + end + sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2); + end + sumLogLLF2 = sumLogLLF2 + log(sumLLF1); + end + + LLF = -1/n * sumLogLLF2; + + % update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi = [ deltaPhi, max(max(max((abs(phi-Phi))./(1+abs(phi))))) ]; + if length(deltaPhi) > deltaPhiBufferSize + deltaPhi = deltaPhi(2:length(deltaPhi)); + end + sumDeltaPhi = sum(abs(deltaPhi)); + + % update other local variables + Phi = phi; + ite = ite+1; + + end + +end diff --git a/ProcLassoRank/EMGrank_interface.c b/ProcLassoRank/EMGrank_interface.c new file mode 100644 index 0000000..778ef67 --- /dev/null +++ b/ProcLassoRank/EMGrank_interface.c @@ -0,0 +1,90 @@ +#include "ioutils.h" +#include "EMGrank.h" +#include + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction( + int nlhs, + mxArray* plhs[], + int nrhs, + const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=8) + mexErrMsgIdAndTxt("select:EMGrank:nrhs","8 inputs required."); + if (nlhs!=2) + mexErrMsgIdAndTxt("select:EMGrank:nlhs","3 outputs required."); + + // Get matrices dimensions + const mwSize n = mxGetDimensions(prhs[4])[0]; + const mwSize p = mxGetDimensions(prhs[4])[1]; + const mwSize m = mxGetDimensions(prhs[1])[0]; + const mwSize k = mxGetDimensions(prhs[1])[2]; + + //////////// + // INPUTS // + //////////// + + // Pi + Real* Pi = mxGetPr(prhs[0]); + + // Rho + const mwSize* dimRho = mxGetDimensions(prhs[1]); + Real* brRho = matlabToBrArray_real(mxGetPr(prhs[1]), dimRho, 3); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[2]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[3]))[0]; + + // X + const mwSize* dimX = mxGetDimensions(prhs[4]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[4]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[5]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[5]), dimY, 2); + + // tau + Real tau = mxGetScalar(prhs[6]); + + // rank + Int* rank = (Int*)mxGetData(prhs[7]); + + ///////////// + // OUTPUTS // + ///////////// + + // phi + const mwSize dimPhi[] = {p, m, k}; + plhs[0] = mxCreateNumericArray(3,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // LLF + plhs[1] = mxCreateNumericMatrix(1,1,mxDOUBLE_CLASS,mxREAL); + Real* LLF = mxGetPr(plhs[1]); + + ///////////////////// + // Call to EMGrank // + ///////////////////// + + EMGrank(Pi,brRho,mini,maxi,brX,brY,tau,rank, + phi,LLF, + n,p,m,k); + + free(brRho); + free(brX); + free(brY); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 3); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]); + free(mlPhi); +} diff --git a/ProcLassoRank/compileMex.m b/ProcLassoRank/compileMex.m new file mode 100644 index 0000000..044f442 --- /dev/null +++ b/ProcLassoRank/compileMex.m @@ -0,0 +1,9 @@ +%compile C code (for MATLAB or Octave) +if exist('octave_config_info') + setenv('CFLAGS','-O2 -std=gnu99 -fopenmp') + mkoctfile --mex -DOctave -I../Util EMGrank.c EMGrank_interface.c ../Util/ioutils.c -o EMGrank -lm -lgsl -lgslcblas -lgomp + mkoctfile --mex -DOctave -I../Util constructionModelesLassoRank.c constructionModelesLassoRank_interface.c EMGrank.c ../Util/ioutils.c -o constructionModelesLassoRank -lm -lgsl -lgslcblas -lgomp +else + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util EMGrank.c EMGrank_interface.c ../Util/ioutils.c -output EMGrank -lm -lgsl -lgslcblas -lgomp + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util constructionModelesLassoRank.c constructionModelesLassoRank_interface.c EMGrank.c ../Util/ioutils.c -output constructionModelesLassoRank -lm -lgsl -lgslcblas -lgomp +end diff --git a/ProcLassoRank/constructionModelesLassoRank.c b/ProcLassoRank/constructionModelesLassoRank.c new file mode 100644 index 0000000..98c02d5 --- /dev/null +++ b/ProcLassoRank/constructionModelesLassoRank.c @@ -0,0 +1,131 @@ +#include "EMGrank.h" +#include "constructionModelesLassoRank.h" +#include +#include +#include "omp_num_threads.h" + +// TODO: comment on constructionModelesLassoRank purpose +void constructionModelesLassoRank( + // IN parameters + const Real* Pi, // parametre initial des proportions + const Real* Rho, // parametre initial de variance renormalisé + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const Int* A1, // matrice des coefficients des parametres selectionnes + Int rangmin, //rang minimum autorisé + Int rangmax, //rang maximum autorisé + // OUT parameters (all pointers, to be modified) + Real* phi, // estimateur ainsi calculé par le Lasso + Real* lvraisemblance, // estimateur ainsi calculé par le Lasso + // additional size parameters + mwSize n, // taille de l'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k, // nombre de composantes + mwSize L) // taille de glambda +{ + //On cherche les rangs possiblement intéressants + Int deltaRank = rangmax-rangmin+1; + mwSize Size = (mwSize)pow(deltaRank,k); + Int* Rank = (Int*)malloc(Size*k*sizeof(Int)); + for (mwSize r=0; r 0 + Real* phiLambda = (Real*)malloc(longueurActive*m*k*sizeof(Real)); + Real LLF; + for (Int j=0; j 0 + for j=1:Size + [phiLambda,LLF] = EMGrank(Pi(:,lambdaIndex),Rho(:,:,:,lambdaIndex),mini,maxi,X(:,active),Y,tau,Rank(j,:)); + lvraisemblance((lambdaIndex-1)*Size+j,:) = [LLF, sum(Rank(j,:) .* (length(active)-Rank(j,:)+m))]; + phi(active,:,:,(lambdaIndex-1)*Size+j) = phiLambda; + end + end + end + +end diff --git a/ProcLassoRank/constructionModelesLassoRank_interface.c b/ProcLassoRank/constructionModelesLassoRank_interface.c new file mode 100644 index 0000000..96908c2 --- /dev/null +++ b/ProcLassoRank/constructionModelesLassoRank_interface.c @@ -0,0 +1,109 @@ +#include "ioutils.h" +#include "constructionModelesLassoRank.h" +#include + +#include + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction(int nlhs, mxArray* plhs[], + int nrhs, const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=10) + mexErrMsgIdAndTxt("select:constructionModelesLassoRank:nrhs","10 inputs required."); + if (nlhs!=2) + mexErrMsgIdAndTxt("select:constructionModelesLassoRank:nlhs","3 outputs required."); + + // Get matrices dimensions, to be given to main routine above + const mwSize n = mxGetDimensions(prhs[4])[0]; + const mwSize p = mxGetDimensions(prhs[4])[1]; + const mwSize m = mxGetDimensions(prhs[1])[0]; + const mwSize k = mxGetDimensions(prhs[1])[2]; + const mwSize L = mxGetDimensions(prhs[7])[1]; + + //////////// + // INPUTS // + //////////// + + // pi + const mwSize* dimPi = mxGetDimensions(prhs[0]); + Real* brPi = matlabToBrArray_real(mxGetPr(prhs[0]), dimPi, 2); + + // rho + const mwSize* dimRho = mxGetDimensions(prhs[1]); + Real* brRho = matlabToBrArray_real(mxGetPr(prhs[1]), dimRho, 4); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[2]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[3]))[0]; + + // X + const mwSize* dimX = mxGetDimensions(prhs[4]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[4]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[5]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[5]), dimY, 2); + + // tau + Real tau = mxGetScalar(prhs[6]); + + // A1 + const mwSize* dimA = mxGetDimensions(prhs[7]); + Int* brA1 = matlabToBrArray_int(mxGetData(prhs[7]), dimA, 2); + + //rangmin + Int rangmin = ((Int*)mxGetData(prhs[8]))[0]; + + //rangmax + Int rangmax = ((Int*)mxGetData(prhs[9]))[0]; + + ///////////// + // OUTPUTS // + ///////////// + + // phi + mwSize Size = pow(rangmax-rangmin+1,k); + const mwSize dimPhi[] = {p, m, k, L*Size}; + plhs[0] = mxCreateNumericArray(4,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // lvraisemblance + const mwSize dimLvraisemblance[] = {L*Size, 2}; + plhs[1] = mxCreateNumericMatrix(dimLvraisemblance[0],dimLvraisemblance[1],mxDOUBLE_CLASS,mxREAL); + Real* lvraisemblance = mxGetPr(plhs[1]); + + + ////////////////////////////////////////// + // Call to constructionModelesLassoRank // + ////////////////////////////////////////// + + constructionModelesLassoRank( + brPi,brRho,mini,maxi,brX,brY,tau,brA1,rangmin,rangmax, + phi,lvraisemblance, + n,p,m,k,L); + + free(brPi); + free(brRho); + free(brX); + free(brY); + free(brA1); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 4); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]*dimPhi[3]); + free(mlPhi); + + Real* mlLvraisemblance = brToMatlabArray_real(lvraisemblance, dimLvraisemblance, 2); + copyArray(mlLvraisemblance, lvraisemblance, dimLvraisemblance[0]*dimLvraisemblance[1]); + free(mlLvraisemblance); + +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..b48c505 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# model SELECTion + +This code is the applied part of the PhD thesis of [Emilie Devijver](http://www.math.u-psud.fr/~devijver/). + +## Description + +The function selmix delivers a multivariate Gaussian mixture in regression model collection. +According to the parameter estimation, we can compute classical model selection criterion, as BIC or AIC, or slope heuristic, using the CAPUSHE package. +The methodology used is described in 'Model-Based Clustering for High-Dimensional Data. Application to Functional Data.', +available at [this location](https://hal.archives-ouvertes.fr/hal-01060063) + +## Arguments + +Regressors, denoted by X (of size n x p) and responses, denoted by Y (of size n x q) are must-have arguments. + +Optionally, we could add + +* gamma: weight power in the Lasso penalty (according to Stadler et al., $\gamma \in \{0,1/2,1\}$; +* mini: the minimum number of iterations; +* maxi: the maximum number of iterations; +* tau: the threshold for stopping EM algorithm; +* kmin and kmax: the bounds of interesting number of components, +* rangmin and rangmax: the bounds of interesting rank values. + +## Usage + + objet = selmix(X,Y) + objet.run(index) + +For index=1, it computes the Lasso-MLE procedure. +For index=2, it computes the Lasso-Rank procedure. + +/!\ Be careful to the current path /!\ + +## Values + +* phiInit, rhoInit, piInit, gamInit: the initialization of the matrices phi, rho, pi and gamma, +* gridLambda: grid of regularization parameters used to select relevant variables (if kmax-kmin=0, it is, if not, it is the last grid of regularization parameters) +* A1,A2: indices of variables selected or not selected (matrices of size (p+1) x q x size(gridLambda)) +* Phi,Rho,Pi: estimations of each parameter thanks to the procedure LassoMLE if compute index=1, and thanks to the procedure LassoRank if computed index=2. + + +## Example + + n=10; + p=10; + q=5; + X=randn(n,p); + Y=randn(n,q); + + objet=selmix(X,Y); + objet.run(1); + objet.run(2); diff --git a/SelectModel/selectionModelesLassoMLE.m b/SelectModel/selectionModelesLassoMLE.m new file mode 100644 index 0000000..ab39371 --- /dev/null +++ b/SelectModel/selectionModelesLassoMLE.m @@ -0,0 +1,12 @@ +%Selection de modèle dans la procédure Lasso-MLE +% Selection de modele : voici les parametres selectionnes pour chaque +% dimension, et l'indice associe +[indiceLassoMLE,D1LassoMLE] = selectionmodele(vraisLassoMLE); + +%on veut tracer la courbe +%figure(2) +%plot(D1LassoMLE/n,vraisLassoMLE(indiceLassoMLE),'.') +%on veut appliquer Capushe : il nous faut un tableau de données +tableauLassoMLE = enregistrerdonnees(D1LassoMLE,vraisLassoMLE(indiceLassoMLE,:),n); +save tableauLassoMLE.mat tableauLassoMLE +%On conclut avec Capushe ! diff --git a/SelectModel/selectionModelesLassoRank.m b/SelectModel/selectionModelesLassoRank.m new file mode 100644 index 0000000..aacc460 --- /dev/null +++ b/SelectModel/selectionModelesLassoRank.m @@ -0,0 +1,11 @@ +%Selection de modèle dans la procedure Lasso Rank +vraisLassoRank=vraisLassoRank'; +% Selection de modele : voici les parametres selectionnes pour chaque +% dimension, et l'indice associe +[indiceLassoRank,D1LassoRank]=selectionmodele(vraisLassoRank); +tableauLassoRank=enregistrerdonnees(D1LassoRank,vraisLassoRank(indiceLassoRank,:),n); +%On veut tracer la courbe des pentes +%figure(2) +%plot(D1LassoRank/n,vraisLassoRank(indiceLassoRank),'.') +save tableauLassoRank.mat tableauLassoRank +%On conclut avec Capushe ! diff --git a/SelectModel/selectiondindice.m b/SelectModel/selectiondindice.m new file mode 100644 index 0000000..4ef8b96 --- /dev/null +++ b/SelectModel/selectiondindice.m @@ -0,0 +1,19 @@ +%utile dans selectiontotale.m, remarque quels coefficients sont nuls et lesquels ne le sont pas +function[A,B]=selectiondindice(phi,seuil) + + [pp,m,~]=size(phi); + A=zeros(pp,m); + B=zeros(pp,m); + for j=1:pp + cpt=0;cpt2=0; + for mm=1:m + if (max(phi(j,mm,:)) > seuil) + cpt=cpt+1; + A(j,cpt)=mm; + else cpt2=cpt2+1; + B(j,cpt2)=mm; + end + end + end + +end diff --git a/SelectModel/selectionmodele.m b/SelectModel/selectionmodele.m new file mode 100644 index 0000000..33828ab --- /dev/null +++ b/SelectModel/selectionmodele.m @@ -0,0 +1,20 @@ +function [indice,D1]=selectionmodele(vraisemblance) + + D=vraisemblance(:,2); + [D1]=unique(D); + indice=ones(1,length(D1)); + %On ne sélectionne que celui qui maximise : l'EMV + if length(D1)>2 + for i=1:length(D1) + a=[]; + for j=1:length(D) + if D(j)==D1(i) + a=[a,vraisemblance(j,1)]; + end + end + b=max(a); + indice(i)=find(vraisemblance(:,1)==b,1); + end + end + +end diff --git a/SelectModel/suppressionmodelesegaux.m b/SelectModel/suppressionmodelesegaux.m new file mode 100644 index 0000000..3d2a3a9 --- /dev/null +++ b/SelectModel/suppressionmodelesegaux.m @@ -0,0 +1,18 @@ +function[B1,B2,glambda,ind,rho,pi]=suppressionmodelesegaux(B1,B2,glambda,rho,pi) + + ind=[]; + for l=1:length(glambda) + for ll=1:l-1 + if B1(:,:,l)==B1(:,:,ll) + ind=[ind l]; + end + end + end + ind=unique(ind); + B1(:,:,ind)=[]; + glambda(ind)=[]; + B2(:,:,ind)=[]; + rho(:,:,:,ind)=[]; + pi(:,ind)=[]; + +end diff --git a/SelectModel/suppressionmodelesegaux2.m b/SelectModel/suppressionmodelesegaux2.m new file mode 100644 index 0000000..3158d36 --- /dev/null +++ b/SelectModel/suppressionmodelesegaux2.m @@ -0,0 +1,17 @@ +function[B1,ind,rho,pi]=suppressionmodelesegaux2(B1,rho,pi) + + ind=[]; + nombreLambda=size(B1,2); + for l=1:nombreLambda + for ll=1:l-1 + if B1(:,l)==B1(:,ll) + ind=[ind l]; + end + end + end + ind=unique(ind); + B1(:,ind)=[]; + rho(:,:,:,ind)=[]; + pi(:,ind)=[]; + +end diff --git a/Util/ioutils.c b/Util/ioutils.c new file mode 100644 index 0000000..36a1d8e --- /dev/null +++ b/Util/ioutils.c @@ -0,0 +1,168 @@ +#include "ioutils.h" +#include +#include + +// Check if array == refArray +void compareArray(const char* ID, const void* array, const void* refArray, mwSize size, int isInteger) +{ + Real EPS = 1e-5; //precision + printf("Checking %s\n",ID); + Real maxError = 0.0; + for (mwSize i=0; i= maxError) + maxError = error; + } + if (maxError >= EPS) + printf(" Inaccuracy: max(abs(error)) = %g >= %g\n",maxError,EPS); + else + printf(" OK\n"); +} + +// Next function is a generalization of : +//~ Real* brToMatlabArray(Real* brArray, int dimX, int dimY, int dimZ, int dimW) +//~ { + //~ Real* matlabArray = (Real*)malloc(dimX*dimY*dimZ*dimW*sizeof(Real)); + //~ for (int u=0; u +#include +#include +#include //for type wchar16_t + +// Include header for mwSize type +#ifdef Octave +#include +#else +#include +#endif + +// CHUNK_SIZE = number of lambda values to be treated sequentially by a single core +#define CHUNK_SIZE 1 + +// integer type chosen in MATLAB (to be tuned: 32 bits should be enough) +typedef int64_t Int; + +// real number type chosen in MATLAB (default: double) +typedef double Real; + +#ifndef M_PI +#define M_PI 3.141592653589793 +#endif + +// Fill an array with zeros +#define zeroArray(array, size)\ +{\ + for (Int u=0; u