From 1d3c1faaef57d906a7f12490040398b252ff049d Mon Sep 17 00:00:00 2001 From: Benjamin Auder <benjamin.auder@somewhere> Date: Wed, 16 Nov 2016 14:31:39 +0100 Subject: [PATCH] initial commit --- InputParameters/basicInitParameters.m | 19 + InputParameters/compileMex.m | 7 + InputParameters/generateIO.m | 37 ++ InputParameters/generateIOdefault.m | 23 + InputParameters/grillelambda.m | 17 + InputParameters/initSmallEM.m | 33 ++ InputParameters/selectiontotale.c | 142 ++++++ InputParameters/selectiontotale.h | 33 ++ InputParameters/selectiontotale.m | 54 +++ InputParameters/selectiontotale_interface.c | 127 ++++++ ProcLassoMLE/EMGLLF.c | 409 ++++++++++++++++++ ProcLassoMLE/EMGLLF.h | 31 ++ ProcLassoMLE/EMGLLF.m | 174 ++++++++ ProcLassoMLE/EMGLLF_interface.c | 125 ++++++ ProcLassoMLE/compileMex.m | 9 + ProcLassoMLE/constructionModelesLassoMLE.c | 218 ++++++++++ ProcLassoMLE/constructionModelesLassoMLE.h | 34 ++ ProcLassoMLE/constructionModelesLassoMLE.m | 58 +++ .../constructionModelesLassoMLE_interface.c | 142 ++++++ ProcLassoRank/EMGrank.c | 308 +++++++++++++ ProcLassoRank/EMGrank.h | 25 ++ ProcLassoRank/EMGrank.m | 69 +++ ProcLassoRank/EMGrank_interface.c | 90 ++++ ProcLassoRank/compileMex.m | 9 + ProcLassoRank/constructionModelesLassoRank.c | 131 ++++++ ProcLassoRank/constructionModelesLassoRank.h | 29 ++ ProcLassoRank/constructionModelesLassoRank.m | 40 ++ .../constructionModelesLassoRank_interface.c | 109 +++++ README.md | 53 +++ SelectModel/selectionModelesLassoMLE.m | 12 + SelectModel/selectionModelesLassoRank.m | 11 + SelectModel/selectiondindice.m | 19 + SelectModel/selectionmodele.m | 20 + SelectModel/suppressionmodelesegaux.m | 18 + SelectModel/suppressionmodelesegaux2.m | 17 + Util/ioutils.c | 168 +++++++ Util/ioutils.h | 75 ++++ Util/omp_num_threads.h | 7 + selmix.m | 194 +++++++++ 39 files changed, 3096 insertions(+) create mode 100644 InputParameters/basicInitParameters.m create mode 100644 InputParameters/compileMex.m create mode 100644 InputParameters/generateIO.m create mode 100644 InputParameters/generateIOdefault.m create mode 100644 InputParameters/grillelambda.m create mode 100644 InputParameters/initSmallEM.m create mode 100644 InputParameters/selectiontotale.c create mode 100644 InputParameters/selectiontotale.h create mode 100644 InputParameters/selectiontotale.m create mode 100644 InputParameters/selectiontotale_interface.c create mode 100644 ProcLassoMLE/EMGLLF.c create mode 100644 ProcLassoMLE/EMGLLF.h create mode 100644 ProcLassoMLE/EMGLLF.m create mode 100644 ProcLassoMLE/EMGLLF_interface.c create mode 100644 ProcLassoMLE/compileMex.m create mode 100644 ProcLassoMLE/constructionModelesLassoMLE.c create mode 100644 ProcLassoMLE/constructionModelesLassoMLE.h create mode 100644 ProcLassoMLE/constructionModelesLassoMLE.m create mode 100644 ProcLassoMLE/constructionModelesLassoMLE_interface.c create mode 100644 ProcLassoRank/EMGrank.c create mode 100644 ProcLassoRank/EMGrank.h create mode 100644 ProcLassoRank/EMGrank.m create mode 100644 ProcLassoRank/EMGrank_interface.c create mode 100644 ProcLassoRank/compileMex.m create mode 100644 ProcLassoRank/constructionModelesLassoRank.c create mode 100644 ProcLassoRank/constructionModelesLassoRank.h create mode 100644 ProcLassoRank/constructionModelesLassoRank.m create mode 100644 ProcLassoRank/constructionModelesLassoRank_interface.c create mode 100644 README.md create mode 100644 SelectModel/selectionModelesLassoMLE.m create mode 100644 SelectModel/selectionModelesLassoRank.m create mode 100644 SelectModel/selectiondindice.m create mode 100644 SelectModel/selectionmodele.m create mode 100644 SelectModel/suppressionmodelesegaux.m create mode 100644 SelectModel/suppressionmodelesegaux2.m create mode 100644 Util/ioutils.c create mode 100644 Util/ioutils.h create mode 100644 Util/omp_num_threads.h create mode 100644 selmix.m diff --git a/InputParameters/basicInitParameters.m b/InputParameters/basicInitParameters.m new file mode 100644 index 0000000..50410f5 --- /dev/null +++ b/InputParameters/basicInitParameters.m @@ -0,0 +1,19 @@ +function[phiInit,rhoInit,piInit,gamInit] = basicInitParameters(n,p,m,k) + + phiInit = zeros(p,m,k); + + piInit = (1.0/k) * ones(1,k); + + rhoInit = zeros(m,m,k); + for r=1:k + rhoInit(:,:,r) = eye(m,m); + end + + gamInit = 0.1 * ones(n,k); + R = random('unid',k,n,1); + for i=1:n + gamInit(i,R(i)) = 0.9; + end + gamInit = gamInit / (sum(gamInit(1,:))); + +end diff --git a/InputParameters/compileMex.m b/InputParameters/compileMex.m new file mode 100644 index 0000000..9e5f64b --- /dev/null +++ b/InputParameters/compileMex.m @@ -0,0 +1,7 @@ +%compile C code (for MATLAB or Octave) +if exist('octave_config_info') + setenv('CFLAGS','-O2 -std=gnu99 -fopenmp') + mkoctfile --mex -DOctave -I../Util -I../ProcLassoMLE selectiontotale.c selectiontotale_interface.c ../Util/ioutils.c ../ProcLassoMLE/EMGLLF.c -o selectiontotale -lm -lgsl -lgslcblas -lgomp +else + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util -I../ProcLassoMLE selectiontotale.c selectiontotale_interface.c ../Util/ioutils.c ../ProcLassoMLE/EMGLLF.c -output selectiontotale -lm -lgsl -lgslcblas -lgomp +end diff --git a/InputParameters/generateIO.m b/InputParameters/generateIO.m new file mode 100644 index 0000000..c677fd2 --- /dev/null +++ b/InputParameters/generateIO.m @@ -0,0 +1,37 @@ +%X is generated following a gaussian mixture \sum pi_r N(meanX_k, covX_r) +%Y is generated then, with Y_i ~ \sum pi_r N(Beta_r.X_i, covY_r) +function[X,Y,Z] = generateIO(meanX, covX, covY, pi, beta, n) + + [p, ~, k] = size(covX); + [m, ~, ~] = size(covY); + if exist('octave_config_info') + %Octave statistics package doesn't have gmdistribution() + X = zeros(n, p); + Z = zeros(n); + cs = cumsum(pi); + for i=1:n + %TODO: vectorize ? http://stackoverflow.com/questions/2977497/weighted-random-numbers-in-matlab + tmpRand01 = rand(); + [~,Z(i)] = min(cs - tmpRand01 >= 0); + X(i,:) = mvnrnd(meanX(Z(i),:), covX(:,:,Z(i)), 1); + end + else + gmDistX = gmdistribution(meanX, covX, pi); + [X, Z] = random(gmDistX, n); + end + + Y = zeros(n, m); + BX = zeros(n,m,k); + for i=1:n + for r=1:k + %compute beta_r . X_i + BXir = zeros(1, m); + for mm=1:m + BXir(mm) = dot(X(i,:), beta(:,mm,r)); + end + %add pi(r) * N(beta_r . X_i, covY) to Y_i + Y(i,:) = Y(i,:) + pi(r) * mvnrnd(BXir, covY(:,:,r), 1); + end + end + +end diff --git a/InputParameters/generateIOdefault.m b/InputParameters/generateIOdefault.m new file mode 100644 index 0000000..f4c3c1f --- /dev/null +++ b/InputParameters/generateIOdefault.m @@ -0,0 +1,23 @@ +%call generateIO with default parameters (random means, covariances = identity, equirepartition) +function[X,Y,Z] = generateIOdefault(n, p, m, k) + + rangeX = 100; + meanX = rangeX * (1 - 2*rand(k, p)); + covX = zeros(p,p,k); + covY = zeros(m,m,k); + for r=1:k + covX(:,:,r) = eye(p); + covY(:,:,r) = eye(m); + end + pi = (1/k) * ones(1,k); + + %initialize beta to a random number of non-zero random value + beta = zeros(p,m,k); + for j=1:p + nonZeroCount = ceil(m*rand(1)); + beta(j,1:nonZeroCount,:) = rand(nonZeroCount, k); + end + + [X,Y,Z] = generateIO(meanX, covX, covY, pi, beta, n); + +end diff --git a/InputParameters/grillelambda.m b/InputParameters/grillelambda.m new file mode 100644 index 0000000..8d90665 --- /dev/null +++ b/InputParameters/grillelambda.m @@ -0,0 +1,17 @@ +function[gridLambda] = grillelambda(phiInit,rhoInit,piInit,gamInit,X,Y,gamma,mini,maxi,tau) + + n = size(X, 1); + [p,m,k] = size(phiInit); + [phi,rho,pi,~,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,1,0,X,Y,tau); + + gridLambda = zeros(p,m,k); + for j=1:p + for mm=1:m + gridLambda(j,mm,:) = abs(reshape(S(j,mm,:),[k,1])) ./ (n*pi.^gamma); + end + end + + gridLambda = unique(gridLambda); + gridLambda(gridLambda()>1) = []; + +end diff --git a/InputParameters/initSmallEM.m b/InputParameters/initSmallEM.m new file mode 100644 index 0000000..6a2ccc9 --- /dev/null +++ b/InputParameters/initSmallEM.m @@ -0,0 +1,33 @@ +function[phiInit,rhoInit,piInit,gamInit] = initSmallEM(k,x,y,tau) +[n,m]=size(y); +gamInit1=zeros(n,k,20); +for repet=1:20 + Zinit1(:,repet)=clusterdata(y,k); + for r=1:k + betaInit1(:,:,r,repet)=pinv(transpose(x(Zinit1(:,repet)==r,:))*x(Zinit1(:,repet)==r,:))*transpose(x(Zinit1(:,repet)==r,:))*y(Zinit1(:,repet)==r,:); + sigmaInit1(:,:,r,repet)=eye(m); + phiInit1(:,:,r,repet)=betaInit1(:,:,r,repet)/sigmaInit1(:,:,r,repet); + rhoInit1(:,:,r,repet)=inv(sigmaInit1(:,:,r,repet)); + piInit1(repet,r)=sum(Zinit1(:,repet)==r)/n; + end + for i=1:n + for r=1:k + dotProduct = (y(i,:)*rhoInit1(:,:,r,repet)-x(i,:)*phiInit1(:,:,r,repet)) * transpose(y(i,:)*rhoInit1(:,:,r,repet)-x(i,:)*phiInit1(:,:,r,repet)); + Gam(i,r) = piInit1(repet,r)*det(rhoInit1(:,:,r,repet))*exp(-0.5*dotProduct); + end + sumGamI = sum(Gam(i,:)); + gamInit1(i,:,repet) = Gam(i,:) / sumGamI; + end + miniInit=int64(10); + maxiInit=int64(11); + + [~,~,~,LLFEssai,~] = EMGLLF(phiInit1(:,:,:,repet),rhoInit1(:,:,:,repet),piInit1(repet,:),gamInit1(:,:,repet),miniInit,maxiInit,1,0,x,y,tau); + LLFinit1(repet)=LLFEssai(end); +end +[~,b]=max(LLFinit1); + +phiInit=phiInit1(:,:,:,b); +rhoInit=rhoInit1(:,:,:,b); +piInit=piInit1(b,:); +gamInit=gamInit1(:,:,b); +end \ No newline at end of file diff --git a/InputParameters/selectiontotale.c b/InputParameters/selectiontotale.c new file mode 100644 index 0000000..f3ed95b --- /dev/null +++ b/InputParameters/selectiontotale.c @@ -0,0 +1,142 @@ +#include "selectiontotale.h" +#include "EMGLLF.h" +#include <omp.h> +#include "omp_num_threads.h" + +// Main job on raw inputs (after transformation from mxArray) +void selectiontotale( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + Int mini, // nombre minimal d'itérations dans lambdaIndex'algorithme EM + Int maxi, // nombre maximal d'itérations dans lambdaIndex'algorithme EM + Real gamma, // valeur de gamma : puissance des proportions dans la pénalisation pour un Lasso adaptatif + const Real* glambda, // valeur des paramètres de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real seuil, // seuil pour prendre en compte une variable + Real tau, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Int* A1, // matrice des coefficients des parametres selectionnes + Int* A2, // matrice des coefficients des parametres non selectionnes + Real* Rho, // estimateur ainsi calculé par le Lasso + Real* Pi, // estimateur ainsi calculé par le Lasso + // additional size parameters + mwSize n, // taille de lambdaIndex'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k, // nombre de composantes + mwSize L) // taille de glambda +{ + // Fill outputs with zeros: they might not be assigned + for (int u=0; u<p*(m+1)*L; u++) + { + A1[u] = 0; + A2[u] = 0; + } + for (int u=0; u<m*m*k*L; u++) + Rho[u] = 0.0; + for (int u=0; u<k*L; u++) + Pi[u] = 0.0; + + //initiate parallel section + mwSize lambdaIndex; + omp_set_num_threads(OMP_NUM_THREADS); + #pragma omp parallel default(shared) private(lambdaIndex) + { + #pragma omp for schedule(dynamic,CHUNK_SIZE) nowait + for (lambdaIndex=0; lambdaIndex<L; lambdaIndex++) + { + //allocate output variables + Real* phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* pi = (Real*)malloc(k*sizeof(Real)); + Real* LLF = (Real*)malloc(maxi*sizeof(Real)); + Real* S = (Real*)malloc(p*m*k*sizeof(Real)); + EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda[lambdaIndex],X,Y,tau, + phi,rho,pi,LLF,S, + n,p,m,k); + free(LLF); + free(S); + + //Si un des coefficients est supérieur au seuil, on garde cette variable + mwSize* selectedVariables = (mwSize*)calloc(p*m,sizeof(mwSize)); + mwSize* discardedVariables = (mwSize*)calloc(p*m,sizeof(mwSize)); + int atLeastOneSelectedVariable = 0; + for (mwSize j=0; j<p; j++) + { + mwSize cpt = 0; + mwSize cpt2 = 0; + for (mwSize jj=0; jj<m; jj++) + { + Real maxPhi = 0.0; + for (mwSize r=0; r<k; r++) + { + if (fabs(phi[j*m*k+jj*k+r]) > maxPhi) + maxPhi = fabs(phi[j*m*k+jj*k+r]); + } + if (maxPhi > seuil) + { + selectedVariables[j*m+cpt] = jj+1; + atLeastOneSelectedVariable = 1; + cpt++; + } + else + { + discardedVariables[j*m+cpt2] = jj+1; + cpt2++; + } + } + } + free(phi); + + if (atLeastOneSelectedVariable) + { + Int* vec = (Int*)malloc(p*sizeof(Int)); + mwSize vecSize = 0; + for (mwSize j=0; j<p; j++) + { + if (selectedVariables[j*m+0] != 0) + vec[vecSize++] = j; + } + + //A1 + for (mwSize j=0; j<p; j++) + A1[j*(m+1)*L+0*L+lambdaIndex] = (j < vecSize ? vec[j]+1 : 0); + for (mwSize j=0; j<vecSize; j++) + { + for (mwSize jj=1; jj<=m; jj++) + A1[j*(m+1)*L+jj*L+lambdaIndex] = selectedVariables[vec[j]*m+jj-1]; + } + //A2 + for (mwSize j=0; j<p; j++) + A2[j*(m+1)*L+0*L+lambdaIndex] = j+1; + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=1; jj<=m; jj++) + A2[j*(m+1)*L+jj*L+lambdaIndex] = discardedVariables[j*m+jj-1]; + } + //Rho + for (mwSize j=0; j<m; j++) + { + for (mwSize jj=0; jj<m; jj++) + { + for (mwSize r=0; r<k; r++) + Rho[j*m*k*L+jj*k*L+r*L+lambdaIndex] = rho[j*m*k+jj*k+r]; + } + } + //Pi + for (mwSize r=0; r<k; r++) + Pi[r*L+lambdaIndex] = pi[r]; + free(vec); + } + + free(selectedVariables); + free(discardedVariables); + free(rho); + free(pi); + } + } +} diff --git a/InputParameters/selectiontotale.h b/InputParameters/selectiontotale.h new file mode 100644 index 0000000..2d02da8 --- /dev/null +++ b/InputParameters/selectiontotale.h @@ -0,0 +1,33 @@ +#ifndef select_selectiontotale_H +#define select_selectiontotale_H + +#include "ioutils.h" + +// Main job on raw inputs (after transformation from mxArray) +void selectiontotale( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + Int mini, + Int maxi, + Real gamma, + const Real* glambda, + const Real* X, + const Real* Y, + Real seuil, + Real tau, + // OUT parameters + Int* A1, + Int* A2, + Real* Rho, + Real* Pi, + // additional size parameters + mwSize n, + mwSize p, + mwSize m, + mwSize k, + mwSize L); + +#endif diff --git a/InputParameters/selectiontotale.m b/InputParameters/selectiontotale.m new file mode 100644 index 0000000..6c24d05 --- /dev/null +++ b/InputParameters/selectiontotale.m @@ -0,0 +1,54 @@ +function[A1,A2,Rho,Pi] = selectiontotale(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau) + + [p,m,k] = size(phiInit); + L = length(glambda); + A1 = zeros(p,m+1,L,'int64'); + A2 = zeros(p,m+1,L,'int64'); + Rho = zeros(m,m,k,L); + Pi = zeros(k,L); + + %Pour chaque lambda de la grille, on calcule les coefficients + for lambdaIndex=1:L + [phi,rho,pi,~,~] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda(lambdaIndex),X,Y,tau); + + %Si un des coefficients est supérieur au seuil, on garde cette variable + selectedVariables = zeros(p,m); + discardedVariables = zeros(p,m); + atLeastOneSelectedVariable = false; + for j=1:p + cpt=1; + cpt2=1; + for mm=1:m + if max(abs(phi(j,mm,:))) > seuil + selectedVariables(j,cpt) = mm; + cpt = cpt+1; + atLeastOneSelectedVariable = true; + else + discardedVariables(j,cpt2) = mm; + cpt2 = cpt2+1; + end + end + end + + %Si aucun des coefficients n'a été gardé on renvoit la matrice nulle + %Et si on enlevait ces colonnes de zéro ??? Indices des colonnes vides + if atLeastOneSelectedVariable + vec = []; + for j=1:p + if selectedVariables(j,1) ~= 0 + vec = [vec;j]; + end + end + + %Sinon on renvoit les numéros des coefficients utiles + A1(:,1,lambdaIndex) = [vec;zeros(p-length(vec),1)]; + A1(1:length(vec),2:m+1,lambdaIndex) = selectedVariables(vec,:); + A2(:,1,lambdaIndex) = 1:p; + A2(:,2:m+1,lambdaIndex) = discardedVariables; + Rho(:,:,:,lambdaIndex) = rho; + Pi(:,lambdaIndex) = pi; + end + + end + +end diff --git a/InputParameters/selectiontotale_interface.c b/InputParameters/selectiontotale_interface.c new file mode 100644 index 0000000..b4b8f4c --- /dev/null +++ b/InputParameters/selectiontotale_interface.c @@ -0,0 +1,127 @@ +#include "ioutils.h" +#include "selectiontotale.h" +#include "EMGLLF.h" +#include <mex.h> + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction(int nlhs, mxArray* plhs[], + int nrhs, const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=12) + mexErrMsgIdAndTxt("select:selectiontotale:nrhs","12 inputs required."); + if (nlhs!=4) + mexErrMsgIdAndTxt("select:selectiontotale:nlhs","4 outputs required."); + + // Get matrices dimensions, to be given to main routine above + const mwSize n = mxGetDimensions(prhs[8])[0]; + const mwSize p = mxGetDimensions(prhs[8])[1]; + const mwSize m = mxGetDimensions(prhs[1])[0]; + const mwSize k = mxGetDimensions(prhs[1])[2]; + const mwSize L = mxGetNumberOfElements(prhs[7]); + + //////////// + // INPUTS // + //////////// + + // phiInit + const mwSize* dimPhiInit = mxGetDimensions(prhs[0]); + Real* brPhiInit = matlabToBrArray_real(mxGetPr(prhs[0]), dimPhiInit, 3); + + // rhoInit + const mwSize* dimRhoInit = mxGetDimensions(prhs[1]); + Real* brRhoInit = matlabToBrArray_real(mxGetPr(prhs[1]), dimRhoInit, 3); + + // piInit + Real* piInit = mxGetPr(prhs[2]); + + // gamInit + const mwSize* dimGamInit = mxGetDimensions(prhs[3]); + Real* brGamInit = matlabToBrArray_real(mxGetPr(prhs[3]), dimGamInit, 2); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[4]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[5]))[0]; + + // gamma + Real gamma = mxGetScalar(prhs[6]); + + // glambda + Real* glambda = mxGetPr(prhs[7]); + + // X + const mwSize* dimX = mxGetDimensions(prhs[8]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[8]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[9]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[9]), dimY, 2); + + //seuil + Real seuil = mxGetScalar(prhs[10]); + + // tau + Real tau = mxGetScalar(prhs[11]); + + ///////////// + // OUTPUTS // + ///////////// + + // A1 + mwSize dimA[] = {p,m+1,L}; + plhs[0] = mxCreateNumericArray(3,dimA,mxGetClassID(prhs[4]),mxREAL); + Int* A1 = (Int*)mxGetData(plhs[0]); + + // A2 + plhs[1] = mxCreateNumericArray(3,dimA,mxGetClassID(prhs[4]),mxREAL); + Int* A2 = (Int*)mxGetData(plhs[1]); + + // rho + const mwSize dimRho[] = {dimRhoInit[0], dimRhoInit[1], dimRhoInit[2], L}; + plhs[2] = mxCreateNumericArray(4,dimRho,mxDOUBLE_CLASS,mxREAL); + Real* Rho = mxGetPr(plhs[2]); + + // pi + const mwSize dimPi[] = {k, L}; + plhs[3] = mxCreateNumericMatrix(dimPi[0],dimPi[1],mxDOUBLE_CLASS,mxREAL); + double* Pi = mxGetPr(plhs[3]); + + ///////////////////////////// + // Call to selectiontotale // + ///////////////////////////// + + selectiontotale(brPhiInit,brRhoInit,piInit,brGamInit,mini,maxi,gamma,glambda,brX,brY,seuil,tau, + A1,A2,Rho,Pi, + n,p,m,k,L); + + free(brPhiInit); + free(brRhoInit); + free(brGamInit); + free(brX); + free(brY); + + //post-processing: convert by-rows outputs to MATLAB matrices + Int* mlA1 = brToMatlabArray_int(A1,dimA,3); + copyArray(mlA1,A1,dimA[0]*dimA[1]*dimA[2]); + free(mlA1); + + Int* mlA2 = brToMatlabArray_int(A2,dimA,3); + copyArray(mlA2,A2,dimA[0]*dimA[1]*dimA[2]); + free(mlA2); + + Real* mlRho = brToMatlabArray_real(Rho, dimRho, 4); + copyArray(mlRho, Rho, dimRho[0]*dimRho[1]*dimRho[2]*dimRho[3]); + free(mlRho); + + Real* mlPi = brToMatlabArray_real(Pi, dimPi, 2); + copyArray(mlPi, Pi, dimPi[0]*dimPi[1]); + free(mlPi); +} diff --git a/ProcLassoMLE/EMGLLF.c b/ProcLassoMLE/EMGLLF.c new file mode 100644 index 0000000..96b81b3 --- /dev/null +++ b/ProcLassoMLE/EMGLLF.c @@ -0,0 +1,409 @@ +#include "EMGLLF.h" +#include <gsl/gsl_linalg.h> + +// TODO: comment on EMGLLF purpose +void EMGLLF( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // valeur de gamma : puissance des proportions dans la pénalisation pour un Lasso adaptatif + Real lambda, // valeur du paramètre de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* rho, // parametre de variance renormalisé, calculé par l'EM + Real* pi, // parametre des proportions renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + Real* S, + // additional size parameters + mwSize n, // nombre d'echantillons + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k) // nombre de composantes dans le mélange +{ + //Initialize outputs + copyArray(phiInit, phi, p*m*k); + copyArray(rhoInit, rho, m*m*k); + copyArray(piInit, pi, k); + zeroArray(LLF, maxi); + //S is already allocated, and doesn't need to be 'zeroed' + + //Other local variables + //NOTE: variables order is always [maxi],n,p,m,k + Real* gam = (Real*)malloc(n*k*sizeof(Real)); + copyArray(gamInit, gam, n*k); + Real* b = (Real*)malloc(k*sizeof(Real)); + Real* Phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* Rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* Pi = (Real*)malloc(k*sizeof(Real)); + Real* gam2 = (Real*)malloc(k*sizeof(Real)); + Real* pi2 = (Real*)malloc(k*sizeof(Real)); + Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real)); + Real* ps = (Real*)malloc(m*k*sizeof(Real)); + Real* nY2 = (Real*)malloc(m*k*sizeof(Real)); + Real* ps1 = (Real*)malloc(n*m*k*sizeof(Real)); + Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real)); + Real* nY21 = (Real*)malloc(n*m*k*sizeof(Real)); + Real* Gam = (Real*)malloc(n*k*sizeof(Real)); + Real* X2 = (Real*)malloc(n*p*k*sizeof(Real)); + Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real)); + gsl_matrix* matrix = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real dist = 0.0; + Real dist2 = 0.0; + Int ite = 0; + Real EPS = 1e-15; + Real* dotProducts = (Real*)malloc(k*sizeof(Real)); + + while (ite < mini || (ite < maxi && (dist >= tau || dist2 >= sqrt(tau)))) + { + copyArray(phi, Phi, p*m*k); + copyArray(rho, Rho, m*m*k); + copyArray(pi, Pi, k); + + // Calculs associes a Y et X + for (mwSize r=0; r<k; r++) + { + for (mwSize mm=0; mm<m; mm++) + { + //Y2(:,mm,r)=sqrt(gam(:,r)).*transpose(Y(mm,:)); + for (mwSize u=0; u<n; u++) + Y2[u*m*k+mm*k+r] = sqrt(gam[u*k+r]) * Y[u*m+mm]; + } + for (mwSize i=0; i<n; i++) + { + //X2(i,:,r)=X(i,:).*sqrt(gam(i,r)); + for (mwSize u=0; u<p; u++) + X2[i*p*k+u*k+r] = sqrt(gam[i*k+r]) * X[i*p+u]; + } + for (mwSize mm=0; mm<m; mm++) + { + //ps2(:,mm,r)=transpose(X2(:,:,r))*Y2(:,mm,r); + for (mwSize u=0; u<p; u++) + { + Real dotProduct = 0.0; + for (mwSize v=0; v<n; v++) + dotProduct += X2[v*p*k+u*k+r] * Y2[v*m*k+mm*k+r]; + ps2[u*m*k+mm*k+r] = dotProduct; + } + } + for (mwSize j=0; j<p; j++) + { + for (mwSize s=0; s<p; s++) + { + //Gram2(j,s,r)=transpose(X2(:,j,r))*(X2(:,s,r)); + Real dotProduct = 0.0; + for (mwSize u=0; u<n; u++) + dotProduct += X2[u*p*k+j*k+r] * X2[u*p*k+s*k+r]; + Gram2[j*p*k+s*k+r] = dotProduct; + } + } + } + + ///////////// + // Etape M // + ///////////// + + // Pour pi + for (mwSize r=0; r<k; r++) + { + //b(r) = sum(sum(abs(phi(:,:,r)))); + Real sumAbsPhi = 0.0; + for (mwSize u=0; u<p; u++) + for (mwSize v=0; v<m; v++) + sumAbsPhi += fabs(phi[u*m*k+v*k+r]); + b[r] = sumAbsPhi; + } + //gam2 = sum(gam,1); + for (mwSize u=0; u<k; u++) + { + Real sumOnColumn = 0.0; + for (mwSize v=0; v<n; v++) + sumOnColumn += gam[v*k+u]; + gam2[u] = sumOnColumn; + } + //a=sum(gam*transpose(log(pi))); + Real a = 0.0; + for (mwSize u=0; u<n; u++) + { + Real dotProduct = 0.0; + for (mwSize v=0; v<k; v++) + dotProduct += gam[u*k+v] * log(pi[v]); + a += dotProduct; + } + + //tant que les proportions sont negatives + mwSize kk = 0; + int pi2AllPositive = 0; + Real invN = 1.0/n; + while (!pi2AllPositive) + { + //pi2(:)=pi(:)+0.1^kk*(1/n*gam2(:)-pi(:)); + for (mwSize r=0; r<k; r++) + pi2[r] = pi[r] + pow(0.1,kk) * (invN*gam2[r] - pi[r]); + pi2AllPositive = 1; + for (mwSize r=0; r<k; r++) + { + if (pi2[r] < 0) + { + pi2AllPositive = 0; + break; + } + } + kk++; + } + + //t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante + //(pi.^gamma)*b + Real piPowGammaDotB = 0.0; + for (mwSize v=0; v<k; v++) + piPowGammaDotB += pow(pi[v],gamma) * b[v]; + //(pi2.^gamma)*b + Real pi2PowGammaDotB = 0.0; + for (mwSize v=0; v<k; v++) + pi2PowGammaDotB += pow(pi2[v],gamma) * b[v]; + //transpose(gam2)*log(pi2) + Real prodGam2logPi2 = 0.0; + for (mwSize v=0; v<k; v++) + prodGam2logPi2 += gam2[v] * log(pi2[v]); + while (-invN*a + lambda*piPowGammaDotB < -invN*prodGam2logPi2 + lambda*pi2PowGammaDotB && kk<1000) + { + //pi2=pi+0.1^kk*(1/n*gam2-pi); + for (mwSize v=0; v<k; v++) + pi2[v] = pi[v] + pow(0.1,kk) * (invN*gam2[v] - pi[v]); + //pi2 was updated, so we recompute pi2PowGammaDotB and prodGam2logPi2 + pi2PowGammaDotB = 0.0; + for (mwSize v=0; v<k; v++) + pi2PowGammaDotB += pow(pi2[v],gamma) * b[v]; + prodGam2logPi2 = 0.0; + for (mwSize v=0; v<k; v++) + prodGam2logPi2 += gam2[v] * log(pi2[v]); + kk++; + } + Real t = pow(0.1,kk); + //sum(pi+t*(pi2-pi)) + Real sumPiPlusTbyDiff = 0.0; + for (mwSize v=0; v<k; v++) + sumPiPlusTbyDiff += (pi[v] + t*(pi2[v] - pi[v])); + //pi=(pi+t*(pi2-pi))/sum(pi+t*(pi2-pi)); + for (mwSize v=0; v<k; v++) + pi[v] = (pi[v] + t*(pi2[v] - pi[v])) / sumPiPlusTbyDiff; + + //Pour phi et rho + for (mwSize r=0; r<k; r++) + { + for (mwSize mm=0; mm<m; mm++) + { + for (mwSize i=0; i<n; i++) + { + //< X2(i,:,r) , phi(:,mm,r) > + Real dotProduct = 0.0; + for (mwSize u=0; u<p; u++) + dotProduct += X2[i*p*k+u*k+r] * phi[u*m*k+mm*k+r]; + //ps1(i,mm,r)=Y2(i,mm,r)*dot(X2(i,:,r),phi(:,mm,r)); + ps1[i*m*k+mm*k+r] = Y2[i*m*k+mm*k+r] * dotProduct; + nY21[i*m*k+mm*k+r] = Y2[i*m*k+mm*k+r] * Y2[i*m*k+mm*k+r]; + } + //ps(mm,r)=sum(ps1(:,mm,r)); + Real sumPs1 = 0.0; + for (mwSize u=0; u<n; u++) + sumPs1 += ps1[u*m*k+mm*k+r]; + ps[mm*k+r] = sumPs1; + //nY2(mm,r)=sum(nY21(:,mm,r)); + Real sumNy21 = 0.0; + for (mwSize u=0; u<n; u++) + sumNy21 += nY21[u*m*k+mm*k+r]; + nY2[mm*k+r] = sumNy21; + //rho(mm,mm,r)=((ps(mm,r)+sqrt(ps(mm,r)^2+4*nY2(mm,r)*(gam2(r))))/(2*nY2(mm,r))); + rho[mm*m*k+mm*k+r] = ( ps[mm*k+r] + sqrt( ps[mm*k+r]*ps[mm*k+r] + + 4*nY2[mm*k+r] * (gam2[r]) ) ) / (2*nY2[mm*k+r]); + } + } + for (mwSize r=0; r<k; r++) + { + for (mwSize j=0; j<p; j++) + { + for (mwSize mm=0; mm<m; mm++) + { + //sum(phi(1:j-1,mm,r).*transpose(Gram2(j,1:j-1,r)))+sum(phi(j+1:p,mm,r).*transpose(Gram2(j,j+1:p,r))) + Real dotPhiGram2 = 0.0; + for (mwSize u=0; u<j; u++) + dotPhiGram2 += phi[u*m*k+mm*k+r] * Gram2[j*p*k+u*k+r]; + for (mwSize u=j+1; u<p; u++) + dotPhiGram2 += phi[u*m*k+mm*k+r] * Gram2[j*p*k+u*k+r]; + //S(j,r,mm)=-rho(mm,mm,r)*ps2(j,mm,r)+sum(phi(1:j-1,mm,r).*transpose(Gram2(j,1:j-1,r))) + // +sum(phi(j+1:p,mm,r).*transpose(Gram2(j,j+1:p,r))); + S[j*m*k+mm*k+r] = -rho[mm*m*k+mm*k+r] * ps2[j*m*k+mm*k+r] + dotPhiGram2; + if (fabs(S[j*m*k+mm*k+r]) <= n*lambda*pow(pi[r],gamma)) + phi[j*m*k+mm*k+r] = 0; + else if (S[j*m*k+mm*k+r] > n*lambda*pow(pi[r],gamma)) + phi[j*m*k+mm*k+r] = (n*lambda*pow(pi[r],gamma) - S[j*m*k+mm*k+r]) + / Gram2[j*p*k+j*k+r]; + else + phi[j*m*k+mm*k+r] = -(n*lambda*pow(pi[r],gamma) + S[j*m*k+mm*k+r]) + / Gram2[j*p*k+j*k+r]; + } + } + } + + ///////////// + // Etape E // + ///////////// + + int signum; + Real sumLogLLF2 = 0.0; + for (mwSize i=0; i<n; i++) + { + Real sumLLF1 = 0.0; + Real sumGamI = 0.0; + Real minDotProduct = INFINITY; + + for (mwSize r=0; r<k; r++) + { + //Compute + //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)... + // *phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) ); + //split in several sub-steps + + //compute Y(i,:)*rho(:,:,r) + for (mwSize u=0; u<m; u++) + { + YiRhoR[u] = 0.0; + for (mwSize v=0; v<m; v++) + YiRhoR[u] += Y[i*m+v] * rho[v*m*k+u*k+r]; + } + + //compute X(i,:)*phi(:,:,r) + for (mwSize u=0; u<m; u++) + { + XiPhiR[u] = 0.0; + for (mwSize v=0; v<p; v++) + XiPhiR[u] += X[i*p+v] * phi[v*m*k+u*k+r]; + } + + // compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) > + dotProducts[r] = 0.0; + for (mwSize u=0; u<m; u++) + dotProducts[r] += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]); + if (dotProducts[r] < minDotProduct) + minDotProduct = dotProducts[r]; + } + Real shift = 0.5*minDotProduct; + for (mwSize r=0; r<k; r++) + { + //compute det(rho(:,:,r)) [TODO: avoid re-computations] + for (mwSize u=0; u<m; u++) + { + for (mwSize v=0; v<m; v++) + matrix->data[u*m+v] = rho[u*m*k+v*k+r]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrix, signum); + + Gam[i*k+r] = pi[r] * detRhoR * exp(-0.5*dotProducts[r] + shift); + sumLLF1 += Gam[i*k+r] / pow(2*M_PI,m/2.0); + sumGamI += Gam[i*k+r]; + } + sumLogLLF2 += log(sumLLF1); + for (mwSize r=0; r<k; r++) + { + //gam(i,r)=Gam(i,r)/sum(Gam(i,:)); + gam[i*k+r] = sumGamI > EPS + ? Gam[i*k+r] / sumGamI + : 0.0; + } + } + + //sum(pen(ite,:)) + Real sumPen = 0.0; + for (mwSize r=0; r<k; r++) + sumPen += pow(pi[r],gamma) * b[r]; + //LLF(ite)=-1/n*sum(log(LLF2(ite,:)))+lambda*sum(pen(ite,:)); + LLF[ite] = -invN * sumLogLLF2 + lambda * sumPen; + if (ite == 0) + dist = LLF[ite]; + else + dist = (LLF[ite] - LLF[ite-1]) / (1.0 + fabs(LLF[ite])); + + //Dist1=max(max((abs(phi-Phi))./(1+abs(phi)))); + Real Dist1 = 0.0; + for (mwSize u=0; u<p; u++) + { + for (mwSize v=0; v<m; v++) + { + for (mwSize w=0; w<k; w++) + { + Real tmpDist = fabs(phi[u*m*k+v*k+w]-Phi[u*m*k+v*k+w]) + / (1.0+fabs(phi[u*m*k+v*k+w])); + if (tmpDist > Dist1) + Dist1 = tmpDist; + } + } + } + //Dist2=max(max((abs(rho-Rho))./(1+abs(rho)))); + Real Dist2 = 0.0; + for (mwSize u=0; u<m; u++) + { + for (mwSize v=0; v<m; v++) + { + for (mwSize w=0; w<k; w++) + { + Real tmpDist = fabs(rho[u*m*k+v*k+w]-Rho[u*m*k+v*k+w]) + / (1.0+fabs(rho[u*m*k+v*k+w])); + if (tmpDist > Dist2) + Dist2 = tmpDist; + } + } + } + //Dist3=max(max((abs(pi-Pi))./(1+abs(Pi)))); + Real Dist3 = 0.0; + for (mwSize u=0; u<n; u++) + { + for (mwSize v=0; v<k; v++) + { + Real tmpDist = fabs(pi[v]-Pi[v]) / (1.0+fabs(pi[v])); + if (tmpDist > Dist3) + Dist3 = tmpDist; + } + } + //dist2=max([max(Dist1),max(Dist2),max(Dist3)]); + dist2 = Dist1; + if (Dist2 > dist2) + dist2 = Dist2; + if (Dist3 > dist2) + dist2 = Dist3; + + ite++; + } + + //free memory + free(b); + free(gam); + free(Gam); + free(Phi); + free(Rho); + free(Pi); + free(ps); + free(nY2); + free(ps1); + free(nY21); + free(Gram2); + free(ps2); + gsl_matrix_free(matrix); + gsl_permutation_free(permutation); + free(XiPhiR); + free(YiRhoR); + free(gam2); + free(pi2); + free(X2); + free(Y2); + free(dotProducts); +} diff --git a/ProcLassoMLE/EMGLLF.h b/ProcLassoMLE/EMGLLF.h new file mode 100644 index 0000000..c75b89d --- /dev/null +++ b/ProcLassoMLE/EMGLLF.h @@ -0,0 +1,31 @@ +#ifndef select_EMGLLF_H +#define select_EMGLLF_H + +#include "ioutils.h" + +void EMGLLF( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + Int mini, + Int maxi, + Real gamma, + Real lambda, + const Real* X, + const Real* Y, + Real tau, + // OUT parameters + Real* phi, + Real* rho, + Real* pi, + Real* LLF, + Real* S, + // additional size parameters + mwSize n, + mwSize p, + mwSize m, + mwSize k); + +#endif diff --git a/ProcLassoMLE/EMGLLF.m b/ProcLassoMLE/EMGLLF.m new file mode 100644 index 0000000..1be6ba0 --- /dev/null +++ b/ProcLassoMLE/EMGLLF.m @@ -0,0 +1,174 @@ +function[phi,rho,pi,LLF,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau) + + %Get matrices dimensions + PI = 4.0 * atan(1.0); + n = size(X, 1); + [p,m,k] = size(phiInit); + + %Initialize outputs + phi = phiInit; + rho = rhoInit; + pi = piInit; + LLF = zeros(maxi,1); + S = zeros(p,m,k); + + %Other local variables + %NOTE: variables order is always n,p,m,k + gam = gamInit; + Gram2 = zeros(p,p,k); + ps2 = zeros(p,m,k); + b = zeros(k,1); + pen = zeros(maxi,k); + X2 = zeros(n,p,k); + Y2 = zeros(n,m,k); + dist = 0; + dist2 = 0; + ite = 1; + pi2 = zeros(k,1); + ps = zeros(m,k); + nY2 = zeros(m,k); + ps1 = zeros(n,m,k); + nY21 = zeros(n,m,k); + Gam = zeros(n,k); + EPS = 1e-15; + + while ite<=mini || (ite<=maxi && (dist>=tau || dist2>=sqrt(tau))) + + Phi = phi; + Rho = rho; + Pi = pi; + + %Calculs associés à Y et X + for r=1:k + for mm=1:m + Y2(:,mm,r) = sqrt(gam(:,r)) .* Y(:,mm); + end + for i=1:n + X2(i,:,r) = X(i,:) .* sqrt(gam(i,r)); + end + for mm=1:m + ps2(:,mm,r) = transpose(X2(:,:,r)) * Y2(:,mm,r); + end + for j=1:p + for s=1:p + Gram2(j,s,r) = dot(X2(:,j,r), X2(:,s,r)); + end + end + end + + %%%%%%%%%% + %Etape M % + %%%%%%%%%% + + %Pour pi + for r=1:k + b(r) = sum(sum(abs(phi(:,:,r)))); + end + gam2 = sum(gam,1); + a = sum(gam*transpose(log(pi))); + + %tant que les proportions sont negatives + kk = 0; + pi2AllPositive = false; + while ~pi2AllPositive + pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi); + pi2AllPositive = true; + for r=1:k + if pi2(r) < 0 + pi2AllPositive = false; + break; + end + end + kk = kk+1; + end + + %t(m) la plus grande valeur dans la grille O.1^k tel que ce soit + %décroissante ou constante + while (-1/n*a+lambda*((pi.^gamma)*b))<(-1/n*gam2*transpose(log(pi2))+lambda.*(pi2.^gamma)*b) && kk<1000 + pi2 = pi+0.1^kk*(1/n*gam2-pi); + kk = kk+1; + end + t = 0.1^(kk); + pi = (pi+t*(pi2-pi)) / sum(pi+t*(pi2-pi)); + + %Pour phi et rho + for r=1:k + for mm=1:m + for i=1:n + ps1(i,mm,r) = Y2(i,mm,r) * dot(X2(i,:,r), phi(:,mm,r)); + nY21(i,mm,r) = (Y2(i,mm,r))^2; + end + ps(mm,r) = sum(ps1(:,mm,r)); + nY2(mm,r) = sum(nY21(:,mm,r)); + rho(mm,mm,r) = ((ps(mm,r)+sqrt(ps(mm,r)^2+4*nY2(mm,r)*(gam2(r))))/(2*nY2(mm,r))); + end + end + for r=1:k + for j=1:p + for mm=1:m + S(j,mm,r) = -rho(mm,mm,r)*ps2(j,mm,r) + dot(phi(1:j-1,mm,r),Gram2(j,1:j-1,r)')... + + dot(phi(j+1:p,mm,r),Gram2(j,j+1:p,r)'); + if abs(S(j,mm,r)) <= n*lambda*(pi(r)^gamma) + phi(j,mm,r)=0; + else + if S(j,mm,r)> n*lambda*(pi(r)^gamma) + phi(j,mm,r)=(n*lambda*(pi(r)^gamma)-S(j,mm,r))/Gram2(j,j,r); + else + phi(j,mm,r)=-(n*lambda*(pi(r)^gamma)+S(j,mm,r))/Gram2(j,j,r); + end + end + end + end + end + + %%%%%%%%%% + %Etape E % + %%%%%%%%%% + + sumLogLLF2 = 0.0; + for i=1:n + %precompute dot products to numerically adjust their values + dotProducts = zeros(k,1); + for r=1:k + dotProducts(r)= (Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)); + end + shift = 0.5*min(dotProducts); + + %compute Gam(:,:) using shift determined above + sumLLF1 = 0.0; + for r=1:k + Gam(i,r) = pi(r)*det(rho(:,:,r))*exp(-0.5*dotProducts(r) + shift); + sumLLF1 = sumLLF1 + Gam(i,r)/(2*PI)^(m/2); + end + sumLogLLF2 = sumLogLLF2 + log(sumLLF1); + sumGamI = sum(Gam(i,:)); + if sumGamI > EPS + gam(i,:) = Gam(i,:) / sumGamI; + else + gam(i,:) = zeros(k,1); + end + end + + sumPen = 0.0; + for r=1:k + sumPen = sumPen + pi(r).^gamma .* b(r); + end + LLF(ite) = -(1/n)*sumLogLLF2 + lambda*sumPen; + + if ite == 1 + dist = LLF(ite); + else + dist = (LLF(ite)-LLF(ite-1))/(1+abs(LLF(ite))); + end + + Dist1=max(max(max((abs(phi-Phi))./(1+abs(phi))))); + Dist2=max(max(max((abs(rho-Rho))./(1+abs(rho))))); + Dist3=max(max((abs(pi-Pi))./(1+abs(Pi)))); + dist2=max([Dist1,Dist2,Dist3]); + + ite=ite+1; + end + + pi = transpose(pi); + +end diff --git a/ProcLassoMLE/EMGLLF_interface.c b/ProcLassoMLE/EMGLLF_interface.c new file mode 100644 index 0000000..470e845 --- /dev/null +++ b/ProcLassoMLE/EMGLLF_interface.c @@ -0,0 +1,125 @@ +#include "ioutils.h" +#include "EMGLLF.h" +#include <mex.h> + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction( + int nlhs, + mxArray* plhs[], + int nrhs, + const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=11) + mexErrMsgIdAndTxt("select:EMGLLF:nrhs","11 inputs required."); + if (nlhs!=5) + mexErrMsgIdAndTxt("select:EMGLLF:nlhs","5 outputs required."); + + // Get matrices dimensions + const mwSize n = mxGetDimensions(prhs[8])[0]; + const mwSize p = mxGetDimensions(prhs[0])[0]; + const mwSize m = mxGetDimensions(prhs[0])[1]; + const mwSize k = mxGetDimensions(prhs[0])[2]; + + //////////// + // INPUTS // + //////////// + + // phiInit + const mwSize* dimPhiInit = mxGetDimensions(prhs[0]); + Real* brPhiInit = matlabToBrArray_real(mxGetPr(prhs[0]), dimPhiInit, 3); + + // rhoInit + const mwSize* dimRhoInit = mxGetDimensions(prhs[1]); + Real* brRhoInit = matlabToBrArray_real(mxGetPr(prhs[1]), dimRhoInit, 3); + + // piInit + Real* piInit = mxGetPr(prhs[2]); + + // gamInit + const mwSize* dimGamInit = mxGetDimensions(prhs[3]); + Real* brGamInit = matlabToBrArray_real(mxGetPr(prhs[3]), dimGamInit, 2); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[4]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[5]))[0]; + + // gamma + Real gamma = mxGetScalar(prhs[6]); + + // lambda + Real lambda = mxGetScalar(prhs[7]); + + // X + const mwSize* dimX = mxGetDimensions(prhs[8]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[8]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[9]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[9]), dimY, 2); + + // tau + Real tau = mxGetScalar(prhs[10]); + + ///////////// + // OUTPUTS // + ///////////// + + // phi + const mwSize dimPhi[] = {dimPhiInit[0], dimPhiInit[1], dimPhiInit[2]}; + plhs[0] = mxCreateNumericArray(3,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // rho + const mwSize dimRho[] = {dimRhoInit[0], dimRhoInit[1], dimRhoInit[2]}; + plhs[1] = mxCreateNumericArray(3,dimRho,mxDOUBLE_CLASS,mxREAL); + Real* rho = mxGetPr(plhs[1]); + + // pi + plhs[2] = mxCreateNumericMatrix(k,1,mxDOUBLE_CLASS,mxREAL); + Real* pi = mxGetPr(plhs[2]); + + // LLF + plhs[3] = mxCreateNumericMatrix(maxi,1,mxDOUBLE_CLASS,mxREAL); + Real* LLF = mxGetPr(plhs[3]); + + // S + const mwSize dimS[] = {p, m, k}; + plhs[4] = mxCreateNumericArray(3,dimS,mxDOUBLE_CLASS,mxREAL); + Real* S = mxGetPr(plhs[4]); + + //////////////////// + // Call to EMGLLF // + //////////////////// + + EMGLLF(brPhiInit,brRhoInit,piInit,brGamInit,mini,maxi,gamma,lambda,brX,brY,tau, + phi,rho,pi,LLF,S, + n,p,m,k); + + free(brPhiInit); + free(brRhoInit); + free(brGamInit); + free(brX); + free(brY); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 3); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]); + free(mlPhi); + + Real* mlRho = brToMatlabArray_real(rho, dimRho, 3); + copyArray(mlRho, rho, dimRho[0]*dimRho[1]*dimRho[2]); + free(mlRho); + + Real* mlS = brToMatlabArray_real(S, dimS, 3); + copyArray(mlS, S, dimS[0]*dimS[1]*dimS[2]); + free(mlS); +} diff --git a/ProcLassoMLE/compileMex.m b/ProcLassoMLE/compileMex.m new file mode 100644 index 0000000..e595409 --- /dev/null +++ b/ProcLassoMLE/compileMex.m @@ -0,0 +1,9 @@ +%compile C code (for MATLAB or Octave) +if exist('octave_config_info') + setenv('CFLAGS','-O2 -std=gnu99 -fopenmp') + mkoctfile --mex -DOctave -I../Util EMGLLF.c EMGLLF_interface.c ../Util/ioutils.c -o EMGLLF -lm -lgsl -lgslcblas -lgomp + mkoctfile --mex -DOctave -I../Util constructionModelesLassoMLE.c constructionModelesLassoMLE_interface.c EMGLLF.c ../Util/ioutils.c -o constructionModelesLassoMLE -lm -lgsl -lgslcblas -lgomp +else + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util EMGLLF.c EMGLLF_interface.c ../Util/ioutils.c -output EMGLLF -lm -lgsl -lgslcblas -lgomp + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util constructionModelesLassoMLE.c constructionModelesLassoMLE_interface.c EMGLLF.c ../Util/ioutils.c -output constructionModelesLassoMLE -lm -lgsl -lgslcblas -lgomp +end diff --git a/ProcLassoMLE/constructionModelesLassoMLE.c b/ProcLassoMLE/constructionModelesLassoMLE.c new file mode 100644 index 0000000..bcbfd3c --- /dev/null +++ b/ProcLassoMLE/constructionModelesLassoMLE.c @@ -0,0 +1,218 @@ +#include "EMGLLF.h" +#include "constructionModelesLassoMLE.h" +#include <gsl/gsl_linalg.h> +#include <omp.h> +#include "omp_num_threads.h" + +// TODO: comment on constructionModelesLassoMLE purpose +void constructionModelesLassoMLE( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // valeur de gamma : puissance des proportions dans la pénalisation pour un Lasso adaptatif + const Real* glambda, // valeur des paramètres de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real seuil, // seuil pour prendre en compte une variable + Real tau, // seuil pour accepter la convergence + const Int* A1, // matrice des coefficients des parametres selectionnes + const Int* A2, // matrice des coefficients des parametres non selectionnes + // OUT parameters + Real* phi, // estimateur ainsi calculé par le Lasso + Real* rho, // estimateur ainsi calculé par le Lasso + Real* pi, // estimateur ainsi calculé par le Lasso + Real* lvraisemblance, // estimateur ainsi calculé par le Lasso + // additional size parameters + mwSize n, // taille de l'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k, // nombre de composantes + mwSize L) // taille de glambda +{ + //preparation: phi = 0 + for (mwSize u=0; u<p*m*k*L; u++) + phi[u] = 0.0; + + //initiate parallel section + mwSize lambdaIndex; + omp_set_num_threads(OMP_NUM_THREADS); + #pragma omp parallel default(shared) private(lambdaIndex) + { + #pragma omp for schedule(dynamic,CHUNK_SIZE) nowait + for (lambdaIndex=0; lambdaIndex<L; lambdaIndex++) + { + //~ a = A1(:,1,lambdaIndex); + //~ a(a==0) = []; + Int* a = (Int*)malloc(p*sizeof(Int)); + mwSize lengthA = 0; + for (mwSize j=0; j<p; j++) + { + if (A1[j*(m+1)*L+0*L+lambdaIndex] != 0) + a[lengthA++] = A1[j*(m+1)*L+0*L+lambdaIndex] - 1; + } + if (lengthA == 0) + continue; + + //Xa = X(:,a) + Real* Xa = (Real*)malloc(n*lengthA*sizeof(Real)); + for (mwSize i=0; i<n; i++) + { + for (mwSize j=0; j<lengthA; j++) + Xa[i*lengthA+j] = X[i*p+a[j]]; + } + + //phia = phiInit(a,:,:) + Real* phia = (Real*)malloc(lengthA*m*k*sizeof(Real)); + for (mwSize j=0; j<lengthA; j++) + { + for (mwSize mm=0; mm<m; mm++) + { + for (mwSize r=0; r<k; r++) + phia[j*m*k+mm*k+r] = phiInit[a[j]*m*k+mm*k+r]; + } + } + + //[phiLambda,rhoLambda,piLambda,~,~] = EMGLLF(... + // phiInit(a,:,:),rhoInit,piInit,gamInit,mini,maxi,gamma,0,X(:,a),Y,tau); + Real* phiLambda = (Real*)malloc(lengthA*m*k*sizeof(Real)); + Real* rhoLambda = (Real*)malloc(m*m*k*sizeof(Real)); + Real* piLambda = (Real*)malloc(k*sizeof(Real)); + Real* LLF = (Real*)malloc((maxi+1)*sizeof(Real)); + Real* S = (Real*)malloc(lengthA*m*k*sizeof(Real)); + EMGLLF(phia,rhoInit,piInit,gamInit,mini,maxi,gamma,0.0,Xa,Y,tau, + phiLambda,rhoLambda,piLambda,LLF,S, + n,lengthA,m,k); + free(Xa); + free(phia); + free(LLF); + free(S); + + //~ for j=1:length(a) + //~ phi(a(j),:,:,lambdaIndex) = phiLambda(j,:,:); + //~ end + for (mwSize j=0; j<lengthA; j++) + { + for (mwSize mm=0; mm<m; mm++) + { + for (mwSize r=0; r<k; r++) + phi[a[j]*m*k*L+mm*k*L+r*L+lambdaIndex] = phiLambda[j*m*k+mm*k+r]; + } + } + free(phiLambda); + //~ rho(:,:,:,lambdaIndex) = rhoLambda; + for (mwSize u=0; u<m; u++) + { + for (mwSize v=0; v<m; v++) + { + for (mwSize r=0; r<k; r++) + rho[u*m*k*L+v*k*L+r*L+lambdaIndex] = rhoLambda[u*m*k+v*k+r]; + } + } + free(rhoLambda); + //~ pi(:,lambdaIndex) = piLambda; + for (mwSize r=0; r<k; r++) + pi[r*L+lambdaIndex] = piLambda[r]; + free(piLambda); + + mwSize dimension = 0; + Int* b = (Int*)malloc(m*sizeof(Int)); + for (mwSize j=0; j<p; j++) + { + //~ b = A2(j,2:end,lambdaIndex); + //~ b(b==0) = []; + mwSize lengthB = 0; + for (mwSize mm=0; mm<m; mm++) + { + if (A2[j*(m+1)*L+(mm+1)*L+lambdaIndex] != 0) + b[lengthB++] = A2[j*(m+1)*L+(mm+1)*L+lambdaIndex] - 1; + } + //~ if length(b) > 0 + //~ phi(A2(j,1,lambdaIndex),b,:,lambdaIndex) = 0.0; + //~ end + if (lengthB > 0) + { + for (mwSize mm=0; mm<lengthB; mm++) + { + for (mwSize r=0; r<k; r++) + phi[(A2[j*(m+1)*L+0*L+lambdaIndex]-1)*m*k*L + b[mm]*k*L + r*L + lambdaIndex] = 0.0; + } + } + + //~ c = A1(j,2:end,lambdaIndex); + //~ c(c==0) = []; + //~ dimension = dimension + length(c); + for (mwSize mm=0; mm<m; mm++) + { + if (A1[j*(m+1)*L+(mm+1)*L+lambdaIndex] != 0) + dimension++; + } + } + free(b); + + int signum; + Real* densite = (Real*)calloc(L*n,sizeof(Real)); + Real sumLogDensit = 0.0; + gsl_matrix* matrix = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + for (mwSize i=0; i<n; i++) + { + //~ for r=1:k + //~ delta = Y(i,:)*rho(:,:,r,lambdaIndex) - (X(i,a)*(phi(a,:,r,lambdaIndex))); + //~ densite(i,lambdaIndex) = densite(i,lambdaIndex) +... + //~ pi(r,lambdaIndex)*det(rho(:,:,r,lambdaIndex))/(sqrt(2*PI))^m*exp(-dot(delta,delta)/2.0); + //~ end + for (mwSize r=0; r<k; r++) + { + //compute det(rho(:,:,r,lambdaIndex)) [TODO: avoid re-computations] + for (mwSize u=0; u<m; u++) + { + for (mwSize v=0; v<m; v++) + matrix->data[u*m+v] = rho[u*m*k*L+v*k*L+r*L+lambdaIndex]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrix, signum); + + //compute Y(i,:)*rho(:,:,r,lambdaIndex) + for (mwSize u=0; u<m; u++) + { + YiRhoR[u] = 0.0; + for (mwSize v=0; v<m; v++) + YiRhoR[u] += Y[i*m+v] * rho[v*m*k*L+u*k*L+r*L+lambdaIndex]; + } + + //compute X(i,a)*phi(a,:,r,lambdaIndex) + for (mwSize u=0; u<m; u++) + { + XiPhiR[u] = 0.0; + for (mwSize v=0; v<lengthA; v++) + XiPhiR[u] += X[i*p+a[v]] * phi[a[v]*m*k*L+u*k*L+r*L+lambdaIndex]; + } + // On peut remplacer X par Xa dans ce dernier calcul, mais je ne sais pas si c'est intéressant ... + + // compute dotProduct < delta . delta > + Real dotProduct = 0.0; + for (mwSize u=0; u<m; u++) + dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]); + + densite[lambdaIndex*n+i] += (pi[r*L+lambdaIndex]*detRhoR/pow(sqrt(2.0*M_PI),m))*exp(-dotProduct/2.0); + } + sumLogDensit += log(densite[lambdaIndex*n+i]); + } + lvraisemblance[lambdaIndex*2+0] = sumLogDensit; + lvraisemblance[lambdaIndex*2+1] = (dimension+m+1)*k-1; + + free(a); + free(YiRhoR); + free(XiPhiR); + free(densite); + gsl_matrix_free(matrix); + gsl_permutation_free(permutation); + } + } +} diff --git a/ProcLassoMLE/constructionModelesLassoMLE.h b/ProcLassoMLE/constructionModelesLassoMLE.h new file mode 100644 index 0000000..90712e3 --- /dev/null +++ b/ProcLassoMLE/constructionModelesLassoMLE.h @@ -0,0 +1,34 @@ +#ifndef select_constructionModelesLassoMLE_H +#define select_constructionModelesLassoMLE_H + +#include "ioutils.h" + +void constructionModelesLassoMLE( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + Int mini, + Int maxi, + Real gamma, + const Real* glambda, + const Real* X, + const Real* Y, + Real seuil, + Real tau, + const Int* A1, + const Int* A2, + // OUT parameters + Real* phi, + Real* rho, + Real* pi, + Real* lvraisemblance, + // additional size parameters + mwSize n, + mwSize p, + mwSize m, + mwSize k, + mwSize L); + +#endif diff --git a/ProcLassoMLE/constructionModelesLassoMLE.m b/ProcLassoMLE/constructionModelesLassoMLE.m new file mode 100644 index 0000000..9f977ac --- /dev/null +++ b/ProcLassoMLE/constructionModelesLassoMLE.m @@ -0,0 +1,58 @@ +function[phi,rho,pi,lvraisemblance] = constructionModelesLassoMLE(... + phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau,A1,A2) + + PI = 4.0 * atan(1.0); + + %get matrix sizes + n = size(X, 1); + [p,m,k] = size(phiInit); + L = length(glambda); + + %output parameters + phi = zeros(p,m,k,L); + rho = zeros(m,m,k,L); + pi = zeros(k,L); + lvraisemblance = zeros(L,2); + + for lambdaIndex=1:L + % Procedure Lasso-MLE + a = A1(:,1,lambdaIndex); + a(a==0) = []; + if length(a) == 0 + continue; + end + [phiLambda,rhoLambda,piLambda,~,~] = EMGLLF(... + phiInit(a,:,:),rhoInit,piInit,gamInit,mini,maxi,gamma,0,X(:,a),Y,tau); + + for j=1:length(a) + phi(a(j),:,:,lambdaIndex) = phiLambda(j,:,:); + end + rho(:,:,:,lambdaIndex) = rhoLambda; + pi(:,lambdaIndex) = piLambda; + + dimension = 0; + for j=1:p + b = A2(j,2:end,lambdaIndex); + b(b==0) = []; + if length(b) > 0 + phi(A2(j,1,lambdaIndex),b,:,lambdaIndex) = 0.0; + end + c = A1(j,2:end,lambdaIndex); + c(c==0) = []; + dimension = dimension + length(c); + end + + %on veut calculer l'EMV avec toutes nos estimations + densite = zeros(n,L); + for i=1:n + for r=1:k + delta = Y(i,:)*rho(:,:,r,lambdaIndex) - (X(i,a)*(phi(a,:,r,lambdaIndex))); + densite(i,lambdaIndex) = densite(i,lambdaIndex) +... + pi(r,lambdaIndex)*det(rho(:,:,r,lambdaIndex))/(sqrt(2*PI))^m*exp(-dot(delta,delta)/2.0); + end + end + lvraisemblance(lambdaIndex,1) = sum(log(densite(:,lambdaIndex))); + lvraisemblance(lambdaIndex,2) = (dimension+m+1)*k-1; + end + +end diff --git a/ProcLassoMLE/constructionModelesLassoMLE_interface.c b/ProcLassoMLE/constructionModelesLassoMLE_interface.c new file mode 100644 index 0000000..a331c67 --- /dev/null +++ b/ProcLassoMLE/constructionModelesLassoMLE_interface.c @@ -0,0 +1,142 @@ +#include "ioutils.h" +#include "constructionModelesLassoMLE.h" +#include <mex.h> + +#include <stdio.h> + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction( + int nlhs, + mxArray* plhs[], + int nrhs, + const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=14) + mexErrMsgIdAndTxt("select:constructionModelesLassoMLE:nrhs","14 inputs required."); + if (nlhs!=4) + mexErrMsgIdAndTxt("select:constructionModelesLassoMLE:nlhs","4 outputs required."); + + // Get matrices dimensions + const mwSize n = mxGetDimensions(prhs[8])[0]; + const mwSize p = mxGetDimensions(prhs[0])[0]; + const mwSize m = mxGetDimensions(prhs[0])[1]; + const mwSize k = mxGetDimensions(prhs[0])[2]; + const mwSize L = mxGetNumberOfElements(prhs[7]); + + //////////// + // INPUTS // + //////////// + + // phiInit + const mwSize* dimPhiInit = mxGetDimensions(prhs[0]); + Real* brPhiInit = matlabToBrArray_real(mxGetPr(prhs[0]), dimPhiInit, 3); + + // rhoInit + const mwSize* dimRhoInit = mxGetDimensions(prhs[1]); + Real* brRhoInit = matlabToBrArray_real(mxGetPr(prhs[1]), dimRhoInit, 3); + + // piInit + Real* piInit = mxGetPr(prhs[2]); + + // gamInit + const mwSize* dimGamInit = mxGetDimensions(prhs[3]); + Real* brGamInit = matlabToBrArray_real(mxGetPr(prhs[3]), dimGamInit, 2); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[4]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[5]))[0]; + + // gamma + Real gamma = mxGetScalar(prhs[6]); + + // glambda + Real* glambda = mxGetPr(prhs[7]); + + // X + const mwSize* dimX = mxGetDimensions(prhs[8]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[8]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[9]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[9]), dimY, 2); + + //seuil + Real seuil = mxGetScalar(prhs[10]); + + // tau + Real tau = mxGetScalar(prhs[11]); + + // A1 + const mwSize* dimA = mxGetDimensions(prhs[12]); + Int* brA1 = matlabToBrArray_int(mxGetData(prhs[12]), dimA, 3); + + // A2 + Int* brA2 = matlabToBrArray_int(mxGetData(prhs[13]), dimA, 3); + + ///////////// + // OUTPUTS // + ///////////// + + // phi + const mwSize dimPhi[] = {dimPhiInit[0], dimPhiInit[1], dimPhiInit[2], L}; + plhs[0] = mxCreateNumericArray(4,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // rho + const mwSize dimRho[] = {dimRhoInit[0], dimRhoInit[1], dimRhoInit[2], L}; + plhs[1] = mxCreateNumericArray(4,dimRho,mxDOUBLE_CLASS,mxREAL); + Real* rho = mxGetPr(plhs[1]); + + // pi + const mwSize dimPi[] = {k, L}; + plhs[2] = mxCreateNumericMatrix(dimPi[0],dimPi[1],mxDOUBLE_CLASS,mxREAL); + Real* pi = mxGetPr(plhs[2]); + + // lvraisemblance + const mwSize dimLvraisemblance[] = {L, 2}; + plhs[3] = mxCreateNumericMatrix(L, 2, mxDOUBLE_CLASS,mxREAL); + Real* lvraisemblance = mxGetPr(plhs[3]); + + ///////////////////////////////////////// + // Call to constructionModelesLassoMLE // + ///////////////////////////////////////// + + constructionModelesLassoMLE( + brPhiInit,brRhoInit,piInit,brGamInit,mini,maxi,gamma,glambda,brX,brY,seuil,tau,brA1,brA2, + phi,rho,pi,lvraisemblance, + n,p,m,k,L); + + free(brPhiInit); + free(brRhoInit); + free(brGamInit); + free(brX); + free(brY); + free(brA1); + free(brA2); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 4); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]*dimPhi[3]); + free(mlPhi); + + Real* mlRho = brToMatlabArray_real(rho, dimRho, 4); + copyArray(mlRho, rho, dimRho[0]*dimRho[1]*dimRho[2]*dimRho[3]); + free(mlRho); + + Real* mlPi = brToMatlabArray_real(pi, dimPi, 2); + copyArray(mlPi, pi, dimPi[0]*dimPi[1]); + free(mlPi); + + Real* mlLvraisemblance = brToMatlabArray_real(lvraisemblance, dimLvraisemblance, 2); + copyArray(mlLvraisemblance, lvraisemblance, dimLvraisemblance[0]*dimLvraisemblance[1]); + free(mlLvraisemblance); +} diff --git a/ProcLassoRank/EMGrank.c b/ProcLassoRank/EMGrank.c new file mode 100644 index 0000000..721c0c2 --- /dev/null +++ b/ProcLassoRank/EMGrank.c @@ -0,0 +1,308 @@ +#include "EMGrank.h" +#include <gsl/gsl_linalg.h> + +// Compute pseudo-inverse of a square matrix +static Real* pinv(const Real* matrix, mwSize dim) +{ + gsl_matrix* U = gsl_matrix_alloc(dim,dim); + gsl_matrix* V = gsl_matrix_alloc(dim,dim); + gsl_vector* S = gsl_vector_alloc(dim); + gsl_vector* work = gsl_vector_alloc(dim); + Real EPS = 1e-10; //threshold for singular value "== 0" + + //copy matrix into U + for (mwSize i=0; i<dim*dim; i++) + U->data[i] = matrix[i]; + + //U,S,V = SVD of matrix + gsl_linalg_SV_decomp(U, V, S, work); + gsl_vector_free(work); + + // Obtain pseudo-inverse by V*S^{-1}*t(U) + Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); + for (mwSize i=0; i<dim; i++) + { + for (mwSize ii=0; ii<dim; ii++) + { + Real dotProduct = 0.0; + for (mwSize j=0; j<dim; j++) + dotProduct += V->data[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; + inverse[i*dim+ii] = dotProduct; + } + } + + gsl_matrix_free(U); + gsl_matrix_free(V); + gsl_vector_free(S); + return inverse; +} + +// TODO: comment EMGrank purpose +void EMGrank( + // IN parameters + const Real* Pi, // parametre de proportion + const Real* Rho, // parametre initial de variance renormalisé + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const Int* rank, // vecteur des rangs possibles + // OUT parameters + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + // additional size parameters + mwSize n, // taille de l'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k) // nombre de composantes +{ + // Allocations, initializations + Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); + Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); + int signum; + Real invN = 1.0/n; + int deltaPhiBufferSize = 20; + Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); + mwSize ite = 0; + Real sumDeltaPhi = 0.0; + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real* Xr = (Real*)malloc(n*p*sizeof(Real)); + Real* Yr = (Real*)malloc(n*m*sizeof(Real)); + Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); + Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); + gsl_matrix* matrixM = gsl_matrix_alloc(p, m); + gsl_matrix* matrixE = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + gsl_matrix* V = gsl_matrix_alloc(m,m); + gsl_vector* S = gsl_vector_alloc(m); + gsl_vector* work = gsl_vector_alloc(m); + + //Initialize class memberships (all elements in class 0; TODO: randomize ?) + Int* Z = (Int*)calloc(n, sizeof(Int)); + + //Initialize phi to zero, because some M loops might exit before phi affectation + for (mwSize i=0; i<p*m*k; i++) + phi[i] = 0.0; + + while (ite<mini || (ite<maxi && sumDeltaPhi>tau)) + { + ///////////// + // Etape M // + ///////////// + + //M step: Mise à jour de Beta (et donc phi) + for (mwSize r=0; r<k; r++) + { + //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:) + mwSize cardClustR=0; + for (mwSize i=0; i<n; i++) + { + if (Z[i] == r) + { + for (mwSize j=0; j<p; j++) + Xr[cardClustR*p+j] = X[i*p+j]; + for (mwSize j=0; j<m; j++) + Yr[cardClustR*m+j] = Y[i*m+j]; + cardClustR++; + } + } + if (cardClustR == 0) + continue; + + //Compute tXrXr = t(Xr) * Xr + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=0; jj<p; jj++) + { + Real dotProduct = 0.0; + for (mwSize u=0; u<cardClustR; u++) + dotProduct += Xr[u*p+j] * Xr[u*p+jj]; + tXrXr[j*p+jj] = dotProduct; + } + } + + //Get pseudo inverse = (t(Xr)*Xr)^{-1} + Real* invTXrXr = pinv(tXrXr, p); + + // Compute tXrYr = t(Xr) * Yr + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=0; jj<m; jj++) + { + Real dotProduct = 0.0; + for (mwSize u=0; u<cardClustR; u++) + dotProduct += Xr[u*p+j] * Yr[u*m+jj]; + tXrYr[j*m+jj] = dotProduct; + } + } + + //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=0; jj<m; jj++) + { + Real dotProduct = 0.0; + for (mwSize u=0; u<p; u++) + dotProduct += invTXrXr[j*p+u] * tXrYr[u*m+jj]; + matrixM->data[j*m+jj] = dotProduct; + } + } + free(invTXrXr); + + //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + gsl_linalg_SV_decomp(matrixM, V, S, work); + + //Set m-rank(r) singular values to zero, and recompose + //best rank(r) approximation of the initial product + for (mwSize j=rank[r]; j<m; j++) + S->data[j] = 0.0; + + //[intermediate step] Compute hatBetaR = U * S * t(V) + Real* U = matrixM->data; + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=0; jj<m; jj++) + { + Real dotProduct = 0.0; + for (mwSize u=0; u<m; u++) + dotProduct += U[j*m+u] * S->data[u] * V->data[jj*m+u]; + hatBetaR[j*m+jj] = dotProduct; + } + } + + //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=0; jj<m; jj++) + { + Real dotProduct=0.0; + for (mwSize u=0; u<m; u++) + dotProduct += hatBetaR[j*m+u] * Rho[u*m*k+jj*k+r]; + phi[j*m*k+jj*k+r] = dotProduct; + } + } + } + + ///////////// + // Etape E // + ///////////// + + Real sumLogLLF2 = 0.0; + for (mwSize i=0; i<n; i++) + { + Real sumLLF1 = 0.0; + Real maxLogGamIR = -INFINITY; + for (mwSize r=0; r<k; r++) + { + //Compute + //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)... + // *phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) ); + //split in several sub-steps + + //compute det(Rho(:,:,r)) [TODO: avoid re-computations] + for (mwSize j=0; j<m; j++) + { + for (mwSize jj=0; jj<m; jj++) + matrixE->data[j*m+jj] = Rho[j*m*k+jj*k+r]; + } + gsl_linalg_LU_decomp(matrixE, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrixE, signum); + + //compute Y(i,:)*Rho(:,:,r) + for (mwSize j=0; j<m; j++) + { + YiRhoR[j] = 0.0; + for (mwSize u=0; u<m; u++) + YiRhoR[j] += Y[i*m+u] * Rho[u*m*k+j*k+r]; + } + + //compute X(i,:)*phi(:,:,r) + for (mwSize j=0; j<m; j++) + { + XiPhiR[j] = 0.0; + for (mwSize u=0; u<p; u++) + XiPhiR[j] += X[i*p+u] * phi[u*m*k+j*k+r]; + } + + //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) > + Real dotProduct = 0.0; + for (mwSize u=0; u<m; u++) + dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]); + Real logGamIR = log(Pi[r]) + log(detRhoR) - 0.5*dotProduct; + + //Z(i) = index of max (gam(i,:)) + if (logGamIR > maxLogGamIR) + { + Z[i] = r; + maxLogGamIR = logGamIR; + } + sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); + } + + sumLogLLF2 += log(sumLLF1); + } + + // Assign output variable LLF + *LLF = -invN * sumLogLLF2; + + //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi)))); + Real newDeltaPhi = 0.0; + for (mwSize j=0; j<p; j++) + { + for (mwSize jj=0; jj<m; jj++) + { + for (mwSize r=0; r<k; r++) + { + Real tmpDist = fabs(phi[j*m*k+jj*k+r]-Phi[j*m*k+jj*k+r]) + / (1.0+fabs(phi[j*m*k+jj*k+r])); + if (tmpDist > newDeltaPhi) + newDeltaPhi = tmpDist; + } + } + } + + //update distance parameter to check algorithm convergence (delta(phi, Phi)) + //TODO: deltaPhi should be a linked list for perf. + if (ite < deltaPhiBufferSize) + deltaPhi[ite] = newDeltaPhi; + else + { + sumDeltaPhi -= deltaPhi[0]; + for (int u=0; u<deltaPhiBufferSize-1; u++) + deltaPhi[u] = deltaPhi[u+1]; + deltaPhi[deltaPhiBufferSize-1] = newDeltaPhi; + } + sumDeltaPhi += newDeltaPhi; + + // update other local variables + for (mwSize j=0; j<m; j++) + { + for (mwSize jj=0; jj<p; jj++) + { + for (mwSize r=0; r<k; r++) + Phi[j*m*k+jj*k+r] = phi[j*m*k+jj*k+r]; + } + } + ite++; + } + + //free memory + free(hatBetaR); + free(deltaPhi); + free(Phi); + gsl_matrix_free(matrixE); + gsl_matrix_free(matrixM); + gsl_permutation_free(permutation); + gsl_vector_free(work); + gsl_matrix_free(V); + gsl_vector_free(S); + free(XiPhiR); + free(YiRhoR); + free(Xr); + free(Yr); + free(tXrXr); + free(tXrYr); + free(Z); +} diff --git a/ProcLassoRank/EMGrank.h b/ProcLassoRank/EMGrank.h new file mode 100644 index 0000000..8c0e657 --- /dev/null +++ b/ProcLassoRank/EMGrank.h @@ -0,0 +1,25 @@ +#ifndef select_EMGrank_H +#define select_EMGrank_H + +#include "ioutils.h" + +void EMGrank( + // IN parameters + const Real* Pi, + const Real* Rho, + Int mini, + Int maxi, + const Real* X, + const Real* Y, + Real tau, + const Int* rank, + // OUT parameters + Real* phi, + Real* LLF, + // additional size parameters + mwSize n, + mwSize p, + mwSize m, + mwSize k); + +#endif diff --git a/ProcLassoRank/EMGrank.m b/ProcLassoRank/EMGrank.m new file mode 100644 index 0000000..6ffc313 --- /dev/null +++ b/ProcLassoRank/EMGrank.m @@ -0,0 +1,69 @@ +function[phi,LLF] = EMGrank(Pi,Rho,mini,maxi,X,Y,tau,rank) + + % get matrix sizes + [~,m,k] = size(Rho); + [n,p] = size(X); + + % allocate output matrices + phi = zeros(p,m,k); + Z = ones(n,1,'int64'); + LLF = 0.0; + + % local variables + Phi = zeros(p,m,k); + deltaPhi = 0.0; + deltaPhi = []; + sumDeltaPhi = 0.0; + deltaPhiBufferSize = 20; + + %main loop (at least mini iterations) + ite = int64(1); + while ite<=mini || (ite<=maxi && sumDeltaPhi>tau) + + %M step: Mise à jour de Beta (et donc phi) + for r=1:k + if (sum(Z==r) == 0) + continue; + end + %U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + [U,S,V] = svd(pinv(transpose(X(Z==r,:))*X(Z==r,:))*transpose(X(Z==r,:))*Y(Z==r,:)); + %Set m-rank(r) singular values to zero, and recompose + %best rank(r) approximation of the initial product + S(rank(r)+1:end,:) = 0; + phi(:,:,r) = U * S * transpose(V) * Rho(:,:,r); + end + + %Etape E et calcul de LLF + sumLogLLF2 = 0.0; + for i=1:n + sumLLF1 = 0.0; + maxLogGamIR = -Inf; + for r=1:k + dotProduct = (Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)); + logGamIR = log(Pi(r)) + log(det(Rho(:,:,r))) - 0.5*dotProduct; + %Z(i) = index of max (gam(i,:)) + if logGamIR > maxLogGamIR + Z(i) = r; + maxLogGamIR = logGamIR; + end + sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2); + end + sumLogLLF2 = sumLogLLF2 + log(sumLLF1); + end + + LLF = -1/n * sumLogLLF2; + + % update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi = [ deltaPhi, max(max(max((abs(phi-Phi))./(1+abs(phi))))) ]; + if length(deltaPhi) > deltaPhiBufferSize + deltaPhi = deltaPhi(2:length(deltaPhi)); + end + sumDeltaPhi = sum(abs(deltaPhi)); + + % update other local variables + Phi = phi; + ite = ite+1; + + end + +end diff --git a/ProcLassoRank/EMGrank_interface.c b/ProcLassoRank/EMGrank_interface.c new file mode 100644 index 0000000..778ef67 --- /dev/null +++ b/ProcLassoRank/EMGrank_interface.c @@ -0,0 +1,90 @@ +#include "ioutils.h" +#include "EMGrank.h" +#include <mex.h> + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction( + int nlhs, + mxArray* plhs[], + int nrhs, + const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=8) + mexErrMsgIdAndTxt("select:EMGrank:nrhs","8 inputs required."); + if (nlhs!=2) + mexErrMsgIdAndTxt("select:EMGrank:nlhs","3 outputs required."); + + // Get matrices dimensions + const mwSize n = mxGetDimensions(prhs[4])[0]; + const mwSize p = mxGetDimensions(prhs[4])[1]; + const mwSize m = mxGetDimensions(prhs[1])[0]; + const mwSize k = mxGetDimensions(prhs[1])[2]; + + //////////// + // INPUTS // + //////////// + + // Pi + Real* Pi = mxGetPr(prhs[0]); + + // Rho + const mwSize* dimRho = mxGetDimensions(prhs[1]); + Real* brRho = matlabToBrArray_real(mxGetPr(prhs[1]), dimRho, 3); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[2]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[3]))[0]; + + // X + const mwSize* dimX = mxGetDimensions(prhs[4]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[4]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[5]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[5]), dimY, 2); + + // tau + Real tau = mxGetScalar(prhs[6]); + + // rank + Int* rank = (Int*)mxGetData(prhs[7]); + + ///////////// + // OUTPUTS // + ///////////// + + // phi + const mwSize dimPhi[] = {p, m, k}; + plhs[0] = mxCreateNumericArray(3,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // LLF + plhs[1] = mxCreateNumericMatrix(1,1,mxDOUBLE_CLASS,mxREAL); + Real* LLF = mxGetPr(plhs[1]); + + ///////////////////// + // Call to EMGrank // + ///////////////////// + + EMGrank(Pi,brRho,mini,maxi,brX,brY,tau,rank, + phi,LLF, + n,p,m,k); + + free(brRho); + free(brX); + free(brY); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 3); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]); + free(mlPhi); +} diff --git a/ProcLassoRank/compileMex.m b/ProcLassoRank/compileMex.m new file mode 100644 index 0000000..044f442 --- /dev/null +++ b/ProcLassoRank/compileMex.m @@ -0,0 +1,9 @@ +%compile C code (for MATLAB or Octave) +if exist('octave_config_info') + setenv('CFLAGS','-O2 -std=gnu99 -fopenmp') + mkoctfile --mex -DOctave -I../Util EMGrank.c EMGrank_interface.c ../Util/ioutils.c -o EMGrank -lm -lgsl -lgslcblas -lgomp + mkoctfile --mex -DOctave -I../Util constructionModelesLassoRank.c constructionModelesLassoRank_interface.c EMGrank.c ../Util/ioutils.c -o constructionModelesLassoRank -lm -lgsl -lgslcblas -lgomp +else + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util EMGrank.c EMGrank_interface.c ../Util/ioutils.c -output EMGrank -lm -lgsl -lgslcblas -lgomp + mex CFLAGS="\$CFLAGS -O2 -std=gnu99 -fopenmp" -I../Util constructionModelesLassoRank.c constructionModelesLassoRank_interface.c EMGrank.c ../Util/ioutils.c -output constructionModelesLassoRank -lm -lgsl -lgslcblas -lgomp +end diff --git a/ProcLassoRank/constructionModelesLassoRank.c b/ProcLassoRank/constructionModelesLassoRank.c new file mode 100644 index 0000000..98c02d5 --- /dev/null +++ b/ProcLassoRank/constructionModelesLassoRank.c @@ -0,0 +1,131 @@ +#include "EMGrank.h" +#include "constructionModelesLassoRank.h" +#include <gsl/gsl_linalg.h> +#include <omp.h> +#include "omp_num_threads.h" + +// TODO: comment on constructionModelesLassoRank purpose +void constructionModelesLassoRank( + // IN parameters + const Real* Pi, // parametre initial des proportions + const Real* Rho, // parametre initial de variance renormalisé + Int mini, // nombre minimal d'itérations dans l'algorithme EM + Int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const Int* A1, // matrice des coefficients des parametres selectionnes + Int rangmin, //rang minimum autorisé + Int rangmax, //rang maximum autorisé + // OUT parameters (all pointers, to be modified) + Real* phi, // estimateur ainsi calculé par le Lasso + Real* lvraisemblance, // estimateur ainsi calculé par le Lasso + // additional size parameters + mwSize n, // taille de l'echantillon + mwSize p, // nombre de covariables + mwSize m, // taille de Y (multivarié) + mwSize k, // nombre de composantes + mwSize L) // taille de glambda +{ + //On cherche les rangs possiblement intéressants + Int deltaRank = rangmax-rangmin+1; + mwSize Size = (mwSize)pow(deltaRank,k); + Int* Rank = (Int*)malloc(Size*k*sizeof(Int)); + for (mwSize r=0; r<k; r++) + { + //On veut le tableau de toutes les combinaisons de rangs possibles + //Dans la première colonne : on répète (rangmax-rangmin)^(k-1) chaque chiffre : ca remplit la colonne + //Dans la deuxieme : on répète (rangmax-rangmin)^(k-2) chaque chiffre, et on fait ca (rangmax-rangmin)^2 fois + //... + //Dans la dernière, on répète chaque chiffre une fois, et on fait ca (rangmin-rangmax)^(k-1) fois. + Int indexInRank = 0; + Int value = 0; + while (indexInRank < Size) + { + for (Int u=0; u<pow(deltaRank,k-r-1); u++) + Rank[(indexInRank++)*k+r] = rangmin + value; + value = (value+1) % deltaRank; + } + } + + //Initialize phi to zero, because unactive variables won't be assigned + for (mwSize i=0; i<p*m*k*L*Size; i++) + phi[i] = 0.0; + + //initiate parallel section + mwSize lambdaIndex; + omp_set_num_threads(OMP_NUM_THREADS); + #pragma omp parallel default(shared) private(lambdaIndex) + { + #pragma omp for schedule(dynamic,CHUNK_SIZE) nowait + for (lambdaIndex=0; lambdaIndex<L; lambdaIndex++) + { + //On ne garde que les colonnes actives : active sera l'ensemble des variables informatives + Int* active = (Int*)malloc(p*sizeof(Int)); + mwSize longueurActive = 0; + for (Int j=0; j<p; j++) + { + if (A1[j*L+lambdaIndex] != 0) + active[longueurActive++] = A1[j*L+lambdaIndex] - 1; + } + + if (longueurActive == 0) + continue; + + //from now on, longueurActive > 0 + Real* phiLambda = (Real*)malloc(longueurActive*m*k*sizeof(Real)); + Real LLF; + for (Int j=0; j<Size; j++) + { + //[phiLambda,LLF] = EMGrank(Pi(:,lambdaIndex),Rho(:,:,:,lambdaIndex),mini,maxi,X(:,active),Y,tau,Rank(j,:)); + Int* rank = (Int*)malloc(k*sizeof(Int)); + for (mwSize r=0; r<k; r++) + rank[r] = Rank[j*k+r]; + Real* Xactive = (Real*)malloc(n*longueurActive*sizeof(Real)); + for (mwSize i=0; i<n; i++) + { + for (mwSize jj=0; jj<longueurActive; jj++) + Xactive[i*longueurActive+jj] = X[i*p+active[jj]]; + } + Real* PiLambda = (Real*)malloc(k*sizeof(Real)); + for (mwSize r=0; r<k; r++) + PiLambda[r] = Pi[r*L+lambdaIndex]; + Real* RhoLambda = (Real*)malloc(m*m*k*sizeof(Real)); + for (mwSize u=0; u<m; u++) + { + for (mwSize v=0; v<m; v++) + { + for (mwSize r=0; r<k; r++) + RhoLambda[u*m*k+v*k+r] = Rho[u*m*k*L+v*k*L+r*L+lambdaIndex]; + } + } + EMGrank(PiLambda,RhoLambda,mini,maxi,Xactive,Y,tau,rank, + phiLambda,&LLF, + n,longueurActive,m,k); + free(rank); + free(Xactive); + free(PiLambda); + free(RhoLambda); + //lvraisemblance((lambdaIndex-1)*Size+j,:) = [LLF, dot(Rank(j,:), length(active)-Rank(j,:)+m)]; + lvraisemblance[(lambdaIndex*Size+j)*2] = LLF; + //dot(Rank(j,:), length(active)-Rank(j,:)+m) + Real dotProduct = 0.0; + for (mwSize r=0; r<k; r++) + dotProduct += Rank[j*k+r] * (longueurActive-Rank[j*k+r]+m); + lvraisemblance[(lambdaIndex*Size+j)*2+1] = dotProduct; + //phi(active,:,:,(lambdaIndex-1)*Size+j) = phiLambda; + for (mwSize jj=0; jj<longueurActive; jj++) + { + for (mwSize mm=0; mm<m; mm++) + { + for (mwSize r=0; r<k; r++) + phi[active[jj]*m*k*L*Size+mm*k*L*Size+r*L*Size+(lambdaIndex*Size+j)] = phiLambda[jj*m*k+mm*k+r]; + } + } + } + free(active); + free(phiLambda); + } + } + free(Rank); +} diff --git a/ProcLassoRank/constructionModelesLassoRank.h b/ProcLassoRank/constructionModelesLassoRank.h new file mode 100644 index 0000000..3cecfab --- /dev/null +++ b/ProcLassoRank/constructionModelesLassoRank.h @@ -0,0 +1,29 @@ +#ifndef select_constructionModelesLassoRank_H +#define select_constructionModelesLassoRank_H + +#include "ioutils.h" + +// Main job on raw inputs (after transformation from mxArray) +void constructionModelesLassoRank( + // IN parameters + const Real* Pi, + const Real* Rho, + Int mini, + Int maxi, + const Real* X, + const Real* Y, + Real tau, + const Int* A1, + Int rangmin, + Int rangmax, + // OUT parameters + Real* phi, + Real* lvraisemblance, + // additional size parameters + mwSize n, + mwSize p, + mwSize m, + mwSize k, + mwSize L); + +#endif diff --git a/ProcLassoRank/constructionModelesLassoRank.m b/ProcLassoRank/constructionModelesLassoRank.m new file mode 100644 index 0000000..ae5e34e --- /dev/null +++ b/ProcLassoRank/constructionModelesLassoRank.m @@ -0,0 +1,40 @@ +function[phi,lvraisemblance] = constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax) + + PI = 4.0 * atan(1.0); + + %get matrix sizes + [n,p] = size(X); + [~,m,k,~] = size(Rho); + L = size(A1, 2); %A1 est p x m+1 x L ou p x L ?! + + %On cherche les rangs possiblement intéressants + deltaRank = rangmax - rangmin + 1; + Size = deltaRank^k; + Rank = zeros(Size,k,'int64'); + for r=1:k + %On veut le tableau de toutes les combinaisons de rangs possibles + %Dans la première colonne : on répète (rangmax-rangmin)^(k-1) chaque chiffre : ca remplit la colonne + %Dans la deuxieme : on répète (rangmax-rangmin)^(k-2) chaque chiffre, et on fait ca (rangmax-rangmin)^2 fois + %... + %Dans la dernière, on répète chaque chiffre une fois, et on fait ca (rangmin-rangmax)^(k-1) fois. + Rank(:,r) = rangmin + reshape(repmat(0:(deltaRank-1), deltaRank^(k-r), deltaRank^(r-1)), Size, 1); + end + + %output parameters + phi = zeros(p,m,k,L*Size); + lvraisemblance = zeros(L*Size,2); + for lambdaIndex=1:L + %On ne garde que les colonnes actives + %active sera l'ensemble des variables informatives + active = A1(:,lambdaIndex); + active(active==0) = []; + if length(active) > 0 + for j=1:Size + [phiLambda,LLF] = EMGrank(Pi(:,lambdaIndex),Rho(:,:,:,lambdaIndex),mini,maxi,X(:,active),Y,tau,Rank(j,:)); + lvraisemblance((lambdaIndex-1)*Size+j,:) = [LLF, sum(Rank(j,:) .* (length(active)-Rank(j,:)+m))]; + phi(active,:,:,(lambdaIndex-1)*Size+j) = phiLambda; + end + end + end + +end diff --git a/ProcLassoRank/constructionModelesLassoRank_interface.c b/ProcLassoRank/constructionModelesLassoRank_interface.c new file mode 100644 index 0000000..96908c2 --- /dev/null +++ b/ProcLassoRank/constructionModelesLassoRank_interface.c @@ -0,0 +1,109 @@ +#include "ioutils.h" +#include "constructionModelesLassoRank.h" +#include <mex.h> + +#include <stdio.h> + +// nlhs, nrhs: resp. numbers of out and in parameters. +// plhs: array of out parameters, each being a mxArray +// plhs: array of in parameters (immutable), each being a mxArray +// +// MATLAB translates a call [A,B] = fun(C,D) into mexFunction(2,{A,B},2,{C,D}). +// Then mxArrayS are adapted to be passed to a regular C function, +// and the results are translated back to mxArrayS into plhs. +void mexFunction(int nlhs, mxArray* plhs[], + int nrhs, const mxArray* prhs[]) +{ + // Basic sanity checks + if (nrhs!=10) + mexErrMsgIdAndTxt("select:constructionModelesLassoRank:nrhs","10 inputs required."); + if (nlhs!=2) + mexErrMsgIdAndTxt("select:constructionModelesLassoRank:nlhs","3 outputs required."); + + // Get matrices dimensions, to be given to main routine above + const mwSize n = mxGetDimensions(prhs[4])[0]; + const mwSize p = mxGetDimensions(prhs[4])[1]; + const mwSize m = mxGetDimensions(prhs[1])[0]; + const mwSize k = mxGetDimensions(prhs[1])[2]; + const mwSize L = mxGetDimensions(prhs[7])[1]; + + //////////// + // INPUTS // + //////////// + + // pi + const mwSize* dimPi = mxGetDimensions(prhs[0]); + Real* brPi = matlabToBrArray_real(mxGetPr(prhs[0]), dimPi, 2); + + // rho + const mwSize* dimRho = mxGetDimensions(prhs[1]); + Real* brRho = matlabToBrArray_real(mxGetPr(prhs[1]), dimRho, 4); + + // min number of iterations + Int mini = ((Int*)mxGetData(prhs[2]))[0]; + + // max number of iterations + Int maxi = ((Int*)mxGetData(prhs[3]))[0]; + + // X + const mwSize* dimX = mxGetDimensions(prhs[4]); + Real* brX = matlabToBrArray_real(mxGetPr(prhs[4]), dimX, 2); + + // Y + const mwSize* dimY = mxGetDimensions(prhs[5]); + Real* brY = matlabToBrArray_real(mxGetPr(prhs[5]), dimY, 2); + + // tau + Real tau = mxGetScalar(prhs[6]); + + // A1 + const mwSize* dimA = mxGetDimensions(prhs[7]); + Int* brA1 = matlabToBrArray_int(mxGetData(prhs[7]), dimA, 2); + + //rangmin + Int rangmin = ((Int*)mxGetData(prhs[8]))[0]; + + //rangmax + Int rangmax = ((Int*)mxGetData(prhs[9]))[0]; + + ///////////// + // OUTPUTS // + ///////////// + + // phi + mwSize Size = pow(rangmax-rangmin+1,k); + const mwSize dimPhi[] = {p, m, k, L*Size}; + plhs[0] = mxCreateNumericArray(4,dimPhi,mxDOUBLE_CLASS,mxREAL); + Real* phi = mxGetPr(plhs[0]); + + // lvraisemblance + const mwSize dimLvraisemblance[] = {L*Size, 2}; + plhs[1] = mxCreateNumericMatrix(dimLvraisemblance[0],dimLvraisemblance[1],mxDOUBLE_CLASS,mxREAL); + Real* lvraisemblance = mxGetPr(plhs[1]); + + + ////////////////////////////////////////// + // Call to constructionModelesLassoRank // + ////////////////////////////////////////// + + constructionModelesLassoRank( + brPi,brRho,mini,maxi,brX,brY,tau,brA1,rangmin,rangmax, + phi,lvraisemblance, + n,p,m,k,L); + + free(brPi); + free(brRho); + free(brX); + free(brY); + free(brA1); + + //post-processing: convert by-rows outputs to MATLAB matrices + Real* mlPhi = brToMatlabArray_real(phi, dimPhi, 4); + copyArray(mlPhi, phi, dimPhi[0]*dimPhi[1]*dimPhi[2]*dimPhi[3]); + free(mlPhi); + + Real* mlLvraisemblance = brToMatlabArray_real(lvraisemblance, dimLvraisemblance, 2); + copyArray(mlLvraisemblance, lvraisemblance, dimLvraisemblance[0]*dimLvraisemblance[1]); + free(mlLvraisemblance); + +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..b48c505 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# model SELECTion + +This code is the applied part of the PhD thesis of [Emilie Devijver](http://www.math.u-psud.fr/~devijver/). + +## Description + +The function selmix delivers a multivariate Gaussian mixture in regression model collection. +According to the parameter estimation, we can compute classical model selection criterion, as BIC or AIC, or slope heuristic, using the CAPUSHE package. +The methodology used is described in 'Model-Based Clustering for High-Dimensional Data. Application to Functional Data.', +available at [this location](https://hal.archives-ouvertes.fr/hal-01060063) + +## Arguments + +Regressors, denoted by X (of size n x p) and responses, denoted by Y (of size n x q) are must-have arguments. + +Optionally, we could add + +* gamma: weight power in the Lasso penalty (according to Stadler et al., $\gamma \in \{0,1/2,1\}$; +* mini: the minimum number of iterations; +* maxi: the maximum number of iterations; +* tau: the threshold for stopping EM algorithm; +* kmin and kmax: the bounds of interesting number of components, +* rangmin and rangmax: the bounds of interesting rank values. + +## Usage + + objet = selmix(X,Y) + objet.run(index) + +For index=1, it computes the Lasso-MLE procedure. +For index=2, it computes the Lasso-Rank procedure. + +/!\ Be careful to the current path /!\ + +## Values + +* phiInit, rhoInit, piInit, gamInit: the initialization of the matrices phi, rho, pi and gamma, +* gridLambda: grid of regularization parameters used to select relevant variables (if kmax-kmin=0, it is, if not, it is the last grid of regularization parameters) +* A1,A2: indices of variables selected or not selected (matrices of size (p+1) x q x size(gridLambda)) +* Phi,Rho,Pi: estimations of each parameter thanks to the procedure LassoMLE if compute index=1, and thanks to the procedure LassoRank if computed index=2. + + +## Example + + n=10; + p=10; + q=5; + X=randn(n,p); + Y=randn(n,q); + + objet=selmix(X,Y); + objet.run(1); + objet.run(2); diff --git a/SelectModel/selectionModelesLassoMLE.m b/SelectModel/selectionModelesLassoMLE.m new file mode 100644 index 0000000..ab39371 --- /dev/null +++ b/SelectModel/selectionModelesLassoMLE.m @@ -0,0 +1,12 @@ +%Selection de modèle dans la procédure Lasso-MLE +% Selection de modele : voici les parametres selectionnes pour chaque +% dimension, et l'indice associe +[indiceLassoMLE,D1LassoMLE] = selectionmodele(vraisLassoMLE); + +%on veut tracer la courbe +%figure(2) +%plot(D1LassoMLE/n,vraisLassoMLE(indiceLassoMLE),'.') +%on veut appliquer Capushe : il nous faut un tableau de données +tableauLassoMLE = enregistrerdonnees(D1LassoMLE,vraisLassoMLE(indiceLassoMLE,:),n); +save tableauLassoMLE.mat tableauLassoMLE +%On conclut avec Capushe ! diff --git a/SelectModel/selectionModelesLassoRank.m b/SelectModel/selectionModelesLassoRank.m new file mode 100644 index 0000000..aacc460 --- /dev/null +++ b/SelectModel/selectionModelesLassoRank.m @@ -0,0 +1,11 @@ +%Selection de modèle dans la procedure Lasso Rank +vraisLassoRank=vraisLassoRank'; +% Selection de modele : voici les parametres selectionnes pour chaque +% dimension, et l'indice associe +[indiceLassoRank,D1LassoRank]=selectionmodele(vraisLassoRank); +tableauLassoRank=enregistrerdonnees(D1LassoRank,vraisLassoRank(indiceLassoRank,:),n); +%On veut tracer la courbe des pentes +%figure(2) +%plot(D1LassoRank/n,vraisLassoRank(indiceLassoRank),'.') +save tableauLassoRank.mat tableauLassoRank +%On conclut avec Capushe ! diff --git a/SelectModel/selectiondindice.m b/SelectModel/selectiondindice.m new file mode 100644 index 0000000..4ef8b96 --- /dev/null +++ b/SelectModel/selectiondindice.m @@ -0,0 +1,19 @@ +%utile dans selectiontotale.m, remarque quels coefficients sont nuls et lesquels ne le sont pas +function[A,B]=selectiondindice(phi,seuil) + + [pp,m,~]=size(phi); + A=zeros(pp,m); + B=zeros(pp,m); + for j=1:pp + cpt=0;cpt2=0; + for mm=1:m + if (max(phi(j,mm,:)) > seuil) + cpt=cpt+1; + A(j,cpt)=mm; + else cpt2=cpt2+1; + B(j,cpt2)=mm; + end + end + end + +end diff --git a/SelectModel/selectionmodele.m b/SelectModel/selectionmodele.m new file mode 100644 index 0000000..33828ab --- /dev/null +++ b/SelectModel/selectionmodele.m @@ -0,0 +1,20 @@ +function [indice,D1]=selectionmodele(vraisemblance) + + D=vraisemblance(:,2); + [D1]=unique(D); + indice=ones(1,length(D1)); + %On ne sélectionne que celui qui maximise : l'EMV + if length(D1)>2 + for i=1:length(D1) + a=[]; + for j=1:length(D) + if D(j)==D1(i) + a=[a,vraisemblance(j,1)]; + end + end + b=max(a); + indice(i)=find(vraisemblance(:,1)==b,1); + end + end + +end diff --git a/SelectModel/suppressionmodelesegaux.m b/SelectModel/suppressionmodelesegaux.m new file mode 100644 index 0000000..3d2a3a9 --- /dev/null +++ b/SelectModel/suppressionmodelesegaux.m @@ -0,0 +1,18 @@ +function[B1,B2,glambda,ind,rho,pi]=suppressionmodelesegaux(B1,B2,glambda,rho,pi) + + ind=[]; + for l=1:length(glambda) + for ll=1:l-1 + if B1(:,:,l)==B1(:,:,ll) + ind=[ind l]; + end + end + end + ind=unique(ind); + B1(:,:,ind)=[]; + glambda(ind)=[]; + B2(:,:,ind)=[]; + rho(:,:,:,ind)=[]; + pi(:,ind)=[]; + +end diff --git a/SelectModel/suppressionmodelesegaux2.m b/SelectModel/suppressionmodelesegaux2.m new file mode 100644 index 0000000..3158d36 --- /dev/null +++ b/SelectModel/suppressionmodelesegaux2.m @@ -0,0 +1,17 @@ +function[B1,ind,rho,pi]=suppressionmodelesegaux2(B1,rho,pi) + + ind=[]; + nombreLambda=size(B1,2); + for l=1:nombreLambda + for ll=1:l-1 + if B1(:,l)==B1(:,ll) + ind=[ind l]; + end + end + end + ind=unique(ind); + B1(:,ind)=[]; + rho(:,:,:,ind)=[]; + pi(:,ind)=[]; + +end diff --git a/Util/ioutils.c b/Util/ioutils.c new file mode 100644 index 0000000..36a1d8e --- /dev/null +++ b/Util/ioutils.c @@ -0,0 +1,168 @@ +#include "ioutils.h" +#include <string.h> +#include <stdio.h> + +// Check if array == refArray +void compareArray(const char* ID, const void* array, const void* refArray, mwSize size, int isInteger) +{ + Real EPS = 1e-5; //precision + printf("Checking %s\n",ID); + Real maxError = 0.0; + for (mwSize i=0; i<size; i++) + { + Real error = isInteger + ? fabs(((Int*)array)[i] - ((Int*)refArray)[i]) + : fabs(((Real*)array)[i] - ((Real*)refArray)[i]); + if (error >= maxError) + maxError = error; + } + if (maxError >= EPS) + printf(" Inaccuracy: max(abs(error)) = %g >= %g\n",maxError,EPS); + else + printf(" OK\n"); +} + +// Next function is a generalization of : +//~ Real* brToMatlabArray(Real* brArray, int dimX, int dimY, int dimZ, int dimW) +//~ { + //~ Real* matlabArray = (Real*)malloc(dimX*dimY*dimZ*dimW*sizeof(Real)); + //~ for (int u=0; u<dimX*dimY*dimZ*dimW; u++) + //~ { + //~ int xIndex = u / (dimY*dimZ*dimW); + //~ int yIndex = (u % (dimY*dimZ*dimW)) / (dimZ*dimW); + //~ int zIndex = (u % (dimZ*dimW)) / dimW; + //~ int wIndex = u % dimW; + //~ matlabArray[xIndex+yIndex*dimX+zIndex*dimX*dimY+wIndex*dimX*dimY*dimZ] = brArray[u]; + //~ } + //~ return matlabArray; +//~ } + +// Auxiliary to convert from ours ("by-rows") encoding to MATLAB +void* brToMatlabArray(const void* brArray, const mwSize* dimensions, int nbDims, int isInteger) +{ + mwSize totalDim = 1; + for (int i=0; i<nbDims; i++) + totalDim *= dimensions[i]; + size_t elementSize = isInteger + ? sizeof(Int) + : sizeof(Real); + + void* matlabArray = malloc(totalDim*elementSize); + for (mwSize u=0; u<totalDim; u++) + { + mwSize prodDimLeft = totalDim; + mwSize prodDimRight = totalDim / dimensions[0]; + mwSize prodDimInIndex = 1; + mwSize index = 0; + for (int v=0; v<nbDims; v++) + { + index += ((u % prodDimLeft) / prodDimRight) * prodDimInIndex; + prodDimInIndex *= dimensions[v]; + prodDimLeft /= dimensions[v]; + if (v+1 < nbDims) + prodDimRight /= dimensions[v+1]; + } + if (isInteger) + ((Int*)matlabArray)[index] = ((Int*)brArray)[u]; + else + ((Real*)matlabArray)[index] = ((Real*)brArray)[u]; + } + return matlabArray; +} + +// Next function is a generalization of : +//~ Real* matlabToBrArray(Real* matlabArray, int dimX, int dimY, int dimZ, int dimU) +//~ { + //~ Real* brArray = (Real*)malloc(dimX*dimY*dimZ*dimU*sizeof(Real)); + //~ for (int u=0; u<dimX*dimY*dimZ*dimU; u++) + //~ { + //~ int xIndex = u % dimX; + //~ int yIndex = (u % (dimX*dimY)) / dimX; + //~ int zIndex = (u % (dimX*dimY*dimZ)) / (dimX*dimY); + //~ int uIndex = u / (dimX*dimY*dimZ); + //~ brArray[xIndex*dimY*dimZ*dimU+yIndex*dimZ*dimU+zIndex*dimU+uIndex] = matlabArray[u]; + //~ } + //~ return brArray; +//~ } + +// Auxiliary to convert from MATLAB encoding to ours ("by-rows") +void* matlabToBrArray(const void* matlabArray, const mwSize* dimensions, int nbDims, int isInteger) +{ + mwSize totalDim = 1; + for (int i=0; i<nbDims; i++) + totalDim *= dimensions[i]; + size_t elementSize = isInteger + ? sizeof(Int) + : sizeof(Real); + + void* brArray = malloc(totalDim*elementSize); + for (mwSize u=0; u<totalDim; u++) + { + mwSize prodDimLeft = dimensions[0]; + mwSize prodDimRight = 1; + mwSize prodDimInIndex = totalDim / dimensions[0]; + mwSize index = 0; + for (int v=0; v<nbDims; v++) + { + index += ((u % prodDimLeft) / prodDimRight) * prodDimInIndex; + if (v+1 < nbDims) + { + prodDimInIndex /= dimensions[v+1]; + prodDimLeft *= dimensions[v+1]; + } + prodDimRight *= dimensions[v]; + } + if (isInteger) + ((Int*)brArray)[index] = ((Int*)matlabArray)[u]; + else + ((Real*)brArray)[index] = ((Real*)matlabArray)[u]; + } + return brArray; +} + +// Read array by columns (as in MATLAB) and return by-rows encoding +void* readArray(const char* fileName, const mwSize* dimensions, int nbDims, int isInteger) +{ + // need to prepend '../data/' (not really nice code...) + char* fullFileName = (char*)calloc(8+strlen(fileName)+1,sizeof(char)); + strcat(fullFileName, "../data/"); + strcat(fullFileName, fileName); + FILE* file = fopen(fullFileName, "r"); + free(fullFileName); + + mwSize totalDim = 1; + for (int i=0; i<nbDims; i++) + totalDim *= dimensions[i]; + size_t elementSize = isInteger + ? sizeof(Int) + : sizeof(Real); + + // read all values, and convert them to by-rows matrices format + void* matlabArray = malloc(totalDim*elementSize); + char curChar = ' '; + char bufferNum[64]; + for (mwSize u=0; u<totalDim; u++) + { + // position to next non-separator character + while (!feof(file) && (curChar==' ' || curChar=='\n' || curChar=='\t' || curChar==',')) + curChar = fgetc(file); + // read number (as a string) + int bufferIndex = 0; + while (!feof(file) && curChar!=' ' && curChar!='\n' && curChar!='\t' && curChar!=',') + { + bufferNum[bufferIndex++] = curChar; + curChar = fgetc(file); + } + bufferNum[bufferIndex] = 0; + // transform string into Real, and store it at appropriate location + if (isInteger) + ((Int*)matlabArray)[u] = atoi(bufferNum); + else + ((Real*)matlabArray)[u] = atof(bufferNum); + } + fclose(file); + + void* brArray = matlabToBrArray(matlabArray, dimensions, nbDims, isInteger); + free(matlabArray); + return brArray; +} diff --git a/Util/ioutils.h b/Util/ioutils.h new file mode 100644 index 0000000..31f464a --- /dev/null +++ b/Util/ioutils.h @@ -0,0 +1,75 @@ +#ifndef select_ioutils_H +#define select_ioutils_H + +#include <stdlib.h> +#include <math.h> +#include <stdint.h> +#include <uchar.h> //for type wchar16_t + +// Include header for mwSize type +#ifdef Octave +#include <mex.h> +#else +#include <tmwtypes.h> +#endif + +// CHUNK_SIZE = number of lambda values to be treated sequentially by a single core +#define CHUNK_SIZE 1 + +// integer type chosen in MATLAB (to be tuned: 32 bits should be enough) +typedef int64_t Int; + +// real number type chosen in MATLAB (default: double) +typedef double Real; + +#ifndef M_PI +#define M_PI 3.141592653589793 +#endif + +// Fill an array with zeros +#define zeroArray(array, size)\ +{\ + for (Int u=0; u<size; u++)\ + array[u] = 0;\ +} + +// Copy an 1D array +#define copyArray(array, copy, size)\ +{\ + for (Int u=0; u<size; u++)\ + copy[u] = array[u];\ +} + +// Check if array == refArray +void compareArray(const char* ID, const void* array, const void* refArray, mwSize size, int isInteger); + +#define compareArray_int(ID, array, refArray, size)\ + compareArray(ID, array, refArray, size, 1) +#define compareArray_real(ID, array, refArray, size)\ + compareArray(ID, array, refArray, size, 0) + +// Auxiliary to convert from ours ("by-rows") encoding to MATLAB +void* brToMatlabArray(const void* brArray, const mwSize* dimensions, int nbDims, int isInteger); + +#define brToMatlabArray_int(brArray, dimensions, nbDims)\ + (Int*)brToMatlabArray(brArray, dimensions, nbDims, 1) +#define brToMatlabArray_real(brArray, dimensions, nbDims)\ + (Real*)brToMatlabArray(brArray, dimensions, nbDims, 0) + +// Auxiliary to convert from MATLAB encoding to ours ("by-rows") +void* matlabToBrArray(const void* matlabArray, const mwSize* dimensions, int nbDims, int isInteger); + +#define matlabToBrArray_int(matlabArray, dimensions, nbDims)\ + (Int*)matlabToBrArray(matlabArray, dimensions, nbDims, 1) +#define matlabToBrArray_real(matlabArray, dimensions, nbDims)\ + (Real*)matlabToBrArray(matlabArray, dimensions, nbDims, 0) + +// Read array by columns (as in MATLAB) and return by-rows encoding +void* readArray(const char* fileName, const mwSize* dimensions, int nbDims, int isInteger); + +#define readArray_int(fileName, dimensions, nbDims)\ + (Int*)readArray(fileName, dimensions, nbDims, 1) +#define readArray_real(fileName, dimensions, nbDims)\ + (Real*)readArray(fileName, dimensions, nbDims, 0) + +#endif diff --git a/Util/omp_num_threads.h b/Util/omp_num_threads.h new file mode 100644 index 0000000..f4fcc24 --- /dev/null +++ b/Util/omp_num_threads.h @@ -0,0 +1,7 @@ +#ifndef select_omp_num_threads_H +#define select_omp_num_threads_H + +//Par exemple... +#define OMP_NUM_THREADS 8 + +#endif diff --git a/selmix.m b/selmix.m new file mode 100644 index 0000000..a4bea19 --- /dev/null +++ b/selmix.m @@ -0,0 +1,194 @@ +classdef selmix < handle + + properties (SetAccess = private) + %always user defined + X; % regression data (size n*p, where n is the number of observations, and p is the number of regressors) + Y; % response data (size n*m, where n is the number of observations, and m is the number of responses) + + %optionally user defined some default values + gamma; % power in the penalty + mini; % minimum number of iterations for EM algorithm + maxi; % maximum number of iterations for EM algorithm + eps; % threshold for stopping EM algorithm + kmin; % minimum number of components in the mixture + kmax; % maximum number of components in the mixture + rangmin; + rangmax; + + %computed through the workflow + phiInit; % initialisation for the reparametrized conditional mean parameter + rhoInit; % initialisation for the reparametrized variance parameter + piInit; % initialisation for the proportions + tauInit; % initialisation for the allocations probabilities in each component + gridLambda = []; % values for the regularization parameter grid + A1; %je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ... + A2; + Phi; % collection of estimations for the reparametrized conditional mean parameters + Rho; % collection of estimations for the reparametrized variance parameters + Pi; % collection of estimations for the proportions parameters + end + + properties (Constant) + %immutable + seuil = 1e-15; + end + + methods + %%%%%%%%%%%%%%%%%%%%%%% + %initialize main object + %%%%%%%%%%%%%%%%%%%%%%% + + function sx = selmix(X,Y,varargin) + %set defaults for optional inputs + optargs = {1.0 5 10 1e-6 2 3 2 3}; + %replace defaults by user parameters + optargs(1:length(varargin)) = varargin; + sx.X = X; + sx.Y = Y; + [sx.gamma,sx.mini,sx.maxi,sx.eps,sx.kmin,sx.kmax,sx.rangmin,sx.rangmax] = optargs{:}; + % z = somme(sx.X,sx.Y); + % sx.Z=z; + end + + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %core workflow: compute all models + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + function initParameters(sx,k) + [phi0,rho0,pi0,tau0] = initSmallEM(k,sx.X,sx.Y,sx.eps); %smallEM initializes parameters by k-means and regression model in each component, doing this 20 times, and keeping the values maximizing the likelihood after 10 iterations of the EM algorithm. + sx.phiInit = phi0; + sx.rhoInit = rho0; + sx.piInit = pi0; + sx.tauInit = tau0; + end + + function computeGridLambda(sx) + [sx.gridLambda] = grillelambda(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.X,sx.Y,sx.gamma,sx.mini,sx.maxi,sx.eps); + % computation of the regularization grid, according to explicit + % formulae given by EM algorithm. + end + + function computeRelevantParameters(sx) + [sx.A1,sx.A2,sx.Rho,sx.Pi] = selectiontotale(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps); + %select variables according to each regularization parameter + %from the grid: sx.A1 corresponding to selected variables, and + %sx.A2 corresponding to unselected variables. + end + + function [sx,phi,rho,pi]=runProcedure1(sx) + [phi,rho,pi,~] = constructionModelesLassoMLE(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps,sx.A1,sx.A2); + %compute parameter estimations, with the Maximum Likelihood + %Estimator, restricted on selected variables. + end + + function [phi] =runProcedure2(sx) + [phi,~] = constructionModelesLassoRank(sx.Pi,sx.Rho,sx.mini,sx.maxi,sx.X,sx.Y,sx.eps,sx.A1,sx.rangmin,sx.rangmax); + %compute parameter estimations, with the Low Rank + %Estimator, restricted on selected variables. + end + + % main loop: over all k and all lambda + function run(sx,procedure) % Run the all procedure, 1 with the + %maximum likelihood refitting, and 2 with the Low Rank refitting. + [p,m,~]=size(sx.phiInit); + for k=sx.kmin:sx.kmax + k + initParameters(sx,k); + computeGridLambda(sx); + computeRelevantParameters(sx); + if (procedure == 1) + [~,phi,rho,pi] = runProcedure1(sx); + Phi2 = sx.Phi; + Rho2 = sx.Rho; + Pi2 = sx.Pi; + p = size(sx.X,2); + m = size(sx.Y,2); + if size(Phi2) == 0 + sx.Phi(:,:,1:k,:) = phi; + sx.Rho(:,:,1:k,:) = rho; + sx.Pi(1:k,:) = pi; + else + sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4)); + sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2; + sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi; + sx.Rho = zeros(m,m,sx.kmax,size(Rho2,4)+size(rho,4)); + sx.Rho(:,:,1:size(Rho2,3),1:size(Rho2,4)) = Rho2; + sx.Rho(:,:,1:k,size(Rho2,4)+1:end) = rho; + sx.Pi = zeros(sx.kmax,size(Pi2,2)+size(pi,2)); + sx.Pi(1:size(Pi2,1),1:size(Pi2,2)) = Pi2; + sx.Pi(1:k,size(Pi2,2)+1:end) = pi; + end + else + [phi] = runProcedure2(sx); + phi + Phi2 = sx.Phi; + if size(Phi2,1) == 0 + sx.Phi(:,:,1:k,:) = phi; + else + size(Phi2) + sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4)); + size(sx.Phi) + sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2; + sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi; + end + + end + + + end + end + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %pruning: select only one (or a few best ?!) model + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % + % function[model] selectModel(sx) + % %TODO + % %model = sxModel(...); + % end + + end + +end + + +%%%%%%%%%%%%% +%OLD VERSION: + +%~ for k=K +%~ % On initialise +%~ initialisation +%~ disp('Initialisé') +%~ % On construit la grille des lambdas : variables informatives +%~ [glambda,phiEmv,rhoEmv,piEmv]=grillelambda(phiInit,rhoInit,piInit,tauInit,x,y,gamma,mini,maxi,tau); +%~ glambda=glambda(1:3); +%~ disp('glambda construite') +%~ % On trouve les variables informatives pour chaque lambda : S est la +%~ % matrice des coefficients des variables informatives +%~ % on parallelise à l interieur du selectiontotale() +%~ [B1,B2,rhoLasso,piLasso]=selectiontotale(phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,glambda,x,y,10^-15,tau); +%~ %S1 les variables informatives, S2 celles qui ne le sont pas +%~ [B1bis,B2bis,glambda2bis,ind,rhoLasso,piLasso]=suppressionmodelesegaux(B1,B2,glambda,rhoLasso,piLasso); +%~ dessinVariablesSelectionnees; +%~ disp('Le Lasso est fait') +%~ % Pour chaque lambda ainsi construit, on veut calculer l'EMV pour la procédure Lasso-MLE +%~ %On obtient une collection de modèles pour Lasso-MLE +%~ % ICI AUSSI ON PEUT PARALLELISER a l interieur de constructionModelesLassoMLE +%~ nombreLambda=size(B1bis,3); +%~ %[phiLassoMLE,rhoLassoMLE,piLassoMLE,vraisLassoMLE]=constructionModelesLassoMLE(phiInit,rhoInit,piInit, tauInit,mini,maxi,gamma,glambda2bis,X,Y,seuil,tau,B1bis,B2bis) +%~ %Pour Lasso-Rank +%~ %on ne garde que les colonnes actives +%~ B1ter=B1bis(:,1,:); +%~ % [B1ter,ind,rhoLasso,piLasso]=suppressionmodelesegaux2(B1ter,rhoLasso,piLasso) +%~ %Pour chaque lambda, on veut calculer l'estimateur low rank pour la procédure Lasso-Rank +%~ %On obtient une collection de modèles pour Lasso-Rank +%~ %ICI AUSSI ON PEUT PARALLELISER a linterieur de constructionModelesLassoRank +%~ nombreLambda2=size(B1ter,2); +%~ [phi,lvraisemblance,Z] = constructionModelesLassoRank(... +%~ piEmv,rhoEmv,mini,maxi,X,Y,tau,B1ter,2,4); +%~ disp('On a construit les modèles pour les deux procédures') +%~ end +%~ %selectionModelesLassoMLE; +%~ %selectionModelesLassoRank; +%~ disp('On a sélectionné les modèles dans les deux procédures') -- 2.44.0