1 % TODO: port this code to R
3 classdef selmix < handle
% selmix: workflow object for mixture-of-regressions model collections,
% refitted either by maximum likelihood (Lasso-MLE) or by low-rank
% estimation (Lasso-Rank) -- see runProcedure1/runProcedure2 below.
% NOTE(review): sx.A2, sx.seuil, sx.rangmin and sx.rangmax are referenced by
% the methods but not declared in the lines shown here -- presumably declared
% on lines omitted from this excerpt; confirm against the full file.
5 properties (SetAccess = private)
7 X; % regression data (size n*p, where n is the number of observations, and p is the number of regressors)
8 Y; % response data (size n*m, where n is the number of observations, and m is the number of responses)
10 %optionally user-defined; defaults are filled in by the constructor
11 gamma; % power in the penalty
12 mini; % minimum number of iterations for the EM algorithm
13 maxi; % maximum number of iterations for the EM algorithm
14 eps; % threshold for stopping the EM algorithm
15 kmin; % minimum number of components in the mixture
16 kmax; % maximum number of components in the mixture
20 %computed through the workflow
21 phiInit; % initialisation for the reparametrized conditional mean parameter
22 rhoInit; % initialisation for the reparametrized variance parameter
23 piInit; % initialisation for the proportions
24 tauInit; % initialisation for the allocations probabilities in each component
25 gridLambda = []; % values for the regularization parameter grid
26 A1; % NOTE(review, translated): I do not really think these should be kept as outputs, especially since an A1 and an A2 matrix are built for every k, and they are large, so storing them is somewhat costly
28 Phi; % collection of estimations for the reparametrized conditional mean parameters
29 Rho; % collection of estimations for the reparametrized variance parameters
30 Pi; % collection of estimations for the proportions parameters
39 %%%%%%%%%%%%%%%%%%%%%%%
40 %initialize main object
41 %%%%%%%%%%%%%%%%%%%%%%%
43 function sx = selmix(X,Y,varargin)
% Constructor. X is the regression data (n*p), Y the responses (n*m).
% Optional positional arguments, in order:
%   gamma (1.0), mini (5), maxi (10), eps (1e-6),
%   kmin (2), kmax (3), rangmin (2), rangmax (3)
% Any omitted trailing argument keeps its default.
% NOTE(review): the assignments sx.X = X and sx.Y = Y are presumably on
% lines omitted from this excerpt -- confirm against the full file.
44 %set defaults for optional inputs
45 optargs = {1.0 5 10 1e-6 2 3 2 3};
46 %replace defaults by user parameters
% NOTE(review): no guard against length(varargin) > 8; extra arguments
% would raise an indexing error on the next line.
47 optargs(1:length(varargin)) = varargin;
50 [sx.gamma,sx.mini,sx.maxi,sx.eps,sx.kmin,sx.kmax,sx.rangmin,sx.rangmax] = optargs{:};
51 % z = somme(sx.X,sx.Y);
56 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
57 %core workflow: compute all models
58 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
60 function initParameters(sx,k)
% Compute an EM starting point (phi0,rho0,pi0,tau0) for a k-component mixture.
% NOTE(review): the copy of phi0..tau0 into sx.phiInit..sx.tauInit is
% presumably on lines omitted from this excerpt -- confirm.
61 [phi0,rho0,pi0,tau0] = initSmallEM(k,sx.X,sx.Y,sx.eps); %smallEM initializes parameters by k-means and a regression model in each component, doing this 20 times, and keeping the values maximizing the likelihood after 10 iterations of the EM algorithm.
68 function computeGridLambda(sx)
% Build the grid of regularization parameters sx.gridLambda from the
% initial EM parameters (requires initParameters to have run first).
69 [sx.gridLambda] = grillelambda(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.X,sx.Y,sx.gamma,sx.mini,sx.maxi,sx.eps);
70 % computation of the regularization grid, according to explicit
71 % formulae given by EM algorithm.
74 function computeRelevantParameters(sx)
% Variable selection: for each lambda in sx.gridLambda, split the variables
% into selected (sx.A1) and unselected (sx.A2), storing the associated
% Rho and Pi estimations.
% NOTE(review): sx.seuil is read here but its declaration/assignment is not
% in the lines shown -- confirm it is set before this call.
75 [sx.A1,sx.A2,sx.Rho,sx.Pi] = selectiontotale(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps);
76 %select variables according to each regularization parameter
77 %from the grid: sx.A1 corresponding to selected variables, and
78 %sx.A2 corresponding to unselected variables.
81 function [sx,phi,rho,pi]=runProcedure1(sx)
% Lasso-MLE refitting: maximum-likelihood estimation restricted to the
% variables selected in sx.A1/sx.A2, one model per lambda in sx.gridLambda.
% NOTE(review): returning sx is redundant for a handle class (it is mutated
% in place); the interface is kept unchanged for existing callers.
82 [phi,rho,pi,~] = constructionModelesLassoMLE(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps,sx.A1,sx.A2);
83 %compute parameter estimations, with the Maximum Likelihood
84 %Estimator, restricted on selected variables.
87 function [phi] =runProcedure2(sx)
% Lasso-Rank refitting: low-rank estimation restricted to the variables
% selected in sx.A1, with ranks ranging over sx.rangmin..sx.rangmax.
88 [phi,~] = constructionModelesLassoRank(sx.Pi,sx.Rho,sx.mini,sx.maxi,sx.X,sx.Y,sx.eps,sx.A1,sx.rangmin,sx.rangmax);
89 %compute parameter estimations, with the Low Rank
90 %Estimator, restricted on selected variables.
93 % main loop: over all k and all lambda
94 function run(sx,procedure) % Run the whole procedure: 1 with the
95 %maximum likelihood refitting, and 2 with the Low Rank refitting.
% NOTE(review): this excerpt omits several lines of the original body (the
% loop over k, the branch on `procedure`, and the if/else deciding between
% the first pass and later passes); the statements below belong inside
% those omitted constructs.
96 [p,m,~]=size(sx.phiInit);
100 computeGridLambda(sx);
101 computeRelevantParameters(sx);
% --- procedure 1: Lasso-MLE refitting ---
103 [~,phi,rho,pi] = runProcedure1(sx);
% first pass: write the new estimations for this k into the collections
110 sx.Phi(:,:,1:k,:) = phi;
111 sx.Rho(:,:,1:k,:) = rho;
% later passes: grow the collections along the model dimension while keeping
% earlier results (Phi2/Rho2/Pi2 presumably hold copies of the previous
% sx.Phi/sx.Rho/sx.Pi, made on omitted lines -- confirm)
114 sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4));
115 sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2;
116 sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi;
117 sx.Rho = zeros(m,m,sx.kmax,size(Rho2,4)+size(rho,4));
118 sx.Rho(:,:,1:size(Rho2,3),1:size(Rho2,4)) = Rho2;
119 sx.Rho(:,:,1:k,size(Rho2,4)+1:end) = rho;
120 sx.Pi = zeros(sx.kmax,size(Pi2,2)+size(pi,2));
121 sx.Pi(1:size(Pi2,1),1:size(Pi2,2)) = Pi2;
122 sx.Pi(1:k,size(Pi2,2)+1:end) = pi;
% --- procedure 2: Lasso-Rank refitting (phi only) ---
125 [phi] = runProcedure2(sx);
% first pass: store directly
129 sx.Phi(:,:,1:k,:) = phi;
% later passes: grow sx.Phi while keeping earlier results
132 sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4));
134 sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2;
135 sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi;
144 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
145 %pruning: select only one (or a few best ?!) model
146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148 % function[model] selectModel(sx)
150 % %model = sxModel(...);
164 %~ disp('Initialized')
165 %~ % Build the grid of lambdas: informative variables
166 %~ [glambda,phiEmv,rhoEmv,piEmv]=grillelambda(phiInit,rhoInit,piInit,tauInit,x,y,gamma,mini,maxi,tau);
167 %~ glambda=glambda(1:3);
168 %~ disp('glambda grid built')
169 %~ % Find the informative variables for each lambda: S is the
170 %~ % matrix of the coefficients of the informative variables
171 %~ % parallelization is done inside selectiontotale()
172 %~ [B1,B2,rhoLasso,piLasso]=selectiontotale(phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,glambda,x,y,10^-15,tau);
173 %~ %S1 are the informative variables, S2 the non-informative ones
174 %~ [B1bis,B2bis,glambda2bis,ind,rhoLasso,piLasso]=suppressionmodelesegaux(B1,B2,glambda,rhoLasso,piLasso);
175 %~ dessinVariablesSelectionnees;
176 %~ disp('The Lasso step is done')
177 %~ % For each lambda built this way, compute the MLE for the Lasso-MLE procedure
178 %~ %We obtain a collection of models for Lasso-MLE
179 %~ % HERE TOO WE COULD PARALLELIZE inside constructionModelesLassoMLE
180 %~ nombreLambda=size(B1bis,3);
181 %~ %[phiLassoMLE,rhoLassoMLE,piLassoMLE,vraisLassoMLE]=constructionModelesLassoMLE(phiInit,rhoInit,piInit, tauInit,mini,maxi,gamma,glambda2bis,X,Y,seuil,tau,B1bis,B2bis)
183 %~ %keep only the active columns
184 %~ B1ter=B1bis(:,1,:);
185 %~ % [B1ter,ind,rhoLasso,piLasso]=suppressionmodelesegaux2(B1ter,rhoLasso,piLasso)
186 %~ %For each lambda, compute the low-rank estimator for the Lasso-Rank procedure
187 %~ %We obtain a collection of models for Lasso-Rank
188 %~ %HERE TOO WE COULD PARALLELIZE inside constructionModelesLassoRank
189 %~ nombreLambda2=size(B1ter,2);
190 %~ [phi,lvraisemblance,Z] = constructionModelesLassoRank(...
191 %~ piEmv,rhoEmv,mini,maxi,X,Y,tau,B1ter,2,4);
192 %~ disp('Models have been built for both procedures')
194 %~ %selectionModelesLassoMLE;
195 %~ %selectionModelesLassoRank;
196 %~ disp('Models have been selected in both procedures')