[valse.git] / selmix.m
classdef selmix < handle

	properties (SetAccess = private)
		%always user defined
		X; % regression data (size n*p, where n is the number of observations and p is the number of regressors)
		Y; % response data (size n*m, where n is the number of observations and m is the number of responses)

		%optionally user defined (defaults are provided)
		gamma; % power in the penalty
		mini; % minimum number of iterations for the EM algorithm
		maxi; % maximum number of iterations for the EM algorithm
		eps; % threshold for stopping the EM algorithm
		kmin; % minimum number of components in the mixture
		kmax; % maximum number of components in the mixture
		rangmin; % minimum rank for the low rank procedure
		rangmax; % maximum rank for the low rank procedure

		%computed through the workflow
		phiInit; % initialisation for the reparametrized conditional mean parameter
		rhoInit; % initialisation for the reparametrized variance parameter
		piInit; % initialisation for the proportions
		tauInit; % initialisation for the allocation probabilities in each component
		gridLambda = []; % values for the regularization parameter grid
		A1; % note: these may not really belong in the output, since an A1 and an A2 matrix are built for each k, and they are large, so storing them is somewhat costly ...
		A2;
		Phi; % collection of estimations for the reparametrized conditional mean parameters
		Rho; % collection of estimations for the reparametrized variance parameters
		Pi; % collection of estimations for the proportions parameters
	end

	properties (Constant)
		%immutable
		seuil = 1e-15; % threshold ("seuil") below which a coefficient is treated as zero
	end

	methods
		%%%%%%%%%%%%%%%%%%%%%%%
		%initialize main object
		%%%%%%%%%%%%%%%%%%%%%%%

		function sx = selmix(X,Y,varargin)
			%set defaults for optional inputs
			optargs = {1.0 5 10 1e-6 2 3 2 3};
			%replace defaults by user-supplied parameters
			optargs(1:length(varargin)) = varargin;
			sx.X = X;
			sx.Y = Y;
			[sx.gamma,sx.mini,sx.maxi,sx.eps,sx.kmin,sx.kmax,sx.rangmin,sx.rangmax] = optargs{:};
			% z = somme(sx.X,sx.Y);
			% sx.Z = z;
		end
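
		% A minimal sketch of the optional-argument mechanism above (the
		% calls are illustrative, not part of this file): trailing arguments
		% map, in order, to gamma, mini, maxi, eps, kmin, kmax, rangmin, rangmax.
		%~ sx = selmix(X,Y);          % all defaults
		%~ sx = selmix(X,Y,1.0,10);   % override gamma and mini only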
		%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
		%core workflow: compute all models
		%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

		function initParameters(sx,k)
			% initSmallEM initializes the parameters by k-means and a
			% regression model in each component; this is repeated 20 times,
			% keeping the values that maximize the likelihood after 10
			% iterations of the EM algorithm.
			[phi0,rho0,pi0,tau0] = initSmallEM(k,sx.X,sx.Y,sx.eps);
			sx.phiInit = phi0;
			sx.rhoInit = rho0;
			sx.piInit = pi0;
			sx.tauInit = tau0;
		end

		function computeGridLambda(sx)
			% computation of the regularization grid, according to the
			% explicit formulae given by the EM algorithm
			[sx.gridLambda] = grillelambda(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.X,sx.Y,sx.gamma,sx.mini,sx.maxi,sx.eps);
		end

		function computeRelevantParameters(sx)
			% select variables according to each regularization parameter
			% from the grid: sx.A1 contains the selected variables,
			% sx.A2 the unselected ones
			[sx.A1,sx.A2,sx.Rho,sx.Pi] = selectiontotale(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps);
		end

		function [sx,phi,rho,pi] = runProcedure1(sx)
			% compute parameter estimations with the maximum likelihood
			% estimator, restricted to the selected variables
			[phi,rho,pi,~] = constructionModelesLassoMLE(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps,sx.A1,sx.A2);
		end

		function phi = runProcedure2(sx)
			% compute parameter estimations with the low rank
			% estimator, restricted to the selected variables
			[phi,~] = constructionModelesLassoRank(sx.Pi,sx.Rho,sx.mini,sx.maxi,sx.X,sx.Y,sx.eps,sx.A1,sx.rangmin,sx.rangmax);
		end

		% main loop: over all k and all lambda
		function run(sx,procedure)
			% Run the whole procedure: 1 with the maximum likelihood
			% refitting, 2 with the low rank refitting.
			p = size(sx.X,2);
			m = size(sx.Y,2);
			for k=sx.kmin:sx.kmax
				fprintf('k = %d\n',k);
				initParameters(sx,k);
				computeGridLambda(sx);
				computeRelevantParameters(sx);
				if (procedure == 1)
					[~,phi,rho,pi] = runProcedure1(sx);
					Phi2 = sx.Phi;
					Rho2 = sx.Rho;
					Pi2 = sx.Pi;
					if isempty(Phi2)
						sx.Phi(:,:,1:k,:) = phi;
						sx.Rho(:,:,1:k,:) = rho;
						sx.Pi(1:k,:) = pi;
					else
						% grow the collections along the lambda dimension,
						% keeping the previous estimations
						sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4));
						sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2;
						sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi;
						sx.Rho = zeros(m,m,sx.kmax,size(Rho2,4)+size(rho,4));
						sx.Rho(:,:,1:size(Rho2,3),1:size(Rho2,4)) = Rho2;
						sx.Rho(:,:,1:k,size(Rho2,4)+1:end) = rho;
						sx.Pi = zeros(sx.kmax,size(Pi2,2)+size(pi,2));
						sx.Pi(1:size(Pi2,1),1:size(Pi2,2)) = Pi2;
						sx.Pi(1:k,size(Pi2,2)+1:end) = pi;
					end
				else
					phi = runProcedure2(sx);
					Phi2 = sx.Phi;
					if isempty(Phi2)
						sx.Phi(:,:,1:k,:) = phi;
					else
						sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4));
						sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2;
						sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi;
					end
				end
			end
		end
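
		% Note on the collections built by run (dimensions inferred from the
		% code above, so treat them as an assumption): sx.Phi is
		% p*m*kmax*L, sx.Rho is m*m*kmax*L and sx.Pi is kmax*L, where L is
		% the total number of lambda values accumulated over all k.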

		%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
		%pruning: select only one (or a few best ?!) model
		%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
		%
		% function model = selectModel(sx)
		%	%TODO
		%	%model = sxModel(...);
		% end

	end

end
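
%%%%%%%%%%%%%%%
%USAGE EXAMPLE:

% A hedged sketch of the intended workflow (the data below is hypothetical;
% procedure 1 = maximum likelihood refitting, 2 = low rank refitting, as in
% run() above):
%~ X = randn(100,10);            % n=100 observations, p=10 regressors
%~ Y = randn(100,3);             % m=3 responses
%~ sx = selmix(X,Y,1.0,5,10,1e-6,2,3,2,3); % explicit defaults
%~ sx.run(1);                    % fills sx.Phi, sx.Rho and sx.Pi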
%%%%%%%%%%%%%
%OLD VERSION:

%~ for k=K
%~	% Initialize
%~	initialisation
%~	disp('Initialized')
%~	% Build the grid of lambdas: informative variables
%~	[glambda,phiEmv,rhoEmv,piEmv]=grillelambda(phiInit,rhoInit,piInit,tauInit,x,y,gamma,mini,maxi,tau);
%~	glambda=glambda(1:3);
%~	disp('glambda built')
%~	% Find the informative variables for each lambda: S is the
%~	% matrix of coefficients of the informative variables
%~	% parallelization happens inside selectiontotale()
%~	[B1,B2,rhoLasso,piLasso]=selectiontotale(phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,glambda,x,y,10^-15,tau);
%~	%S1 the informative variables, S2 the non-informative ones
%~	[B1bis,B2bis,glambda2bis,ind,rhoLasso,piLasso]=suppressionmodelesegaux(B1,B2,glambda,rhoLasso,piLasso);
%~	dessinVariablesSelectionnees;
%~	disp('The Lasso step is done')
%~	% For each lambda built this way, compute the MLE for the Lasso-MLE procedure
%~	% This yields a collection of models for Lasso-MLE
%~	% HERE TOO WE COULD PARALLELIZE, inside constructionModelesLassoMLE
%~	nombreLambda=size(B1bis,3);
%~	%[phiLassoMLE,rhoLassoMLE,piLassoMLE,vraisLassoMLE]=constructionModelesLassoMLE(phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,glambda2bis,X,Y,seuil,tau,B1bis,B2bis)
%~	%For Lasso-Rank
%~	%keep only the active columns
%~	B1ter=B1bis(:,1,:);
%~	% [B1ter,ind,rhoLasso,piLasso]=suppressionmodelesegaux2(B1ter,rhoLasso,piLasso)
%~	%For each lambda, compute the low rank estimator for the Lasso-Rank procedure
%~	%This yields a collection of models for Lasso-Rank
%~	%HERE TOO WE COULD PARALLELIZE, inside constructionModelesLassoRank
%~	nombreLambda2=size(B1ter,2);
%~	[phi,lvraisemblance,Z] = constructionModelesLassoRank(...
%~		piEmv,rhoEmv,mini,maxi,X,Y,tau,B1ter,2,4);
%~	disp('Models for both procedures have been built')
%~ end
%~ %selectionModelesLassoMLE;
%~ %selectionModelesLassoRank;
%~ disp('Models have been selected in both procedures')