utilisation de k-means au lieu de hierarchique dans initSmallEM - PB de dimensions...
[valse.git] / R / main.R
1 SelMix = setRefClass(
2 Class = "selmix",
3
4 fields = c(
5 # User defined
6
7 # regression data (size n*p, where n is the number of observations,
8 # and p is the number of regressors)
9 X = "numeric",
10 # response data (size n*m, where n is the number of observations,
11 # and m is the number of responses)
12 Y = "numeric",
13
14 # Optionally user defined (some default values)
15
16 # power in the penalty
17 gamma = "double",
18 # minimum number of iterations for EM algorithm
19 mini = "integer",
20 # maximum number of iterations for EM algorithm
21 maxi = "integer",
22 # threshold for stopping EM algorithm
23 eps = "double",
24 # minimum number of components in the mixture
25 kmin = "integer",
26 # maximum number of components in the mixture
27 kmax = "integer",
28 rangmin = "integer",
29 rangmax = "integer",
30
31 # Computed through the workflow
32
33 # initialisation for the reparametrized conditional mean parameter
34 phiInit,
35 # initialisation for the reparametrized variance parameter
36 rhoInit,
37 # initialisation for the proportions
38 piInit,
39 # initialisation for the allocations probabilities in each component
40 tauInit,
41 # values for the regularization parameter grid
42 gridLambda = c(),
43 # je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
44 # une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
45 A1,
46 A2,
47 # collection of estimations for the reparametrized conditional mean parameters
48 Phi,
49 # collection of estimations for the reparametrized variance parameters
50 Rho,
51 # collection of estimations for the proportions parameters
52 Pi,
53
54 #immutable
55 seuil = 1e-15
56 ),
57
58 methods = list(
59 #######################
60 #initialize main object
61 #######################
62 initialize = function(X,Y,...)
63 {
64 "Initialize SelMix object"
65
66 callSuper(...)
67
68 X <<- X;
69 Y <<- Y;
70 gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
71 mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
72 maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
73 eps <<- ifelse (hasArg("eps"), eps, 1e-6)
74 kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
75 kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
76 rangmin <<- ifelse (hasArg("rangmin"), rangmin, as.integer(2))
77 rangmax <<- ifelse (hasArg("rangmax"), rangmax, as.integer(3))
78 },
79
80 ##################################
81 #core workflow: compute all models
82 ##################################
83
84 initParameters = function(k)
85 {
86 "Parameters initialization"
87
88 #smallEM initializes parameters by k-means and regression model in each component,
89 #doing this 20 times, and keeping the values maximizing the likelihood after 10
90 #iterations of the EM algorithm.
91 init = initSmallEM(k,sx.X,sx.Y,sx.eps);
92 phiInit <<- init$phi0;
93 rhoInit <<- init$rho0;
94 piInit <<- init$pi0;
95 tauInit <<- init$tau0;
96 },
97
98 computeGridLambda = function()
99 {
100 "computation of the regularization grid"
101 #(according to explicit formula given by EM algorithm)
102
103 gridLambda <<- gridLambda(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.X,sx.Y,
104 sx.gamma,sx.mini,sx.maxi,sx.eps);
105 },
106
107 computeRelevantParameters = function()
108 {
109 "Compute relevant parameters"
110
111 #select variables according to each regularization parameter
112 #from the grid: sx.A1 corresponding to selected variables, and
113 #sx.A2 corresponding to unselected variables.
114 params = selectiontotale(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,
115 sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps);
116 A1 <<- params$A1
117 A2 <<- params$A2
118 Rho <<- params$Rho
119 Pi <<- params$Pi
120 },
121
122 runProcedure1 = function()
123 {
124 "Run procedure 1 [EMGLLF]"
125
126 #compute parameter estimations, with the Maximum Likelihood
127 #Estimator, restricted on selected variables.
128 res = constructionModelesLassoMLE(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,
129 sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps,sx.A1,sx.A2);
130 return (list( phi=res$phi, rho=res$rho, pi=res$pi))
131 },
132
133 runProcedure2 = function()
134 {
135 "Run procedure 2 [EMGrank]"
136
137 #compute parameter estimations, with the Low Rank
138 #Estimator, restricted on selected variables.
139 return (constructionModelesLassoRank(sx.Pi,sx.Rho,sx.mini,sx.maxi,sx.X,sx.Y,sx.eps,
140 sx.A1,sx.rangmin,sx.rangmax)$phi)
141 },
142
143 run = function(procedure)
144 {
145 "main loop: over all k and all lambda"
146
147 # Run the all procedure, 1 with the
148 #maximum likelihood refitting, and 2 with the Low Rank refitting.
149 p = dim(phiInit)[1]
150 m = dim(phiInit)[2]
151 for (k in kmin:kmax)
152 {
153 print(k)
154 initParameters(k)
155 computeGridLambda()
156 computeRelevantParameters()
157 if (procedure == 1)
158 {
159 r1 = runProcedure1(sx)
160 Phi2 = Phi
161 Rho2 = Rho
162 Pi2 = Pi
163 p = ncol(X)
164 m = ncol(Y)
165 if size(Phi2) == 0 #TODO: continue translation MATLAB --> R
166 Phi(:,:,1:k,:) = r1$phi;
167 Rho(:,:,1:k,:) = r1$rho;
168 Pi(1:k,:) = r1$pi;
169 else
170 Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(r1$phi,4));
171 Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2;
172 Phi(:,:,1:k,size(Phi2,4)+1:end) = r1$phi;
173 Rho = zeros(m,m,sx.kmax,size(Rho2,4)+size(r1$rho,4));
174 Rho(:,:,1:size(Rho2,3),1:size(Rho2,4)) = Rho2;
175 Rho(:,:,1:k,size(Rho2,4)+1:end) = r1$rho;
176 Pi = zeros(sx.kmax,size(Pi2,2)+size(r1$pi,2));
177 Pi(1:size(Pi2,1),1:size(Pi2,2)) = Pi2;
178 Pi(1:k,size(Pi2,2)+1:end) = r1$pi;
179 end
180 else
181 [phi] = runProcedure2(sx);
182 phi
183 Phi2 = sx.Phi;
184 if size(Phi2,1) == 0
185 sx.Phi(:,:,1:k,:) = phi;
186 else
187 size(Phi2)
188 sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4));
189 size(sx.Phi)
190 sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2;
191 sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi;
192 end
193
194 end
195
196
197 end
198 end
199
200 ##################################################
201 #pruning: select only one (or a few best ?!) model
202 ##################################################
203 #
204 # function[model] selectModel(sx)
205 # #TODO
206 # #model = sxModel(...);
207 # end
208
209 )
210 )