finished translation MATLAB --> R,C ; start debugging...
[valse.git] / R / main.R
1 Valse = setRefClass(
2 Class = "Valse",
3
4 fields = c(
5 # User defined
6
7 # regression data (size n*p, where n is the number of observations,
8 # and p is the number of regressors)
9 X = "numeric",
10 # response data (size n*m, where n is the number of observations,
11 # and m is the number of responses)
12 Y = "numeric",
13
14 # Optionally user defined (some default values)
15
16 # power in the penalty
17 gamma = "double",
18 # minimum number of iterations for EM algorithm
19 mini = "integer",
20 # maximum number of iterations for EM algorithm
21 maxi = "integer",
22 # threshold for stopping EM algorithm
23 eps = "double",
24 # minimum number of components in the mixture
25 kmin = "integer",
26 # maximum number of components in the mixture
27 kmax = "integer",
28 rangmin = "integer",
29 rangmax = "integer",
30
31 # Computed through the workflow
32
33 # initialisation for the reparametrized conditional mean parameter
34 phiInit,
35 # initialisation for the reparametrized variance parameter
36 rhoInit,
37 # initialisation for the proportions
38 piInit,
39 # initialisation for the allocations probabilities in each component
40 tauInit,
41 # values for the regularization parameter grid
42 gridLambda = c(),
43 # je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
44 # une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
45 A1,
46 A2,
47 # collection of estimations for the reparametrized conditional mean parameters
48 Phi,
49 # collection of estimations for the reparametrized variance parameters
50 Rho,
51 # collection of estimations for the proportions parameters
52 Pi,
53
54 #immutable
55 seuil = 1e-15
56 ),
57
58 methods = list(
59 #######################
60 #initialize main object
61 #######################
62 initialize = function(X,Y,...)
63 {
64 "Initialize Valse object"
65
66 callSuper(...)
67
68 X <<- X
69 Y <<- Y
70 gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
71 mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
72 maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
73 eps <<- ifelse (hasArg("eps"), eps, 1e-6)
74 kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
75 kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
76 rangmin <<- ifelse (hasArg("rangmin"), rangmin, as.integer(2))
77 rangmax <<- ifelse (hasArg("rangmax"), rangmax, as.integer(3))
78 },
79
80 ##################################
81 #core workflow: compute all models
82 ##################################
83
84 initParameters = function(k)
85 {
86 "Parameters initialization"
87
88 #smallEM initializes parameters by k-means and regression model in each component,
89 #doing this 20 times, and keeping the values maximizing the likelihood after 10
90 #iterations of the EM algorithm.
91 init = initSmallEM(k,X,Y,eps)
92 phiInit <<- init$phi0
93 rhoInit <<- init$rho0
94 piInit <<- init$pi0
95 tauInit <<- init$tau0
96 },
97
98 computeGridLambda = function()
99 {
100 "computation of the regularization grid"
101 #(according to explicit formula given by EM algorithm)
102
103 gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
104 },
105
106 computeRelevantParameters = function()
107 {
108 "Compute relevant parameters"
109
110 #select variables according to each regularization parameter
111 #from the grid: A1 corresponding to selected variables, and
112 #A2 corresponding to unselected variables.
113 params = selectiontotale(
114 phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,seuil,eps)
115 A1 <<- params$A1
116 A2 <<- params$A2
117 Rho <<- params$Rho
118 Pi <<- params$Pi
119 },
120
121 runProcedure1 = function()
122 {
123 "Run procedure 1 [EMGLLF]"
124
125 #compute parameter estimations, with the Maximum Likelihood
126 #Estimator, restricted on selected variables.
127 return ( constructionModelesLassoMLE(
128 phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,seuil,eps,A1,A2) )
129 },
130
131 runProcedure2 = function()
132 {
133 "Run procedure 2 [EMGrank]"
134
135 #compute parameter estimations, with the Low Rank
136 #Estimator, restricted on selected variables.
137 return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
138 A1,rangmin,rangmax) )
139 },
140
141 run = function()
142 {
143 "main loop: over all k and all lambda"
144
145 # Run the all procedure, 1 with the
146 #maximum likelihood refitting, and 2 with the Low Rank refitting.
147 p = dim(phiInit)[1]
148 m = dim(phiInit)[2]
149 for (k in kmin:kmax)
150 {
151 print(k)
152 initParameters(k)
153 computeGridLambda()
154 computeRelevantParameters()
155 if (procedure == 1)
156 {
157 r1 = runProcedure1()
158 Phi2 = Phi
159 Rho2 = Rho
160 Pi2 = Pi
161 p = ncol(X)
162 m = ncol(Y)
163 if size(Phi2) == 0
164 {
165 Phi[,,1:k] = r1$phi
166 Rho[,,1:k] = r1$rho
167 Pi[1:k,] = r1$pi
168 } else
169 {
170 Phi = array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
171 Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] = Phi2
172 Phi[,,1:k,dim(Phi2)[4]+1] = r1$phi
173 Rho = array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
174 Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] = Rho2
175 Rho[,,1:k,dim(Rho2)[4]+1] = r1$rho
176 Pi = array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
177 Pi[1:nrow(Pi2),1:ncol(Pi2)] = Pi2
178 Pi[1:k,ncol(Pi2)+1] = r1$pi
179 }
180 } else
181 {
182 phi = runProcedure2()$phi
183 Phi2 = Phi
184 if (dim(Phi2)[1] == 0)
185 {
186 Phi(:,:,1:k,:) = phi
187 } else
188 {
189 size(Phi2)
190 Phi = zeros(p,m,kmax,size(Phi2,4)+size(phi,4))
191 size(Phi)
192 Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2
193 Phi(:,:,1:k,size(Phi2,4)+1:end) = phi
194 }
195 }
196 }
197 }
198
199 ##################################################
200 #TODO: pruning: select only one (or a few best ?!) model
201 ##################################################
202 #
203 # function[model] selectModel(
204 # #TODO
205 # #model = odel(...)
206 # end
207
208 )
209 )