few last things
[valse.git] / pkg / R / main.R
CommitLineData
cad71b2c
BA
1#' @useDynLib valse
2
8e92c49c
BA
3Valse = setRefClass(
4 Class = "Valse",
39046da6
BA
5
6 fields = c(
7 # User defined
8
9 # regression data (size n*p, where n is the number of observations,
10 # and p is the number of regressors)
aa8df014 11 X = "matrix",
8e92c49c 12 # response data (size n*m, where n is the number of observations,
39046da6 13 # and m is the number of responses)
aa8df014 14 Y = "matrix",
39046da6
BA
15
16 # Optionally user defined (some default values)
17
18 # power in the penalty
09ab3c16 19 gamma = "numeric",
39046da6
BA
20 # minimum number of iterations for EM algorithm
21 mini = "integer",
22 # maximum number of iterations for EM algorithm
23 maxi = "integer",
24 # threshold for stopping EM algorithm
09ab3c16 25 eps = "numeric",
39046da6
BA
26 # minimum number of components in the mixture
27 kmin = "integer",
28 # maximum number of components in the mixture
29 kmax = "integer",
22d21a22 30 # ranks for the Lasso-Rank procedure
31 rank.min = "integer",
32 rank.max = "integer",
33
39046da6
BA
34 # Computed through the workflow
35
36 # initialisation for the reparametrized conditional mean parameter
09ab3c16 37 phiInit = "numeric",
39046da6 38 # initialisation for the reparametrized variance parameter
09ab3c16 39 rhoInit = "numeric",
39046da6 40 # initialisation for the proportions
09ab3c16 41 piInit = "numeric",
39046da6 42 # initialisation for the allocations probabilities in each component
09ab3c16 43 tauInit = "numeric",
39046da6 44 # values for the regularization parameter grid
09ab3c16 45 gridLambda = "numeric",
39046da6
BA
46 # je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
47 # une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
09ab3c16
BA
48 A1 = "integer",
49 A2 = "integer",
39046da6 50 # collection of estimations for the reparametrized conditional mean parameters
09ab3c16 51 Phi = "numeric",
39046da6 52 # collection of estimations for the reparametrized variance parameters
09ab3c16 53 Rho = "numeric",
39046da6 54 # collection of estimations for the proportions parameters
09ab3c16 55 Pi = "numeric",
39046da6 56
09ab3c16 57 #immutable (TODO:?)
22d21a22 58 thresh = "numeric"
39046da6
BA
59 ),
60
61 methods = list(
62 #######################
63 #initialize main object
64 #######################
65 initialize = function(X,Y,...)
66 {
8e92c49c 67 "Initialize Valse object"
39046da6
BA
68
69 callSuper(...)
70
8e92c49c
BA
71 X <<- X
72 Y <<- Y
39046da6
BA
73 gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
74 mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
75 maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
76 eps <<- ifelse (hasArg("eps"), eps, 1e-6)
77 kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
78 kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
22d21a22 79 rank.min <<- ifelse (hasArg("rank.min"), rank.min, as.integer(2))
80 rank.max <<- ifelse (hasArg("rank.max"), rank.max, as.integer(3))
81 thresh <<- 1e-15 #immutable (TODO:?)
39046da6
BA
82 },
83
84 ##################################
85 #core workflow: compute all models
86 ##################################
87
88 initParameters = function(k)
89 {
90 "Parameters initialization"
91
92 #smallEM initializes parameters by k-means and regression model in each component,
93 #doing this 20 times, and keeping the values maximizing the likelihood after 10
94 #iterations of the EM algorithm.
e3f2fe8a 95 init = initSmallEM(k,X,Y)
8e92c49c
BA
96 phiInit <<- init$phi0
97 rhoInit <<- init$rho0
98 piInit <<- init$pi0
99 tauInit <<- init$tau0
39046da6
BA
100 },
101
102 computeGridLambda = function()
103 {
104 "computation of the regularization grid"
105 #(according to explicit formula given by EM algorithm)
106
8e92c49c 107 gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
39046da6
BA
108 },
109
110 computeRelevantParameters = function()
111 {
112 "Compute relevant parameters"
113
114 #select variables according to each regularization parameter
8e92c49c
BA
115 #from the grid: A1 corresponding to selected variables, and
116 #A2 corresponding to unselected variables.
117 params = selectiontotale(
22d21a22 118 phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps)
39046da6
BA
119 A1 <<- params$A1
120 A2 <<- params$A2
121 Rho <<- params$Rho
122 Pi <<- params$Pi
123 },
124
125 runProcedure1 = function()
126 {
127 "Run procedure 1 [EMGLLF]"
128
129 #compute parameter estimations, with the Maximum Likelihood
130 #Estimator, restricted on selected variables.
8e92c49c 131 return ( constructionModelesLassoMLE(
22d21a22 132 phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps,A1,A2) )
39046da6
BA
133 },
134
135 runProcedure2 = function()
136 {
137 "Run procedure 2 [EMGrank]"
138
139 #compute parameter estimations, with the Low Rank
140 #Estimator, restricted on selected variables.
8e92c49c 141 return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
22d21a22 142 A1,rank.min,rank.max) )
39046da6
BA
143 },
144
8e92c49c 145 run = function()
39046da6
BA
146 {
147 "main loop: over all k and all lambda"
148
22d21a22 149 # Run the whole procedure, 1 with the
39046da6 150 #maximum likelihood refitting, and 2 with the Low Rank refitting.
8e92c49c
BA
151 p = dim(phiInit)[1]
152 m = dim(phiInit)[2]
153 for (k in kmin:kmax)
154 {
155 print(k)
156 initParameters(k)
157 computeGridLambda()
158 computeRelevantParameters()
159 if (procedure == 1)
39046da6 160 {
8e92c49c
BA
161 r1 = runProcedure1()
162 Phi2 = Phi
163 Rho2 = Rho
164 Pi2 = Pi
165 p = ncol(X)
166 m = ncol(Y)
09ab3c16 167 if (is.null(dim(Phi2))) #test was: size(Phi2) == 0
8e92c49c 168 {
09ab3c16
BA
169 Phi[,,1:k] <<- r1$phi
170 Rho[,,1:k] <<- r1$rho
171 Pi[1:k,] <<- r1$pi
8e92c49c
BA
172 } else
173 {
09ab3c16
BA
174 Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
175 Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
176 Phi[,,1:k,dim(Phi2)[4]+1] <<- r1$phi
177 Rho <<- array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
178 Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] <<- Rho2
179 Rho[,,1:k,dim(Rho2)[4]+1] <<- r1$rho
180 Pi <<- array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
181 Pi[1:nrow(Pi2),1:ncol(Pi2)] <<- Pi2
182 Pi[1:k,ncol(Pi2)+1] <<- r1$pi
8e92c49c
BA
183 }
184 } else
185 {
186 phi = runProcedure2()$phi
187 Phi2 = Phi
188 if (dim(Phi2)[1] == 0)
189 {
09ab3c16 190 Phi[,,1:k,] <<- phi
8e92c49c 191 } else
39046da6 192 {
09ab3c16
BA
193 Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(phi)[4]))
194 Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
195 Phi[,,1:k,-(1:(dim(Phi2)[4]))] <<- phi
8e92c49c
BA
196 }
197 }
198 }
199 }
200
39046da6 201 ##################################################
8e92c49c 202 #TODO: pruning: select only one (or a few best ?!) model
39046da6
BA
203 ##################################################
204 #
8e92c49c 205 # function[model] selectModel(
39046da6 206 # #TODO
8e92c49c 207 # #model = odel(...)
39046da6 208 # end
22d21a22 209 # Give at least the slope heuristic and BIC, and AIC ?
8e92c49c 210
39046da6
BA
211 )
212)