pkg/R/main.R

   1 #' @useDynLib valse
   2
   3 Valse = setRefClass(
   4     Class = "Valse",
   5
   6     fields = c(
   7         # User defined
   8
   9         # regression data (size n*p, where n is the number of observations,
  10         # and p is the number of regressors)
  11         X = "matrix",
  12         # response data (size n*m, where n is the number of observations,
  13         # and m is the number of responses)
  14         Y = "matrix",
  15
  16         # Optionally user defined (some default values)
  17
  18         # power in the penalty
  19         gamma = "numeric",
  20         # minimum number of iterations for EM algorithm
  21         mini = "integer",
  22         # maximum number of iterations for EM algorithm
  23         maxi = "integer",
  24         # threshold for stopping EM algorithm
  25         eps = "numeric",
  26         # minimum number of components in the mixture
  27         kmin = "integer",
  28         # maximum number of components in the mixture
  29         kmax = "integer",
  30         # ranks for the Lasso-Rank procedure
  31         rank.min = "integer",
  32         rank.max = "integer",
  33
  34         # Computed through the workflow
  35
  36         # initialisation for the reparametrized conditional mean parameter
  37         phiInit = "numeric",
  38         # initialisation for the reparametrized variance parameter
  39         rhoInit = "numeric",
  40         # initialisation for the proportions
  41         piInit = "numeric",
  42         # initialisation for the allocations probabilities in each component
  43         tauInit = "numeric",
  44         # values for the regularization parameter grid
  45         gridLambda = "numeric",
  46         # je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
  47         # une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
  48         A1 = "integer",
  49         A2 = "integer",
  50         # collection of estimations for the reparametrized conditional mean parameters
  51         Phi = "numeric",
  52         # collection of estimations for the reparametrized variance parameters
  53         Rho = "numeric",
  54         # collection of estimations for the proportions parameters
  55         Pi = "numeric",
  56
  57         #immutable (TODO:?)
  58         thresh = "numeric"
  59     ),
  60
  61     methods = list(
  62         #######################
  63         #initialize main object
  64         #######################
  65         initialize = function(X,Y,...)
  66         {
  67             "Initialize Valse object"
  68
  69             callSuper(...)
  70
  71             X <<- X
  72             Y <<- Y
  73             gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
  74             mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
  75             maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
  76             eps <<- ifelse (hasArg("eps"), eps, 1e-6)
  77             kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
  78             kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
  79             rank.min <<- ifelse (hasArg("rank.min"), rank.min, as.integer(2))
  80             rank.max <<- ifelse (hasArg("rank.max"), rank.max, as.integer(3))
  81             thresh <<- 1e-15 #immutable (TODO:?)
  82         },
  83
  84         ##################################
  85         #core workflow: compute all models
  86         ##################################
  87
  88         initParameters = function(k)
  89         {
  90             "Parameters initialization"
  91
  92             #smallEM initializes parameters by k-means and regression model in each component,
  93             #doing this 20 times, and keeping the values maximizing the likelihood after 10
  94             #iterations of the EM algorithm.
  95             init = initSmallEM(k,X,Y)
  96             phiInit <<- init$phi0
  97             rhoInit <<- init$rho0
  98             piInit  <<- init$pi0
  99             tauInit <<- init$tau0
 100         },
 101
 102         computeGridLambda = function()
 103         {
 104             "computation of the regularization grid"
 105             #(according to explicit formula given by EM algorithm)
 106
 107             gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
 108         },
 109
 110         computeRelevantParameters = function()
 111         {
 112             "Compute relevant parameters"
 113
 114             #select variables according to each regularization parameter
 115             #from the grid: A1 corresponding to selected variables, and
 116             #A2 corresponding to unselected variables.
 117             params = selectiontotale(
 118                 phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps)
 119             A1 <<- params$A1
 120             A2 <<- params$A2
 121             Rho <<- params$Rho
 122             Pi <<- params$Pi
 123         },
 124
 125         runProcedure1 = function()
 126         {
 127             "Run procedure 1 [EMGLLF]"
 128
 129             #compute parameter estimations, with the Maximum Likelihood
 130             #Estimator, restricted on selected variables.
 131             return ( constructionModelesLassoMLE(
 132                 phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps,A1,A2) )
 133         },
 134
 135         runProcedure2 = function()
 136         {
 137             "Run procedure 2 [EMGrank]"
 138
 139             #compute parameter estimations, with the Low Rank
 140             #Estimator, restricted on selected variables.
 141             return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
 142                 A1,rank.min,rank.max) )
 143         },
 144
 145         run = function()
 146         {
 147             "main loop: over all k and all lambda"
 148
 149             # Run the whole procedure, 1 with the
 150             #maximum likelihood refitting, and 2 with the Low Rank refitting.
 151             p = dim(phiInit)[1]
 152             m = dim(phiInit)[2]
 153             for (k in kmin:kmax)
 154             {
 155                 print(k)
 156                 initParameters(k)
 157                 computeGridLambda()
 158                 computeRelevantParameters()
 159                 if (procedure == 1)
 160                 {
 161                     r1 = runProcedure1()
 162                     Phi2 = Phi
 163                     Rho2 = Rho
 164                     Pi2 = Pi
 165                     p = ncol(X)
 166                     m = ncol(Y)
 167                     if (is.null(dim(Phi2))) #test was: size(Phi2) == 0
 168                     {
 169                         Phi[,,1:k] <<- r1$phi
 170                         Rho[,,1:k] <<- r1$rho
 171                         Pi[1:k,] <<- r1$pi
 172                     } else
 173                     {
 174                         Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
 175                         Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
 176                         Phi[,,1:k,dim(Phi2)[4]+1] <<- r1$phi
 177                         Rho <<- array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
 178                         Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] <<- Rho2
 179                         Rho[,,1:k,dim(Rho2)[4]+1] <<- r1$rho
 180                         Pi <<- array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
 181                         Pi[1:nrow(Pi2),1:ncol(Pi2)] <<- Pi2
 182                         Pi[1:k,ncol(Pi2)+1] <<- r1$pi
 183                     }
 184                 } else
 185                 {
 186                     phi = runProcedure2()$phi
 187                     Phi2 = Phi
 188                     if (dim(Phi2)[1] == 0)
 189                     {
 190                         Phi[,,1:k,] <<- phi
 191                     } else
 192                     {
 193                         Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(phi)[4]))
 194                         Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
 195                         Phi[,,1:k,-(1:(dim(Phi2)[4]))] <<- phi
 196                     }
 197                 }
 198             }
 199         }
 200
 201         ##################################################
 202         #TODO: pruning: select only one (or a few best ?!) model
 203         ##################################################
 204         #
 205         #       function[model] selectModel(
 206         #           #TODO
 207         #           #model = odel(...)
 208         #       end
 209         # Give at least the slope heuristic and BIC, and AIC ?
 210
 211         )
 212 )