test auto-indenter
[valse.git] / pkg / R / selectVariables.R
CommitLineData
#' selectVariables
#'
#' Construct, for each regularization parameter of a grid, the sets of relevant variables.
#'
#' For every lambda in \code{glambda}, EMGLLF is run from the given initial
#' estimators; an index is then kept as relevant for row j of the estimated phi
#' when at least one of its coefficients (over the last dimension of phi)
#' exceeds \code{thresh} in absolute value. Grid points that lead to exactly
#' the same selection as an earlier grid point are discarded.
#'
#' @param phiInit an initial estimator for phi (size: p*m*k)
#' @param rhoInit an initial estimator for rho (size: m*m*k)
#' @param piInit an initial estimator for pi (size: k)
#' @param gamInit an initial estimator for gamma
#' @param mini minimum number of iterations in EM algorithm
#' @param maxi maximum number of iterations in EM algorithm
#' @param gamma power in the penalty
#' @param glambda grid of regularization parameters
#' @param X matrix of regressors
#' @param Y matrix of responses
#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
#' @param eps threshold to say that EM algorithm has converged
#' @param ncores Number of cores for parallel execution (1 to disable)
#' @param fast flag forwarded unchanged to EMGLLF, by default = TRUE
#'   (presumably selects a faster implementation -- TODO confirm against EMGLLF)
#'
#' @return a list with one element per distinct selection found along the grid
#'   (first occurrence kept, in grid order); each element is a list with fields
#'   \code{selected}, \code{Rho} and \code{Pi}
#'
#' @examples
#' # TODO
#'
#' @export
#'
selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, glambda,
  X, Y, thresh = 1e-8, eps, ncores = 3, fast = TRUE)
{
  if (ncores > 1)
  {
    cl <- parallel::makeCluster(ncores, outfile = '')
    # Release the workers even if a computation fails on one of them
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl = cl,
      varlist = c("phiInit", "rhoInit", "piInit", "gamInit", "mini", "maxi", "gamma",
        "glambda", "X", "Y", "thresh", "eps", "fast"),
      envir = environment())
  }

  # Computation for a fixed lambda
  computeCoefs <- function(lambda)
  {
    params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, eps, fast)

    p <- dim(phiInit)[1]
    m <- dim(phiInit)[2]

    # selectedVariables: list where element j contains vector of selected variables in [1,m]
    selectedVariables <- lapply(seq_len(p), function(j) {
      # drop=FALSE keeps a 1 x m x k array even when m == 1 or k == 1, so that
      # apply() always receives an object with dimensions. Reducing over the
      # second margin yields the boolean m-vector "some coefficient is above
      # thresh", which is finally turned into the corresponding indices.
      seq_len(m)[ apply(abs(params$phi[j, , , drop = FALSE]) > thresh, 2, any) ]
    })

    list("selected" = selectedVariables, "Rho" = params$rho, "Pi" = params$pi)
  }

  # For each lambda in the grid, we compute the coefficients
  out <-
    if (ncores > 1) {
      parallel::parLapply(cl, glambda, computeCoefs)
    } else {
      lapply(glambda, computeCoefs)
    }

  # Suppress models which are computed twice.
  # Hashing the whole output (e.g. digest::sha1 on each element) would compare
  # every parameter; here we only want to drop the models which have the same
  # selected variables, so the comparison is done on the 'selected' field alone.
  selec <- lapply(out, function(model) model$selected)
  out[!duplicated(selec)]
}