Add 'fast' argument to select C code or R code
[valse.git] / pkg / R / selectVariables.R
index ce7d3b3..b23eac2 100644 (file)
@@ -14,6 +14,7 @@
 #' @param Y                     matrix of responses
 #' @param thres         threshold to consider a coefficient to be equal to 0
 #' @param tau           threshold to say that EM algorithm has converged
+#' @param ncores Number of cores for parallel execution (1 to disable)
 #'
 #' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
 #'
 #' @export
 #'
 selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
-       X,Y,thresh,tau, ncores=1) #ncores==1 ==> no //
+       X,Y,thresh,tau, ncores=3, fast=TRUE)
 {
        if (ncores > 1)
        {
-               cl = parallel::makeCluster(ncores)
+               cl = parallel::makeCluster(ncores, outfile='')
                parallel::clusterExport(cl=cl,
                        varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
                        envir=environment())
        }
 
        # Calcul pour un lambda
-       computeCoefs <-function(lambda)
+       computeCoefs <- function(lambda)
        {
-               params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+               params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
 
                p = dim(phiInit)[1]
                m = dim(phiInit)[2]
@@ -44,19 +45,24 @@ selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambd
                selectedVariables = lapply(1:p, function(j) {
                        #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
                        #and finally return the corresponding indices
-                       seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
+                 seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
                })
 
-               list("selected"=selectedVariables,"Rho"=params$Rho,"Pi"=params$Pi)
+               list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
        }
 
        # Pour chaque lambda de la grille, on calcule les coefficients
        out <-
                if (ncores > 1)
-                       parLapply(cl, seq_along(glambda, computeCoefs)
+                       parLapply(cl, glambda, computeCoefs)
                else
-                       lapply(seq_along(glambda), computeCoefs)
+                       lapply(glambda, computeCoefs)
        if (ncores > 1)
                parallel::stopCluster(cl)
+
+       # Suppression doublons
+       sha1_array <- lapply(out, digest::sha1)
+       out[ !duplicated(sha1_array) ]
+
        out
 }