From 07848d25af9f342f7d8e2dd103f2502d945afe54 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Fri, 3 Mar 2017 11:21:57 +0100
Subject: [PATCH] draft of constructionModelesLassoMLE.R

---
 R/constructionModelesLassoMLE.R | 74 ++++++++++++---------------------
 R/selectVariables.R             |  8 ++--
 2 files changed, 32 insertions(+), 50 deletions(-)

diff --git a/R/constructionModelesLassoMLE.R b/R/constructionModelesLassoMLE.R
index 346339e..55e9419 100644
--- a/R/constructionModelesLassoMLE.R
+++ b/R/constructionModelesLassoMLE.R
@@ -1,57 +1,37 @@
 constructionModelesLassoMLE = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
-	X,Y,seuil,tau,A1,A2)
+	X,Y,seuil,tau,selected)
 {
-  n = dim(X)[1];
-  p = dim(phiInit)[1]
-  m = dim(phiInit)[2]
-  k = dim(phiInit)[3]
-  L = length(glambda)
-
-  #output parameters
-  phi = array(0, dim=c(p,m,k,L))
-  rho = array(0, dim=c(m,m,k,L))
-  pi = matrix(0, k, L)
-  llh = matrix(0, L, 2) #log-likelihood
-
-  for(lambdaIndex in 1:L)
+	#TODO: parameter ncores (chaque tâche peut aussi demander du parallélisme...)
+	cl = parallel::makeCluster( parallel::detectCores() / 4 )
+	parallel::clusterExport(cl=cl,
+		varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","seuil","tau"),
+		envir=environment())
+	#Pour chaque lambda de la grille, on calcule les coefficients
+	out = parLapply( seq_along(glambda), function(lambdaindex)
 	{
-    a = A1[,1,lambdaIndex]
-    a = a[a!=0]
-    if(length(a)==0)
-      next
+		n = dim(X)[1]
+		p = dim(phiInit)[1]
+		m = dim(phiInit)[2]
+		k = dim(phiInit)[3]
 
-    res = EMGLLF(phiInit[a,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[,a],Y,tau)
+		#TODO: phiInit[selected] et X[selected] sont bien sûr faux; par quoi remplacer ?
+		#lambda == 0 c'est normal ?
+    res = EMGLLF(phiInit[selected],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[selected],Y,tau)
 
-		#TODO: supprimer ça et utiliser parLapply(...)
-    for (j in 1:length(a))
-      phi[a[j],,,lambdaIndex] = res$phi[j,,]
-    rho[,,,lambdaIndex] = res$rho
-    pi[,lambdaIndex] = res$pi
-
-    dimension = 0
-    for (j in 1:p) #TODO: doit pouvoir être fait beaucoup plus simplement
-		{
-      b = A2[j,2:dim(A2)[2],lambdaIndex]
-      b = b[b!=0]
-      if (length(b) > 0)
-        phi[A2[j,1,lambdaIndex],b,,lambdaIndex] = 0.
-      c = A1[j,2:dim(A1)[2],lambdaIndex]
-      dimension = dimension + sum(c!=0)
-    }
+		#comment évaluer la dimension à partir du résultat et de [not]selected ?
+    #dimension = ...
 
     #on veut calculer l'EMV avec toutes nos estimations
-		densite = matrix(0, nrow=n, ncol=L)
-		for (i in 1:n) #TODO: pas besoin de cette boucle (vectorize)
+		densite = vector("double",n)
+		for (r in 1:k)
 		{
-			for (r in 1:k)
-			{
-				delta = Y[i,]%*%rho[,,r,lambdaIndex] - (X[i,a]%*%phi[a,,r,lambdaIndex]);
-				densite[i,lambdaIndex] = densite[i,lambdaIndex] + pi[r,lambdaIndex] *
-					det(rho[,,r,lambdaIndex])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0)
-			}
+			delta = Y%*%rho[,,r] - (X[selected]%*%res$phi[selected,,r])
+			densite = densite + pi[r] *
+				det(rho[,,r])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0)
 		}
-		llh[lambdaIndex,1] = sum(log(densite[,lambdaIndex]))
-		llh[lambdaIndex,2] = (dimension+m+1)*k-1
-  }
-  return (list("phi"=phi, "rho"=rho, "pi"=pi, "llh" = llh))
+		llh = c( sum(log(densite[,lambdaIndex])), (dimension+m+1)*k-1 )
+		list("phi"=res$phi, "rho"=res$rho, "pi"=res$pi, "llh" = llh)
+	})
+	parallel::stopCluster(cl)
+	out
 }
diff --git a/R/selectVariables.R b/R/selectVariables.R
index 03578c9..46fb3f3 100644
--- a/R/selectVariables.R
+++ b/R/selectVariables.R
@@ -21,17 +21,19 @@
 #' @export
 selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau)
 {
+	#TODO: parameter ncores (chaque tâche peut aussi demander du parallélisme...)
 	cl = parallel::makeCluster( parallel::detectCores() / 4 )
 	parallel::clusterExport(cl=cl,
 		varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","seuil","tau"),
 		envir=environment())
 	#Pour chaque lambda de la grille, on calcule les coefficients
-	out = parLapply( 1:L, function(lambdaindex)
+	out = parLapply( seq_along(glambda), function(lambdaindex)
 	{
-		params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda[lambdaIndex],X,Y,tau)
-
 		p = dim(phiInit)[1]
 		m = dim(phiInit)[2]
+
+		params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda[lambdaIndex],X,Y,tau)
+
 		#selectedVariables: list where element j contains vector of selected variables in [1,m]
 		selectedVariables = lapply(1:p, function(j) {
 			#from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
-- 
2.44.0