From: emilie <emilie@devijver.org>
Date: Wed, 12 Apr 2017 15:57:28 +0000 (+0200)
Subject: fix many problems (models appearing twice, irrelevant coefficients in a relevant... 
X-Git-Url: https://git.auder.net/variants/Chakart/doc/scripts/pieces/app_dev.php?a=commitdiff_plain;h=fb6e49cb85308c3f99cc98fe955aa7c36839c819;p=valse.git

fix many problems (models appearing twice, irrelevant coefficients in a relevant column among others)
---

diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R
index 1739204..5a69a52 100644
--- a/pkg/R/EMGLLF.R
+++ b/pkg/R/EMGLLF.R
@@ -23,168 +23,170 @@
 #'
 #' @export
 EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
-	mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
+                   mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
 {
-	if (!fast)
-	{
-		# Function in R
-		return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
-	}
-
-	# Function in C
-	n = nrow(X) #nombre d'echantillons
-	p = ncol(X) #nombre de covariables
-	m = ncol(Y) #taille de Y (multivariÃ©)
-	k = length(piInit) #nombre de composantes dans le mÃ©lange
-	.Call("EMGLLF",
-		phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau,
-		phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi),
-			S=double(p*m*k), affec=integer(n),
-		n, p, m, k,
-		PACKAGE="valse")
+  if (!fast)
+  {
+    # Function in R
+    return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
+  }
+  
+  # Function in C
+  n = nrow(X) #nombre d'echantillons
+  p = ncol(X) #nombre de covariables
+  m = ncol(Y) #taille de Y (multivariÃ©)
+  k = length(piInit) #nombre de composantes dans le mÃ©lange
+  .Call("EMGLLF",
+        phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau,
+        phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi),
+        S=double(p*m*k), affec=integer(n),
+        n, p, m, k,
+        PACKAGE="valse")
 }
 
 # R version - slow but easy to read
-.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X2,Y,tau)
 {
-	# Matrix dimensions
-	n = dim(X)[1]
-	p = dim(phiInit)[1]
-	m = dim(phiInit)[2]
-	k = dim(phiInit)[3]
-
-	# Outputs
-	phi = phiInit
-	rho = rhoInit
-	pi = piInit
-	llh = -Inf
-	S = array(0, dim=c(p,m,k))
-
-	# Algorithm variables
-	gam = gamInit
-	Gram2 = array(0, dim=c(p,p,k))
-	ps2 = array(0, dim=c(p,m,k))
-	X2 = array(0, dim=c(n,p,k))
-	Y2 = array(0, dim=c(n,m,k))
-	EPS = 1e-15
-
-	for (ite in 1:maxi)
-	{
-		# Remember last pi,rho,phi values for exit condition in the end of loop
-		Phi = phi
-		Rho = rho
-		Pi = pi
-
-		# Calcul associÃ© Ã  Y et X
-		for (r in 1:k)
-		{
-			for (mm in 1:m)
-				Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
-			for (i in 1:n)
-				X2[i,,r] = sqrt(gam[i,r]) * X[i,]
-			for (mm in 1:m)
-				ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
-			for (j in 1:p)
-			{
-				for (s in 1:p)
-					Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
-			}
-		}
-
-		##########
-		#Etape M #
-		##########
-
-		# Pour pi
-		b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
-		gam2 = colSums(gam)
-		a = sum(gam %*% log(pi))
-
-		# Tant que les props sont negatives
-		kk = 0
-		pi2AllPositive = FALSE
-		while (!pi2AllPositive)
-		{
-			pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
-			pi2AllPositive = all(pi2 >= 0)
-			kk = kk+1
-		}
-
-		# t(m) la plus grande valeur dans la grille O.1^k tel que ce soit dÃ©croissante ou constante
-		while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
-			-sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
-		{
-			pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
-			kk = kk + 1
-		}
-		t = 0.1^kk
-		pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
-		#Pour phi et rho
-		for (r in 1:k)
-		{
-			for (mm in 1:m)
-			{
-				ps = 0
-				for (i in 1:n)
-					ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
-				nY2 = sum(Y2[,mm,r]^2)
-				rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
-			}
-		}
-
-		for (r in 1:k)
-		{
-			for (j in 1:p)
-			{
-				for (mm in 1:m)
-				{
-					S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
-					if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
-						phi[j,mm,r]=0
-					else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
-						phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
-					else
-						phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
-				}
-			}
-		}
-
-		##########
-		#Etape E #
-		##########
-
-		# Precompute det(rho[,,r]) for r in 1...k
-		detRho = sapply(1:k, function(r) det(rho[,,r]))
-
-		sumLogLLH = 0
-		for (i in 1:n)
-		{
-			# Update gam[,]
-			sumGamI = 0
-			for (r in 1:k)
-			{
-				gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
-				sumGamI = sumGamI + gam[i,r]
-			}
-			sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
-			if (sumGamI > EPS) #else: gam[i,] is already ~=0
-				gam[i,] = gam[i,] / sumGamI
-		}
-
-		sumPen = sum(pi^gamma * b)
-		last_llh = llh
-		llh = -sumLogLLH/n + lambda*sumPen
-		dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
-		Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
-		Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
-		Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
-		dist2 = max(Dist1,Dist2,Dist3)
-
-		if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
-			break
-	}
-
-	affec = apply(gam, 1, which.max)
-	list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
+  # Matrix dimensions
+  n = dim(Y)[1]
+  if (length(dim(phiInit)) == 2){
+    p = 1
+    m = dim(phiInit)[1]
+    k = dim(phiInit)[2]
+  } else { 
+    p = dim(phiInit)[1]
+    m = dim(phiInit)[2]
+    k = dim(phiInit)[3]
+  }
+  X = matrix(nrow = n, ncol = p)
+  X[1:n,1:p] = X2
+  # Outputs
+  phi = array(NA, dim = c(p,m,k))
+  phi[1:p,,] = phiInit
+  rho = rhoInit
+  pi = piInit
+  llh = -Inf
+  S = array(0, dim=c(p,m,k))
+  
+  # Algorithm variables
+  gam = gamInit
+  Gram2 = array(0, dim=c(p,p,k))
+  ps2 = array(0, dim=c(p,m,k))
+  X2 = array(0, dim=c(n,p,k))
+  Y2 = array(0, dim=c(n,m,k))
+  EPS = 1e-15
+  
+  for (ite in 1:maxi)
+  {
+    # Remember last pi,rho,phi values for exit condition in the end of loop
+    Phi = phi
+    Rho = rho
+    Pi = pi
+    
+    # Computations associated to X and Y
+    for (r in 1:k)
+    {
+      for (mm in 1:m)
+        Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+      for (i in 1:n)
+        X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+      for (mm in 1:m)
+        ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+      for (j in 1:p)
+      {
+        for (s in 1:p)
+          Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+      }
+    }
+    
+    #########
+    #M step #
+    #########
+    
+    # For pi
+    b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
+    gam2 = colSums(gam)
+    a = sum(gam %*% log(pi))
+    
+    # While the proportions are nonpositive
+    kk = 0
+    pi2AllPositive = FALSE
+    while (!pi2AllPositive)
+    {
+      pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+      pi2AllPositive = all(pi2 >= 0)
+      kk = kk+1
+    }
+    
+    # t(m) is the largest value in the grid O.1^k such that it is nonincreasing
+    while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+           -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
+    {
+      pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+      kk = kk + 1
+    }
+    t = 0.1^kk
+    pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+    
+    #For phi and rho
+    for (r in 1:k)
+    {
+      for (mm in 1:m)
+      {
+        ps = 0
+        for (i in 1:n)
+          ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+        nY2 = sum(Y2[,mm,r]^2)
+        rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+      }
+    }
+    
+    for (r in 1:k)
+    {
+      for (j in 1:p)
+      {
+        for (mm in 1:m)
+        {
+          S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
+          if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
+            phi[j,mm,r]=0
+          else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
+            phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
+          else
+            phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
+        }
+      }
+    }
+    
+    ########
+    #E step#
+    ########
+    
+    # Precompute det(rho[,,r]) for r in 1...k
+    detRho = sapply(1:k, function(r) det(rho[,,r]))
+    gam1 = matrix(0, nrow = n, ncol = k)
+    for (i in 1:n)
+    {
+      # Update gam[,]
+      for (r in 1:k)
+      {
+        gam1[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
+      }
+    }
+    gam = gam1 / rowSums(gam1)
+    sumLogLLH = sum(log(rowSums(gam)) - log((2*base::pi)^(m/2)))
+    sumPen = sum(pi^gamma * b)
+    last_llh = llh
+    llh = -sumLogLLH/n + lambda*sumPen
+    dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
+    Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
+    Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+    Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
+    dist2 = max(Dist1,Dist2,Dist3)
+    
+    if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
+      break
+  }
+  
+  list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S)
 }
diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R
index 227dfdc..b864985 100644
--- a/pkg/R/constructionModelesLassoMLE.R
+++ b/pkg/R/constructionModelesLassoMLE.R
@@ -31,11 +31,9 @@ constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini,
 		p = dim(phiInit)[1]
 		m = dim(phiInit)[2]
 		k = dim(phiInit)[3]
-
 		sel.lambda = S[[lambda]]$selected
 #		col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
 		col.sel <- which( sapply(sel.lambda,length) > 0 ) #if list of selected vars
-
 		if (length(col.sel) == 0)
 			return (NULL)
 
@@ -49,14 +47,16 @@ constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini,
 		piLambda = res$pi
 		phiLambda = array(0, dim = c(p,m,k))
 		for (j in seq_along(col.sel))
-			phiLambda[col.sel[j],,] = phiLambda2[j,,]
+			phiLambda[col.sel[j],sel.lambda[[j]],] = phiLambda2[j,sel.lambda[[j]],]
 		dimension = length(unlist(sel.lambda))
 
 		# Computation of the loglikelihood
 		densite = vector("double",n)
 		for (r in 1:k)
 		{
-			delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r]))
+		  if (length(col.sel)==1){
+		    delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%t(phiLambda[col.sel,,r])))
+		  } else delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r]))
 			densite = densite + piLambda[r] *
 				det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-diag(tcrossprod(delta))/2.0)
 		}
diff --git a/pkg/R/main.R b/pkg/R/main.R
index 89c4bcd..72ee724 100644
--- a/pkg/R/main.R
+++ b/pkg/R/main.R
@@ -26,109 +26,111 @@
 #' #TODO: a few examples
 #' @export
 valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50,
-	eps=1e-4, kmin=2, kmax=4, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1,
-	size_coll_mod=50, fast=TRUE, verbose=FALSE, plot = TRUE)
+                 eps=1e-4, kmin=2, kmax=2, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1,
+                 size_coll_mod=10, fast=TRUE, verbose=FALSE, plot = TRUE)
 {
   p = dim(X)[2]
   m = dim(Y)[2]
   n = dim(X)[1]
-
+  
   if (verbose)
-		print("main loop: over all k and all lambda")
-
-	if (ncores_outer > 1)
-	{
-		cl = parallel::makeCluster(ncores_outer, outfile='')
-		parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure",
-			"selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max",
-			"ncores_outer","ncores_inner","verbose","p","m") )
-	}
-
-	# Compute models with k components
-	computeModels <- function(k)
-	{
-		if (ncores_outer > 1)
-			require("valse") #nodes start with an empty environment
-
-		if (verbose)
-			print(paste("Parameters initialization for k =",k))
-		#smallEM initializes parameters by k-means and regression model in each component,
+    print("main loop: over all k and all lambda")
+  
+  if (ncores_outer > 1)
+  {
+    cl = parallel::makeCluster(ncores_outer, outfile='')
+    parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure",
+                                                                   "selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max",
+                                                                   "ncores_outer","ncores_inner","verbose","p","m") )
+  }
+  
+  # Compute models with k components
+  computeModels <- function(k)
+  {
+    if (ncores_outer > 1)
+      require("valse") #nodes start with an empty environment
+    
+    if (verbose)
+      print(paste("Parameters initialization for k =",k))
+    #smallEM initializes parameters by k-means and regression model in each component,
     #doing this 20 times, and keeping the values maximizing the likelihood after 10
     #iterations of the EM algorithm.
     P = initSmallEM(k, X, Y)
     grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y,
-			gamma, mini, maxi, eps, fast)
+                                     gamma, mini, maxi, eps, fast)
     if (length(grid_lambda)>size_coll_mod)
       grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
-
-		if (verbose)
-			print("Compute relevant parameters")
+    
+    if (verbose)
+      print("Compute relevant parameters")
     #select variables according to each regularization parameter
     #from the grid: S$selected corresponding to selected variables
     S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma,
-			grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
+                        grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
     
     if (procedure == 'LassoMLE')
-		{
+    {
       if (verbose)
-				print('run the procedure Lasso-MLE')
+        print('run the procedure Lasso-MLE')
       #compute parameter estimations, with the Maximum Likelihood
       #Estimator, restricted on selected variables.
       models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
-				mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose)
+                                            mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose)
     }
-		else
-		{
+    else
+    {
       if (verbose)
-				print('run the procedure Lasso-Rank')
+        print('run the procedure Lasso-Rank')
       #compute parameter estimations, with the Low Rank
       #Estimator, restricted on selected variables.
       models <- constructionModelesLassoRank(S$Pi, S$Rho, mini, maxi, X, Y, eps, S,
-				rank.min, rank.max, ncores_inner, fast, verbose)
+                                             rank.min, rank.max, ncores_inner, fast, verbose)
     }
-		#attention certains modeles sont NULL aprÃ¨s selectVariables
-		models = models[sapply(models, function(cell) !is.null(cell))]
+    #warning! Some models are NULL after running selectVariables
+    models = models[sapply(models, function(cell) !is.null(cell))]
     models
   }
-
-	# List (index k) of lists (index lambda) of models
-	models_list <-
-	  if (ncores_outer > 1)
-			parLapply(cl, kmin:kmax, computeModels)
-		else
-			lapply(kmin:kmax, computeModels)
-	if (ncores_outer > 1)
-		parallel::stopCluster(cl)
-
-	if (! requireNamespace("capushe", quietly=TRUE))
-	{
-		warning("'capushe' not available: returning all models")
-		return (models_list)
-	}
-
-	# Get summary "tableauRecap" from models
-	tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) {
-		models <- models_list[[i]]
-		#Pour un groupe de modeles (mÃªme k, diffÃ©rents lambda):
-		LLH <- sapply( models, function(model) model$llh[1] )
-		k = length(models[[1]]$pi)
-		sumPen = sapply(models, function(model)
-		  k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1)
-		data.frame(model=paste(i,".",seq_along(models),sep=""),
-			pen=sumPen/n, complexity=sumPen, contrast=-LLH)
-	} ) )
-print(tableauRecap)
+  
+  # List (index k) of lists (index lambda) of models
+  models_list <-
+    if (ncores_outer > 1)
+      parLapply(cl, kmin:kmax, computeModels)
+  else
+    lapply(kmin:kmax, computeModels)
+  if (ncores_outer > 1)
+    parallel::stopCluster(cl)
+  
+  if (! requireNamespace("capushe", quietly=TRUE))
+  {
+    warning("'capushe' not available: returning all models")
+    return (models_list)
+  }
+  
+  # Get summary "tableauRecap" from models
+  tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) {
+    models <- models_list[[i]]
+    #For a collection of models (same k, several lambda):
+    LLH <- sapply( models, function(model) model$llh[1] )
+    k = length(models[[1]]$pi)
+    sumPen = sapply(models, function(model)
+      k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1)
+    data.frame(model=paste(i,".",seq_along(models),sep=""),
+               pen=sumPen/n, complexity=sumPen, contrast=-LLH)
+  } ) )
+  
+  print(tableauRecap)
+  tableauRecap = tableauRecap[which(tableauRecap[,4]!= Inf),]
   modSel = capushe::capushe(tableauRecap, n)
   indModSel <-
-		if (selecMod == 'DDSE')
-			as.numeric(modSel@DDSE@model)
-		else if (selecMod == 'Djump')
-			as.numeric(modSel@Djump@model)
-		else if (selecMod == 'BIC')
-			modSel@BIC_capushe$model
-		else if (selecMod == 'AIC')
-			modSel@AIC_capushe$model
-
+    if (selecMod == 'DDSE')
+      as.numeric(modSel@DDSE@model)
+  else if (selecMod == 'Djump')
+    as.numeric(modSel@Djump@model)
+  else if (selecMod == 'BIC')
+    modSel@BIC_capushe$model
+  else if (selecMod == 'AIC')
+    modSel@AIC_capushe$model
+  
   mod = as.character(tableauRecap[indModSel,1])
   listMod = as.integer(unlist(strsplit(mod, "[.]")))
   modelSel = models_list[[listMod[1]]][[listMod[2]]]
@@ -144,8 +146,8 @@ print(tableauRecap)
   Gam = Gam/rowSums(Gam)
   modelSel$affec = apply(Gam, 1,which.max)
   modelSel$proba = Gam
-
-    if (plot){
+  
+  if (plot){
     print(plot_valse(modelSel,n))
   }
   
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R
index 120196d..0963946 100644
--- a/pkg/R/plot_valse.R
+++ b/pkg/R/plot_valse.R
@@ -10,7 +10,7 @@
 #'
 #' @export
 #'
-plot_valse = function(model,n){
+plot_valse = function(model,n, comp = FALSE, k1 = NA, k2 = NA){
   require("gridExtra")
   require("ggplot2")
   require("reshape2")
@@ -28,13 +28,15 @@ plot_valse = function(model,n){
   print(gReg)
   
   ## Differences between two clusters
-  k1 = 1
-  k2 = 2
-  Melt = melt(t(model$phi[,,k1]-model$phi[,,k2]))
-  gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) +  geom_tile() + 
-    scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,  space = "Lab") +
-    ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2))
-  print(gDiff)
+  if (comp){
+    if (is.na(k1) || is.na(k)){print('k1 and k2 must be integers, representing the clusters you want to compare')}
+    Melt = melt(t(model$phi[,,k1]-model$phi[,,k2]))
+    gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) +  geom_tile() + 
+      scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,  space = "Lab") +
+      ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2))
+    print(gDiff)
+    
+  }
   
   ### Covariance matrices
   matCov = matrix(NA, nrow = dim(model$rho[,,1])[1], ncol = K)
@@ -47,23 +49,27 @@ plot_valse = function(model,n){
     ggtitle("Covariance matrices")
   print(gCov )
   
-  ### proportions
+  ### Proportions
   gam2 = matrix(NA, ncol = K, nrow = n)
   for (i in 1:n){
-    gam2[i, ] = c(model$Gam[i, model$affec[i]], model$affec[i])
+    gam2[i, ] = c(model$proba[i, model$affec[i]], model$affec[i])
   }
   
   bp <- ggplot(data.frame(gam2), aes(x=X2, y=X1, color=X2, group = X2)) +
     geom_boxplot() + theme(legend.position = "none")+ background_grid(major = "xy", minor = "none")
-  print(bp )
+  print(bp)
   
   ### Mean in each cluster
   XY = cbind(X,Y)
   XY_class= list()
   meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2])
   for (r in 1:K){
-    XY_class[[r]] = XY[affec == r, ]
-    meanPerClass[,r] = apply(XY_class[[r]], 2, mean)
+    XY_class[[r]] = XY[model$affec == r, ]
+    if (sum(model$affec==r) == 1){
+      meanPerClass[,r] = XY_class[[r]]
+    } else {
+      meanPerClass[,r] = apply(XY_class[[r]], 2, mean)
+    }
   }
   data = data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2],K))
   g = ggplot(data, aes(x=time, y = mean, group = cluster, color = cluster))
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R
index b23eac2..65fbde5 100644
--- a/pkg/R/selectVariables.R
+++ b/pkg/R/selectVariables.R
@@ -23,46 +23,53 @@
 #' @export
 #'
 selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
-	X,Y,thresh,tau, ncores=3, fast=TRUE)
+                           X,Y,thresh,tau, ncores=3, fast=TRUE)
 {
-	if (ncores > 1)
-	{
-		cl = parallel::makeCluster(ncores, outfile='')
-		parallel::clusterExport(cl=cl,
-			varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
-			envir=environment())
-	}
-
-	# Calcul pour un lambda
-	computeCoefs <- function(lambda)
-	{
-		params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
-
-		p = dim(phiInit)[1]
-		m = dim(phiInit)[2]
-
-		#selectedVariables: list where element j contains vector of selected variables in [1,m]
-		selectedVariables = lapply(1:p, function(j) {
-			#from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
-			#and finally return the corresponding indices
-		  seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
-		})
-
-		list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
-	}
-
-	# Pour chaque lambda de la grille, on calcule les coefficients
-	out <-
-		if (ncores > 1)
-			parLapply(cl, glambda, computeCoefs)
-		else
-			lapply(glambda, computeCoefs)
-	if (ncores > 1)
-		parallel::stopCluster(cl)
-
-	# Suppression doublons
-	sha1_array <- lapply(out, digest::sha1)
-	out[ !duplicated(sha1_array) ]
-
-	out
+  if (ncores > 1)
+  {
+    cl = parallel::makeCluster(ncores, outfile='')
+    parallel::clusterExport(cl=cl,
+                            varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
+                            envir=environment())
+  }
+  
+  # Computation for a fixed lambda
+  computeCoefs <- function(lambda)
+  {
+    params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
+    
+    p = dim(phiInit)[1]
+    m = dim(phiInit)[2]
+    
+    #selectedVariables: list where element j contains vector of selected variables in [1,m]
+    selectedVariables = lapply(1:p, function(j) {
+      #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
+      #and finally return the corresponding indices
+      seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
+    })
+    
+    list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
+  }
+  
+  # For each lambda in the grid, we compute the coefficients
+  out <-
+    if (ncores > 1)
+      parLapply(cl, glambda, computeCoefs)
+  else
+    lapply(glambda, computeCoefs)
+  if (ncores > 1)
+    parallel::stopCluster(cl)
+  # Suppress models which are computed twice
+  #En fait, ca ca fait la comparaison de tous les parametres
+  #On veut juste supprimer ceux qui ont les memes variables sÃ©lectionnÃ©es
+  #sha1_array <- lapply(out, digest::sha1)
+  #out[ duplicated(sha1_array) ]
+  selec = lapply(out, function(model) model$selected)
+  ind_dup = duplicated(selec)
+  ind_uniq = which(!ind_dup)
+  out2 = list()
+  for (l in 1:length(ind_uniq)){
+    out2[[l]] = out[[ind_uniq[l]]]
+  }
+  out2
 }
diff --git a/reports/simulData_17mars.R b/reports/simulData_17mars.R
index 252e660..52148fd 100644
--- a/reports/simulData_17mars.R
+++ b/reports/simulData_17mars.R
@@ -5,10 +5,10 @@ simulData_17mars = function(ite){
   ## Modele
   ###########
   K = 2
-  p = 20
+  p = 48
   T = seq(0,1.5,length.out = p)
   T2 = seq(0,3, length.out = 2*p)
-  n = 30
+  n = 100
   x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1
   sigmaX = 0.12
   sigmaY = 0.12
@@ -24,8 +24,6 @@ simulData_17mars = function(ite){
   ###########
   require(wavelets)
   XY = array(0, dim = c(2*p,n))
-  Xproj = array(0, dim=c(48,n))
-  Yproj = array(0, dim=c(48,n))
   XYproj = array(0, dim=c(96,n))
   x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n)
   affec = sample(c(1,2), n, replace = TRUE)
@@ -47,14 +45,12 @@ simulData_17mars = function(ite){
     Ay = dwt(y[,i], filter='haar')@V
     Ay = rev(unlist(Ay))
     Ay = Ay[2:(1+3)]
-    Xproj[,i] = c(Ax,Dx)
-    Yproj[,i] = c(Ay,Dy)
     XYproj[,i] = c(Ax,Dx,Ay,Dy)
   }
   
-  res_valse = valse(x,y)
-  res_valse_proj = valse(Xproj, Yproj)
+  res_valse = valse(x,y, kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 200)
+  res_valse_proj = valse(XYproj[1:p,],XYproj[(p+1):(2*p),], kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 200)
   
-  save(res_valse,file=paste("./Out/Res_",ite, ".RData",sep=""))
-  save(res_valse_proj,file=paste("./Out/ResProj_",ite, ".RData",sep=""))
+  save(res_valse,file=paste("Res_",ite, ".RData",sep=""))
+  save(res_valse_proj,file=paste("ResProj_",ite, ".RData",sep=""))
 }