From 321e13a991a5a0e6c97225fdca436870e5e805d1 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 11 Apr 2017 11:57:11 +0200
Subject: [PATCH] fix test; EMGLLF.c != EMGLLF.R now...

---
 CCC.R                                         |  86 ------
 pkg/R/generateXY.R                            |   8 +-
 test/generate_test_data/EMGLLF.R              | 257 +++++++++---------
 .../generateRunSaveTest_EMGLLF.R              |   6 +-
 .../generateRunSaveTest_EMGrank.R             |   2 +-
 test/generate_test_data/helper.R              |  23 +-
 test/test.EMGLLF.c                            |  10 +-
 7 files changed, 148 insertions(+), 244 deletions(-)
 delete mode 100644 CCC.R

diff --git a/CCC.R b/CCC.R
deleted file mode 100644
index 9a17c08..0000000
--- a/CCC.R
+++ /dev/null
@@ -1,86 +0,0 @@
-#' constructionModelesLassoMLE
-#'
-#' TODO: description
-#'
-#' @param ...
-#'
-#' @return ...
-#'
-#' export
-constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, maxi,
-	gamma, X, Y, seuil, tau, selected, ncores=3, verbose=FALSE)
-{
-	if (ncores > 1)
-	{
-		cl = parallel::makeCluster(ncores)
-		parallel::clusterExport( cl, envir=environment(),
-			varlist=c("phiInit","rhoInit","gamInit","mini","maxi","gamma","X","Y","seuil",
-			"tau","selected","ncores","verbose") )
-	}
-
-	# Individual model computation
-	computeAtLambda <- function(lambda)
-	{
-		if (ncores > 1)
-			require("valse") #// nodes start with an ampty environment
-
-		if (verbose)
-			print(paste("Computations for lambda=",lambda))
-
-		n = dim(X)[1]
-		p = dim(phiInit)[1]
-		m = dim(phiInit)[2]
-		k = dim(phiInit)[3]
-
-		sel.lambda = selected[[lambda]]
-#		col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
-		col.sel <- which( sapply(sel.lambda,length) > 0 ) #if list of selected vars
-
-		if (length(col.sel) == 0)
-			return (NULL)
-
-		# lambda == 0 because we compute the EMV: no penalization here
-		res = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0,
-			X[,col.sel],Y,tau)
-		
-		# Eval dimension from the result + selected
-		phiLambda2 = res_EM$phi
-		rhoLambda = res_EM$rho
-		piLambda = res_EM$pi
-		phiLambda = array(0, dim = c(p,m,k))
-		for (j in seq_along(col.sel))
-			phiLambda[col.sel[j],,] = phiLambda2[j,,]
-
-		dimension = 0
-		for (j in 1:p)
-		{
-			b = setdiff(1:m, sel.lambda[,j])
-			if (length(b) > 0)
-				phiLambda[j,b,] = 0.0
-			dimension = dimension + sum(sel.lambda[,j]!=0)
-		}
-
-		# on veut calculer la vraisemblance avec toutes nos estimations
-		densite = vector("double",n)
-		for (r in 1:k)
-		{
-			delta = Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r])
-			densite = densite + piLambda[r] *
-				det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0)
-		}
-		llhLambda = c( sum(log(densite)), (dimension+m+1)*k-1 )
-		list("phi"= phiLambda, "rho"= rhoLambda, "pi"= piLambda, "llh" = llhLambda)
-	}
-
-	#Pour chaque lambda de la grille, on calcule les coefficients
-	out =
-		if (ncores > 1)
-			parLapply(cl, glambda, computeAtLambda)
-		else
-			lapply(glambda, computeAtLambda)
-
-	if (ncores > 1)
-		parallel::stopCluster(cl)
-
-	out
-}
diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R
index 496d4e5..069c470 100644
--- a/pkg/R/generateXY.R
+++ b/pkg/R/generateXY.R
@@ -17,12 +17,12 @@ generateXY = function(n, π, meanX, β, covX, covY)
 	p <- dim(covX)[1]
 	m <- dim(covY)[1]
 	k <- dim(covY)[3]
-	
+
 	X <- matrix(nrow=0, ncol=p)
 	Y <- matrix(nrow=0, ncol=m)
 
 	#random generation of the size of each population in X~Y (unordered)
-	sizePop <- rmultinom(1, n, pi)
+	sizePop <- rmultinom(1, n, π)
 	class <- c() #map i in 1:n --> index of class in 1:k
 
 	for (i in 1:k)
@@ -30,8 +30,8 @@ generateXY = function(n, π, meanX, β, covX, covY)
 		class <- c(class, rep(i, sizePop[i]))
 		newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
 		X <- rbind( X, newBlockX )
-		Y <- rbind( Y, apply( newBlockX, 1, function(row)
-			mvrnorm(1, row %*% beta[,,i], covY[,,i]) ) )
+		Y <- rbind( Y, t(apply( newBlockX, 1, function(row)
+			MASS::mvrnorm(1, row %*% β[,,i], covY[,,i]) )) )
 	}
 
 	shuffle = sample(n)
diff --git a/test/generate_test_data/EMGLLF.R b/test/generate_test_data/EMGLLF.R
index 039e291..09ae2e3 100644
--- a/test/generate_test_data/EMGLLF.R
+++ b/test/generate_test_data/EMGLLF.R
@@ -1,156 +1,143 @@
 EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
 {
-  #matrix dimensions
-  n = dim(X)[1]
-  p = dim(phiInit)[1]
-  m = dim(phiInit)[2]
-  k = dim(phiInit)[3]
-  
-  #init outputs
-  phi = phiInit
-  rho = rhoInit
-  pi = piInit
-  LLF = rep(0, maxi)
-  S = array(0, dim=c(p,m,k))
-  
-  gam = gamInit
-  Gram2 = array(0, dim=c(p,p,k))
-  ps2 = array(0, dim=c(p,m,k))
-  b = rep(0, k)
-  X2 = array(0, dim=c(n,p,k))
-  Y2 = array(0, dim=c(n,m,k))
-  dist = 0
-  dist2 = 0
-  ite = 1
-  pi2 = rep(0, k)
-  ps = matrix(0, m,k)
-  nY2 = matrix(0, m,k)
-  ps1 = array(0, dim=c(n,m,k))
-  Gam = matrix(0, n,k)
-  EPS = 1E-15
-  
-  while(ite <= mini || (ite<= maxi && (dist>= tau || dist2 >= sqrt(tau))))
+	# Matrix dimensions
+	n = dim(X)[1]
+	p = dim(phiInit)[1]
+	m = dim(phiInit)[2]
+	k = dim(phiInit)[3]
+
+	# Outputs
+	phi = phiInit
+	rho = rhoInit
+	pi = piInit
+	llh = -Inf
+	S = array(0, dim=c(p,m,k))
+
+	# Algorithm variables
+	gam = gamInit
+	Gram2 = array(0, dim=c(p,p,k))
+	ps2 = array(0, dim=c(p,m,k))
+	X2 = array(0, dim=c(n,p,k))
+	Y2 = array(0, dim=c(n,m,k))
+	EPS = 1e-15
+
+	for (ite in 1:maxi)
 	{
-    Phi = phi
-    Rho = rho
-    Pi = pi
+		# Remember last pi,rho,phi values for exit condition in the end of loop
+		Phi = phi
+		Rho = rho
+		Pi = pi
 
-    #calcul associé à Y et X
-    for(r in 1:k)
+		# Calcul associé à Y et X
+		for (r in 1:k)
 		{
-      for (mm in 1:m)
-        Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
-      for (i in 1:n)
-        X2[i,,r] = sqrt(gam[i,r]) * X[i,]
-      for (mm in 1:m)
-        ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
-      for (j in 1:p)
+			for (mm in 1:m)
+				Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+			for (i in 1:n)
+				X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+			for (mm in 1:m)
+				ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+			for (j in 1:p)
 			{
-        for (s in 1:p)
-          Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
-      }
-    }
-
-    ##########
-    #Etape M #
-    ##########
-    
-    #pour pi
-    for (r in 1:k)
-      b[r] = sum(abs(phi[,,r]))
-    gam2 = colSums(gam)
-    a = sum(gam %*% log(pi))
-
-    #tant que les props sont negatives
-    kk = 0
-    pi2AllPositive = FALSE
-    while (!pi2AllPositive)
+				for (s in 1:p)
+					Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+			}
+		}
+
+		##########
+		#Etape M #
+		##########
+
+		# Pour pi
+		b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
+		gam2 = colSums(gam)
+		a = sum(gam %*% log(pi))
+
+		# Tant que les props sont negatives
+		kk = 0
+		pi2AllPositive = FALSE
+		while (!pi2AllPositive)
 		{
-      pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
-      pi2AllPositive = all(pi2 >= 0)
-      kk = kk+1
-    }
+			pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+			pi2AllPositive = all(pi2 >= 0)
+			kk = kk+1
+		}
 
-    #t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
-    while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+		# t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
+		while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
 			-sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
 		{
-      pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
-      kk = kk + 1
-    }
-    t = 0.1^kk
-    pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
-    #Pour phi et rho
-    for (r in 1:k)
+			pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+			kk = kk + 1
+		}
+		t = 0.1^kk
+		pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+
+		#Pour phi et rho
+		for (r in 1:k)
 		{
-      for (mm in 1:m)
+			for (mm in 1:m)
 			{
-        for (i in 1:n)
-				{
-          ps1[i,mm,r] = Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
-        }
-        ps[mm,r] = sum(ps1[,mm,r])
-        nY2[mm,r] = sum(Y2[,mm,r]^2)
-        rho[mm,mm,r] = (ps[mm,r]+sqrt(ps[mm,r]^2+4*nY2[mm,r]*gam2[r])) / (2*nY2[mm,r])
+				ps = 0
+				for (i in 1:n)
+					ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+				nY2 = sum(Y2[,mm,r]^2)
+				rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
 			}
-    }
+		}
 
-    for (r in 1:k)
+		for (r in 1:k)
 		{
-      for (j in 1:p)
+			for (j in 1:p)
 			{
-        for (mm in 1:m)
+				for (mm in 1:m)
 				{
-          S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
+					S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
 					if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
-            phi[j,mm,r]=0
-          else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
-            phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
-          else
-            phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
-        }
-      }
-    }
-
-    ##########
-    #Etape E #
-    ##########
-
-		sumLogLLF2 = 0
-    for (i in 1:n)
+						phi[j,mm,r]=0
+					else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
+						phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
+					else
+						phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
+				}
+			}
+		}
+
+		##########
+		#Etape E #
+		##########
+
+		# Precompute det(rho[,,r]) for r in 1...k
+		detRho = sapply(1:k, function(r) det(rho[,,r]))
+
+		sumLogLLH = 0
+		for (i in 1:n)
 		{
-      #precompute sq norms to numerically adjust their values
-      sqNorm2 = rep(0,k)
-      for (r in 1:k)
-        sqNorm2[r] = sum( (Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2 )
-
-      #compute Gam[,]
-      sumLLF1 = 0.0;
-      for (r in 1:k)
+			# Update gam[,]
+			sumGamI = 0
+			for (r in 1:k)
 			{
-				Gam[i,r] = pi[r] * exp(-0.5*sqNorm2[r]) * det(rho[,,r])
-        sumLLF1 = sumLLF1 + Gam[i,r] / (2*base::pi)^(m/2)
-      }
-      sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
-      sumGamI = sum(Gam[i,])
-      if(sumGamI > EPS)
-        gam[i,] = Gam[i,] / sumGamI
-      else
-        gam[i,] = rep(0,k)
-    }
-
-    sumPen = sum(pi^gamma * b)
-    LLF[ite] = -sumLogLLF2/n + lambda*sumPen
-    dist = ifelse( ite == 1, LLF[ite], (LLF[ite]-LLF[ite-1]) / (1+abs(LLF[ite])) )
-    Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
-    Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
-    Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
-    dist2 = max(Dist1,Dist2,Dist3)
-
-    ite = ite+1
-  }
-  
-  affec = apply(gam, 1, which.max)
-  return(list("phi"=phi, "rho"=rho, "pi"=pi, "LLF"=LLF, "S"=S, "affec" = affec ))
+				gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
+				sumGamI = sumGamI + gam[i,r]
+			}
+			sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
+			if (sumGamI > EPS) #else: gam[i,] is already ~=0
+				gam[i,] = gam[i,] / sumGamI
+		}
+
+		sumPen = sum(pi^gamma * b)
+		last_llh = llh
+		llh = -sumLogLLH/n + lambda*sumPen
+		dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
+		Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
+		Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+		Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
+		dist2 = max(Dist1,Dist2,Dist3)
+
+		if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
+			break
+	}
+
+	affec = apply(gam, 1, which.max)
+	list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
 }
diff --git a/test/generate_test_data/generateRunSaveTest_EMGLLF.R b/test/generate_test_data/generateRunSaveTest_EMGLLF.R
index bf68ab3..8fe1f33 100644
--- a/test/generate_test_data/generateRunSaveTest_EMGLLF.R
+++ b/test/generate_test_data/generateRunSaveTest_EMGLLF.R
@@ -8,8 +8,8 @@ generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
 	dir.create(testFolder, showWarnings=FALSE, mode="0755")
 
 	require(valse)
-	params = valse:::basicInitParameters(n, p, m, k)
-	xy = valse:::generateXYdefault(n, p, m, k)
+	params = basicInitParameters(n, p, m, k)
+	xy = generateXYdefault(n, p, m, k)
 
 	#save inputs
 	write.table(as.double(params$phiInit), paste(testFolder,"phiInit",sep=""),
@@ -44,7 +44,7 @@ generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
 	write.table(as.double(res$phi), paste(testFolder,"phi",sep=""), row.names=F, col.names=F)
 	write.table(as.double(res$rho), paste(testFolder,"rho",sep=""), row.names=F, col.names=F)
 	write.table(as.double(res$pi), paste(testFolder,"pi",sep=""), row.names=F, col.names=F)
-	write.table(as.double(res$LLF), paste(testFolder,"LLF",sep=""), row.names=F, col.names=F)
+	write.table(as.double(res$llh), paste(testFolder,"llh",sep=""), row.names=F, col.names=F)
 	write.table(as.double(res$S), paste(testFolder,"S",sep=""), row.names=F, col.names=F)
 	write.table(as.integer(res$affec), paste(testFolder,"affec",sep=""), row.names=F, col.names=F)
 }
diff --git a/test/generate_test_data/generateRunSaveTest_EMGrank.R b/test/generate_test_data/generateRunSaveTest_EMGrank.R
index f348d71..14c1b2a 100644
--- a/test/generate_test_data/generateRunSaveTest_EMGrank.R
+++ b/test/generate_test_data/generateRunSaveTest_EMGrank.R
@@ -10,7 +10,7 @@ generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
   for(i in 1:k)
     rho[,,i] = diag(1,m)
 	require(valse)
-  xy = valse:::generateXYdefault(n, p, m, k)
+  xy = generateXYdefault(n, p, m, k)
 
   testFolder = "../data/"
   dir.create(testFolder, showWarnings=FALSE, mode="0755")
diff --git a/test/generate_test_data/helper.R b/test/generate_test_data/helper.R
index 49cd1b5..8ec122b 100644
--- a/test/generate_test_data/helper.R
+++ b/test/generate_test_data/helper.R
@@ -1,10 +1,12 @@
 #' Generate a sample of (X,Y) of size n with default values
+#'
 #' @param n sample size
 #' @param p number of covariates
 #' @param m size of the response
 #' @param k number of clusters
+#'
 #' @return list with X and Y
-#' @export
+#'
 generateXYdefault = function(n, p, m, k)
 {
 	meanX = rep(0, p)
@@ -12,27 +14,30 @@ generateXYdefault = function(n, p, m, k)
 	covY = array(dim=c(m,m,k))
 	for(r in 1:k)
 		covY[,,r] = diag(m)
-	pi = rep(1./k,k)
+	π = rep(1./k,k)
 	#initialize beta to a random number of non-zero random value
-	beta = array(0, dim=c(p,m,k))
+	β = array(0, dim=c(p,m,k))
 	for (j in 1:p)
 	{
 		nonZeroCount = sample(1:m, 1)
-		beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k)
+		β[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k)
 	}
 
-	sample_IO = generateXY(meanX, covX, covY, pi, beta, n)
+	sample_IO = generateXY(n, π, meanX, β, covX, covY)
 	return (list(X=sample_IO$X,Y=sample_IO$Y))
 }
 
-#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for weights,
-#' identity for covariance matrices, and uniformly distributed for the clustering)
+#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for
+#' weights, identity for covariance matrices, and uniformly distributed for the
+#' clustering)
+#'
 #' @param n sample size
 #' @param p number of covariates
 #' @param m size of the response
 #' @param k number of clusters
+#'
 #' @return list with phiInit, rhoInit,piInit,gamInit
-#' @export
+#'
 basicInitParameters = function(n,p,m,k)
 {
 	phiInit = array(0, dim=c(p,m,k))
@@ -49,5 +54,5 @@ basicInitParameters = function(n,p,m,k)
 		gamInit[i,R[i]] = 0.9
 	gamInit = gamInit/sum(gamInit[1,])
 
-	return (list("phiInit" = phiInit, "rhoInit" = rhoInit, "piInit" = piInit, "gamInit" = gamInit))
+	return (list("phiInit"=phiInit, "rhoInit"=rhoInit, "piInit"=piInit, "gamInit"=gamInit))
 }
diff --git a/test/test.EMGLLF.c b/test/test.EMGLLF.c
index 7eed301..fa6e36c 100644
--- a/test/test.EMGLLF.c
+++ b/test/test.EMGLLF.c
@@ -31,7 +31,7 @@ int main(int argc, char** argv)
 	Real* phi = (Real*)malloc(p*m*k*sizeof(Real));
 	Real* rho = (Real*)malloc(m*m*k*sizeof(Real));
 	Real* pi = (Real*)malloc(k*sizeof(Real));
-	Real* LLF = (Real*)malloc(maxi*sizeof(Real));
+	Real llh;
 	Real* S = (Real*)malloc(p*m*k*sizeof(Real));
 	int* affec = (int*)malloc(n*sizeof(int));
 	/////////////
@@ -39,7 +39,7 @@ int main(int argc, char** argv)
 	////////////////////
 	// Call to EMGLLF //
 	EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,
-		phi,rho,pi,LLF,S,affec,
+		phi,rho,pi,&llh,S,affec,
 		n,p,m,k);
 	////////////////////
 
@@ -66,10 +66,8 @@ int main(int argc, char** argv)
 	free(pi);
 	free(ref_pi);
 
-	Real* ref_LLF = readArray_real("LLF");
-	compareArray_real("LLF", LLF, ref_LLF, maxi);
-	free(LLF);
-	free(ref_LLF);
+	Real ref_llh = read_real("llh");
+	compareArray_real("llh", &llh, &ref_llh, 1);
 
 	Real* ref_S = readArray_real("S");
 	compareArray_real("S", S, ref_S, p*m*k);
-- 
2.44.0