Add 'fast' argument to select C code or R code
authorBenjamin Auder <benjamin.auder@somewhere>
Tue, 11 Apr 2017 12:35:56 +0000 (14:35 +0200)
committerBenjamin Auder <benjamin.auder@somewhere>
Tue, 11 Apr 2017 12:35:56 +0000 (14:35 +0200)
16 files changed:
pkg/R/EMGLLF.R
pkg/R/EMGLLF_R.R [deleted file]
pkg/R/EMGrank.R
pkg/R/EMGrank_R.R [deleted file]
pkg/R/computeGridLambda.R
pkg/R/constructionModelesLassoMLE.R
pkg/R/constructionModelesLassoRank.R
pkg/R/initSmallEM.R
pkg/R/main.R
pkg/R/selectVariables.R
test/generateRunSaveTest_EMGLLF.R [moved from test/generate_test_data/generateRunSaveTest_EMGLLF.R with 99% similarity]
test/generateRunSaveTest_EMGrank.R [moved from test/generate_test_data/generateRunSaveTest_EMGrank.R with 98% similarity]
test/generate_test_data/EMGLLF.R [deleted file]
test/generate_test_data/EMGrank.R [deleted file]
test/helper.R [moved from test/generate_test_data/helper.R with 100% similarity]
test/run.sh

index 5484706..0158914 100644 (file)
 #'
 #' @export
 EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
-       mini, maxi, gamma, lambda, X, Y, tau)
+       mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
 {
-       #TEMPORARY: use R version
-       return (EMGLLF_R(phiInit, rhoInit, piInit, gamInit,mini, maxi, gamma, lambda, X, Y, tau))
+       if (!fast)
+       {
+               # Function in R
+               return (EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
+       }
 
+       # Function in C
        n = nrow(X) #nombre d'echantillons
        p = ncol(X) #nombre de covariables
        m = ncol(Y) #taille de Y (multivarié)
@@ -39,3 +43,148 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
                n, p, m, k,
                PACKAGE="valse")
 }
+
+# R version - slow but easy to read
+EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+{
+       # Matrix dimensions
+       n = dim(X)[1]
+       p = dim(phiInit)[1]
+       m = dim(phiInit)[2]
+       k = dim(phiInit)[3]
+
+       # Outputs
+       phi = phiInit
+       rho = rhoInit
+       pi = piInit
+       llh = -Inf
+       S = array(0, dim=c(p,m,k))
+
+       # Algorithm variables
+       gam = gamInit
+       Gram2 = array(0, dim=c(p,p,k))
+       ps2 = array(0, dim=c(p,m,k))
+       X2 = array(0, dim=c(n,p,k))
+       Y2 = array(0, dim=c(n,m,k))
+       EPS = 1e-15
+
+       for (ite in 1:maxi)
+       {
+               # Remember last pi,rho,phi values for exit condition in the end of loop
+               Phi = phi
+               Rho = rho
+               Pi = pi
+
+               # Calcul associé à Y et X
+               for (r in 1:k)
+               {
+                       for (mm in 1:m)
+                               Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+                       for (i in 1:n)
+                               X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+                       for (mm in 1:m)
+                               ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+                       for (j in 1:p)
+                       {
+                               for (s in 1:p)
+                                       Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+                       }
+               }
+
+               ##########
+               #Etape M #
+               ##########
+
+               # Pour pi
+               b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
+               gam2 = colSums(gam)
+               a = sum(gam %*% log(pi))
+
+               # Tant que les props sont negatives
+               kk = 0
+               pi2AllPositive = FALSE
+               while (!pi2AllPositive)
+               {
+                       pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+                       pi2AllPositive = all(pi2 >= 0)
+                       kk = kk+1
+               }
+
+               # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
+               while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+                       -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
+               {
+                       pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+                       kk = kk + 1
+               }
+               t = 0.1^kk
+               pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+
+               #Pour phi et rho
+               for (r in 1:k)
+               {
+                       for (mm in 1:m)
+                       {
+                               ps = 0
+                               for (i in 1:n)
+                                       ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+                               nY2 = sum(Y2[,mm,r]^2)
+                               rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+                       }
+               }
+
+               for (r in 1:k)
+               {
+                       for (j in 1:p)
+                       {
+                               for (mm in 1:m)
+                               {
+                                       S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
+                                       if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
+                                               phi[j,mm,r]=0
+                                       else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
+                                               phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
+                                       else
+                                               phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
+                               }
+                       }
+               }
+
+               ##########
+               #Etape E #
+               ##########
+
+               # Precompute det(rho[,,r]) for r in 1...k
+               detRho = sapply(1:k, function(r) det(rho[,,r]))
+
+               sumLogLLH = 0
+               for (i in 1:n)
+               {
+                       # Update gam[,]
+                       sumGamI = 0
+                       for (r in 1:k)
+                       {
+                               gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
+                               sumGamI = sumGamI + gam[i,r]
+                       }
+                       sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
+                       if (sumGamI > EPS) #else: gam[i,] is already ~=0
+                               gam[i,] = gam[i,] / sumGamI
+               }
+
+               sumPen = sum(pi^gamma * b)
+               last_llh = llh
+               llh = -sumLogLLH/n + lambda*sumPen
+               dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
+               Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
+               Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+               Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
+               dist2 = max(Dist1,Dist2,Dist3)
+
+               if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
+                       break
+       }
+
+       affec = apply(gam, 1, which.max)
+       list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
+}
diff --git a/pkg/R/EMGLLF_R.R b/pkg/R/EMGLLF_R.R
deleted file mode 100644 (file)
index 09ae2e3..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
-{
-       # Matrix dimensions
-       n = dim(X)[1]
-       p = dim(phiInit)[1]
-       m = dim(phiInit)[2]
-       k = dim(phiInit)[3]
-
-       # Outputs
-       phi = phiInit
-       rho = rhoInit
-       pi = piInit
-       llh = -Inf
-       S = array(0, dim=c(p,m,k))
-
-       # Algorithm variables
-       gam = gamInit
-       Gram2 = array(0, dim=c(p,p,k))
-       ps2 = array(0, dim=c(p,m,k))
-       X2 = array(0, dim=c(n,p,k))
-       Y2 = array(0, dim=c(n,m,k))
-       EPS = 1e-15
-
-       for (ite in 1:maxi)
-       {
-               # Remember last pi,rho,phi values for exit condition in the end of loop
-               Phi = phi
-               Rho = rho
-               Pi = pi
-
-               # Calcul associé à Y et X
-               for (r in 1:k)
-               {
-                       for (mm in 1:m)
-                               Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
-                       for (i in 1:n)
-                               X2[i,,r] = sqrt(gam[i,r]) * X[i,]
-                       for (mm in 1:m)
-                               ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
-                       for (j in 1:p)
-                       {
-                               for (s in 1:p)
-                                       Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
-                       }
-               }
-
-               ##########
-               #Etape M #
-               ##########
-
-               # Pour pi
-               b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
-               gam2 = colSums(gam)
-               a = sum(gam %*% log(pi))
-
-               # Tant que les props sont negatives
-               kk = 0
-               pi2AllPositive = FALSE
-               while (!pi2AllPositive)
-               {
-                       pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
-                       pi2AllPositive = all(pi2 >= 0)
-                       kk = kk+1
-               }
-
-               # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
-               while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
-                       -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
-               {
-                       pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
-                       kk = kk + 1
-               }
-               t = 0.1^kk
-               pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
-               #Pour phi et rho
-               for (r in 1:k)
-               {
-                       for (mm in 1:m)
-                       {
-                               ps = 0
-                               for (i in 1:n)
-                                       ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
-                               nY2 = sum(Y2[,mm,r]^2)
-                               rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
-                       }
-               }
-
-               for (r in 1:k)
-               {
-                       for (j in 1:p)
-                       {
-                               for (mm in 1:m)
-                               {
-                                       S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
-                                       if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
-                                               phi[j,mm,r]=0
-                                       else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
-                                               phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
-                                       else
-                                               phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
-                               }
-                       }
-               }
-
-               ##########
-               #Etape E #
-               ##########
-
-               # Precompute det(rho[,,r]) for r in 1...k
-               detRho = sapply(1:k, function(r) det(rho[,,r]))
-
-               sumLogLLH = 0
-               for (i in 1:n)
-               {
-                       # Update gam[,]
-                       sumGamI = 0
-                       for (r in 1:k)
-                       {
-                               gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
-                               sumGamI = sumGamI + gam[i,r]
-                       }
-                       sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
-                       if (sumGamI > EPS) #else: gam[i,] is already ~=0
-                               gam[i,] = gam[i,] / sumGamI
-               }
-
-               sumPen = sum(pi^gamma * b)
-               last_llh = llh
-               llh = -sumLogLLH/n + lambda*sumPen
-               dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
-               Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
-               Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
-               Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
-               dist2 = max(Dist1,Dist2,Dist3)
-
-               if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
-                       break
-       }
-
-       affec = apply(gam, 1, which.max)
-       list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
-}
index 3216870..e30b605 100644 (file)
 #'   LLF : log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres
 #'
 #' @export
-EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast=TRUE)
 {
-       #TEMPORARY: use R version
-       return (EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+       if (!fast)
+       {
+               # Function in R
+               return (EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+       }
 
+       # Function in C
        n = nrow(X) #nombre d'echantillons
        p = ncol(X) #nombre de covariables
        m = ncol(Y) #taille de Y (multivarié)
@@ -32,3 +36,89 @@ EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
                n, p, m, k,
                PACKAGE="valse")
 }
+
+#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
+# --> Yes, we should use by-columns storage everywhere... [later!]
+matricize <- function(X)
+{
+       if (!is.matrix(X))
+               return (t(as.matrix(X)))
+       return (X)
+}
+
+# R version - slow but easy to read
+EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+{
+  #matrix dimensions
+  n = dim(X)[1]
+  p = dim(X)[2]
+  m = dim(Rho)[2]
+  k = dim(Rho)[3]
+
+  #init outputs
+  phi = array(0, dim=c(p,m,k))
+  Z = rep(1, n)
+  LLF = 0
+
+  #local variables
+  Phi = array(0, dim=c(p,m,k))
+  deltaPhi = c()
+  sumDeltaPhi = 0.
+  deltaPhiBufferSize = 20
+
+  #main loop
+  ite = 1
+  while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
+       {
+    #M step: Mise à jour de Beta (et donc phi)
+    for(r in 1:k)
+               {
+      Z_indice = seq_len(n)[Z==r] #indices où Z == r
+      if (length(Z_indice) == 0)
+        next
+      #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+      s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
+                               crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
+      S = s$d
+      #Set m-rank(r) singular values to zero, and recompose
+      #best rank(r) approximation of the initial product
+      if(rank[r] < length(S))
+        S[(rank[r]+1):length(S)] = 0
+      phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
+    }
+
+               #Etape E et calcul de LLF
+               sumLogLLF2 = 0
+               for(i in seq_len(n))
+               {
+                       sumLLF1 = 0
+                       maxLogGamIR = -Inf
+                       for (r in seq_len(k))
+                       {
+                               dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
+                               logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
+                               #Z[i] = index of max (gam[i,])
+                               if(logGamIR > maxLogGamIR)
+                               {
+                                       Z[i] = r
+                                       maxLogGamIR = logGamIR
+                               }
+                               sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
+                       }
+                       sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
+               }
+  
+               LLF = -1/n * sumLogLLF2
+
+               #update distance parameter to check algorithm convergence (delta(phi, Phi))
+               deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
+               if (length(deltaPhi) > deltaPhiBufferSize)
+                 deltaPhi = deltaPhi[2:length(deltaPhi)]
+               sumDeltaPhi = sum(abs(deltaPhi))
+
+               #update other local variables
+               Phi = phi
+               ite = ite+1
+  }
+  return(list("phi"=phi, "LLF"=LLF))
+}
diff --git a/pkg/R/EMGrank_R.R b/pkg/R/EMGrank_R.R
deleted file mode 100644 (file)
index c4576e4..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
-# --> Yes, we should use by-columns storage everywhere... [later!]
-matricize <- function(X)
-{
-       if (!is.matrix(X))
-               return (t(as.matrix(X)))
-       return (X)
-}
-
-require(MASS)
-EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
-{
-  #matrix dimensions
-  n = dim(X)[1]
-  p = dim(X)[2]
-  m = dim(Rho)[2]
-  k = dim(Rho)[3]
-  
-  #init outputs
-  phi = array(0, dim=c(p,m,k))
-  Z = rep(1, n)
-  LLF = 0
-  
-  #local variables
-  Phi = array(0, dim=c(p,m,k))
-  deltaPhi = c()
-  sumDeltaPhi = 0.
-  deltaPhiBufferSize = 20
-  
-  #main loop
-  ite = 1
-  while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
-       {
-    #M step: Mise à jour de Beta (et donc phi)
-    for(r in 1:k)
-               {
-      Z_indice = seq_len(n)[Z==r] #indices où Z == r
-      if (length(Z_indice) == 0)
-        next
-      #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-      s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
-                               crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
-      S = s$d
-      #Set m-rank(r) singular values to zero, and recompose
-      #best rank(r) approximation of the initial product
-      if(rank[r] < length(S))
-        S[(rank[r]+1):length(S)] = 0
-      phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
-    }
-
-               #Etape E et calcul de LLF
-               sumLogLLF2 = 0
-               for(i in seq_len(n))
-               {
-                       sumLLF1 = 0
-                       maxLogGamIR = -Inf
-                       for (r in seq_len(k))
-                       {
-                               dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
-                               logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
-                               #Z[i] = index of max (gam[i,])
-                               if(logGamIR > maxLogGamIR)
-                               {
-                                       Z[i] = r
-                                       maxLogGamIR = logGamIR
-                               }
-                               sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
-                       }
-                       sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
-               }
-  
-               LLF = -1/n * sumLogLLF2
-
-               #update distance parameter to check algorithm convergence (delta(phi, Phi))
-               deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
-               if (length(deltaPhi) > deltaPhiBufferSize)
-                 deltaPhi = deltaPhi[2:length(deltaPhi)]
-               sumDeltaPhi = sum(abs(deltaPhi))
-
-               #update other local variables
-               Phi = phi
-               ite = ite+1
-  }
-  return(list("phi"=phi, "LLF"=LLF))
-}
index e2b6303..8ec4d66 100644 (file)
@@ -17,7 +17,7 @@
 #'
 #' @export
 computeGridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y,
-       gamma, mini, maxi, tau)
+       gamma, mini, maxi, tau, fast=TRUE)
 {
        n = nrow(X)
        p = dim(phiInit)[1]
@@ -25,7 +25,7 @@ computeGridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y,
        k = dim(phiInit)[3]
 
        list_EMG = EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi,
-               gamma, lambda=0, X, Y, tau)
+               gamma, lambda=0, X, Y, tau, fast)
        grid = array(0, dim=c(p,m,k))
        for (i in 1:p)
        {
index 06d552d..dbae65d 100644 (file)
@@ -8,7 +8,7 @@
 #'
 #' export
 constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, maxi,
-       gamma, X, Y, thresh, tau, S, ncores=3, artefact = 1e3, verbose=FALSE)
+       gamma, X, Y, thresh, tau, S, ncores=3, artefact = 1e3, fast=TRUE, verbose=FALSE)
 {
        if (ncores > 1)
        {
@@ -41,7 +41,7 @@ constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini,
 
                # lambda == 0 because we compute the EMV: no penalization here
                res = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0,
-                       X[,col.sel],Y,tau)
+                       X[,col.sel], Y, tau, fast)
                
                # Eval dimension from the result + selected
                phiLambda2 = res$phi
index 6dbf350..339ba60 100644 (file)
@@ -8,7 +8,7 @@
 #'
 #' export
 constructionModelesLassoRank = function(pi, rho, mini, maxi, X, Y, tau, A1, rangmin,
-       rangmax, ncores, verbose=FALSE)
+       rangmax, ncores, fast=TRUE, verbose=FALSE)
 {
   n = dim(X)[1]
   p = dim(X)[2]
@@ -57,7 +57,7 @@ constructionModelesLassoRank = function(pi, rho, mini, maxi, X, Y, tau, A1, rang
       for (j in 1:Size)
                        {
         res = EMGrank(Pi[,lambdaIndex], Rho[,,,lambdaIndex], mini, maxi,
-                                       X[,active], Y, tau, Rank[j,])
+                                       X[,active], Y, tau, Rank[j,], fast)
         llh = rbind(llh,
                                        c( res$LLF, sum(Rank[j,] * (length(active)- Rank[j,] + m)) ) )
         phi[active,,,] = rbind(phi[active,,,], res$phi)
index bfe1d46..9b58a0c 100644 (file)
@@ -8,7 +8,7 @@
 #' @export
 #' @importFrom methods new
 #' @importFrom stats cutree dist hclust runif
-initSmallEM = function(k,X,Y)
+initSmallEM = function(k,X,Y, fast=TRUE)
 {
        n = nrow(Y)
        m = ncol(Y)
@@ -63,7 +63,7 @@ initSmallEM = function(k,X,Y)
                maxiInit = 11
                
                new_EMG = EMGLLF(phiInit1[,,,repet], rhoInit1[,,,repet], piInit1[repet,],
-                       gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, tau=1e-4)
+                       gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, tau=1e-4, fast)
                LLFEessai = new_EMG$LLF
                LLFinit1[repet] = LLFEessai[length(LLFEessai)]
        }
index ecfe506..de473f7 100644 (file)
 #' @param kmax integer, maximum number of clusters, by default = 10
 #' @param rang.min integer, minimum rank in the low rank procedure, by default = 1
 #' @param rang.max integer, maximum rank in the
+#' @param ncores_outer Number of cores for the outer loop on k
+#' @param ncores_inner Number of cores for the inner loop on lambda
+#' @param size_coll_mod (Maximum) size of a collection of models
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
 #'
 #' @return a list with estimators of parameters
 #'
@@ -21,8 +26,8 @@
 #' #TODO: a few examples
 #' @export
 valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50,
-       eps=1e-4, kmin=2, kmax=4, rang.min=1, rang.max=10, ncores_outer=1, ncores_inner=1, size_coll_mod = 50,
-       verbose=FALSE)
+       eps=1e-4, kmin=2, kmax=4, rang.min=1, rang.max=10, ncores_outer=1, ncores_inner=1,
+       size_coll_mod=50, fast=TRUE, verbose=FALSE)
 {
   p = dim(X)[2]
   m = dim(Y)[2]
@@ -52,7 +57,7 @@ valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10,
     #iterations of the EM algorithm.
     P = initSmallEM(k, X, Y)
     grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y,
-                       gamma, mini, maxi, eps)
+                       gamma, mini, maxi, eps, fast)
     if (length(grid_lambda)>size_coll_mod)
       grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
 
@@ -61,7 +66,7 @@ valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10,
     #select variables according to each regularization parameter
     #from the grid: S$selected corresponding to selected variables
     S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma,
-                       grid_lambda, X, Y, 1e-8, eps, ncores_inner) #TODO: 1e-8 as arg?! eps?
+                       grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
     
     if (procedure == 'LassoMLE')
                {
@@ -70,7 +75,7 @@ valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10,
       #compute parameter estimations, with the Maximum Likelihood
       #Estimator, restricted on selected variables.
       models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
-                               mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, artefact = 1e3, verbose)
+                               mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, artefact=1e3, fast, verbose)
     }
                else
                {
@@ -79,7 +84,7 @@ valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10,
       #compute parameter estimations, with the Low Rank
       #Estimator, restricted on selected variables.
       models <- constructionModelesLassoRank(S$Pi, S$Rho, mini, maxi, X, Y, eps, A1,
-                               rank.min, rank.max, ncores_inner, verbose)
+                               rank.min, rank.max, ncores_inner, fast, verbose)
     }
                #attention certains modeles sont NULL après selectVariables
                models = models[sapply(models, function(cell) !is.null(cell))]
index 4e9b374..b23eac2 100644 (file)
@@ -23,7 +23,7 @@
 #' @export
 #'
 selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
-       X,Y,thresh,tau, ncores=3)
+       X,Y,thresh,tau, ncores=3, fast=TRUE)
 {
        if (ncores > 1)
        {
@@ -36,7 +36,7 @@ selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambd
        # Calcul pour un lambda
        computeCoefs <- function(lambda)
        {
-               params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+               params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
 
                p = dim(phiInit)[1]
                m = dim(phiInit)[2]
similarity index 99%
rename from test/generate_test_data/generateRunSaveTest_EMGLLF.R
rename to test/generateRunSaveTest_EMGLLF.R
index 8fe1f33..8a61c1b 100644 (file)
@@ -1,4 +1,3 @@
-source("EMGLLF.R")
 source("helper.R")
 
 generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
similarity index 98%
rename from test/generate_test_data/generateRunSaveTest_EMGrank.R
rename to test/generateRunSaveTest_EMGrank.R
index 14c1b2a..becf62a 100644 (file)
@@ -1,4 +1,3 @@
-source("EMGrank.R")
 source("helper.R")
 
 generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0,
diff --git a/test/generate_test_data/EMGLLF.R b/test/generate_test_data/EMGLLF.R
deleted file mode 100644 (file)
index 09ae2e3..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
-{
-       # Matrix dimensions
-       n = dim(X)[1]
-       p = dim(phiInit)[1]
-       m = dim(phiInit)[2]
-       k = dim(phiInit)[3]
-
-       # Outputs
-       phi = phiInit
-       rho = rhoInit
-       pi = piInit
-       llh = -Inf
-       S = array(0, dim=c(p,m,k))
-
-       # Algorithm variables
-       gam = gamInit
-       Gram2 = array(0, dim=c(p,p,k))
-       ps2 = array(0, dim=c(p,m,k))
-       X2 = array(0, dim=c(n,p,k))
-       Y2 = array(0, dim=c(n,m,k))
-       EPS = 1e-15
-
-       for (ite in 1:maxi)
-       {
-               # Remember last pi,rho,phi values for exit condition in the end of loop
-               Phi = phi
-               Rho = rho
-               Pi = pi
-
-               # Calcul associé à Y et X
-               for (r in 1:k)
-               {
-                       for (mm in 1:m)
-                               Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
-                       for (i in 1:n)
-                               X2[i,,r] = sqrt(gam[i,r]) * X[i,]
-                       for (mm in 1:m)
-                               ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
-                       for (j in 1:p)
-                       {
-                               for (s in 1:p)
-                                       Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
-                       }
-               }
-
-               ##########
-               #Etape M #
-               ##########
-
-               # Pour pi
-               b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
-               gam2 = colSums(gam)
-               a = sum(gam %*% log(pi))
-
-               # Tant que les props sont negatives
-               kk = 0
-               pi2AllPositive = FALSE
-               while (!pi2AllPositive)
-               {
-                       pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
-                       pi2AllPositive = all(pi2 >= 0)
-                       kk = kk+1
-               }
-
-               # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
-               while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
-                       -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
-               {
-                       pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
-                       kk = kk + 1
-               }
-               t = 0.1^kk
-               pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
-               #Pour phi et rho
-               for (r in 1:k)
-               {
-                       for (mm in 1:m)
-                       {
-                               ps = 0
-                               for (i in 1:n)
-                                       ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
-                               nY2 = sum(Y2[,mm,r]^2)
-                               rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
-                       }
-               }
-
-               for (r in 1:k)
-               {
-                       for (j in 1:p)
-                       {
-                               for (mm in 1:m)
-                               {
-                                       S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
-                                       if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
-                                               phi[j,mm,r]=0
-                                       else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
-                                               phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
-                                       else
-                                               phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
-                               }
-                       }
-               }
-
-               ##########
-               #Etape E #
-               ##########
-
-               # Precompute det(rho[,,r]) for r in 1...k
-               detRho = sapply(1:k, function(r) det(rho[,,r]))
-
-               sumLogLLH = 0
-               for (i in 1:n)
-               {
-                       # Update gam[,]
-                       sumGamI = 0
-                       for (r in 1:k)
-                       {
-                               gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
-                               sumGamI = sumGamI + gam[i,r]
-                       }
-                       sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
-                       if (sumGamI > EPS) #else: gam[i,] is already ~=0
-                               gam[i,] = gam[i,] / sumGamI
-               }
-
-               sumPen = sum(pi^gamma * b)
-               last_llh = llh
-               llh = -sumLogLLH/n + lambda*sumPen
-               dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
-               Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
-               Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
-               Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
-               dist2 = max(Dist1,Dist2,Dist3)
-
-               if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
-                       break
-       }
-
-       affec = apply(gam, 1, which.max)
-       list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
-}
diff --git a/test/generate_test_data/EMGrank.R b/test/generate_test_data/EMGrank.R
deleted file mode 100644 (file)
index c4576e4..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
-# --> Yes, we should use by-columns storage everywhere... [later!]
-matricize <- function(X)
-{
-       if (!is.matrix(X))
-               return (t(as.matrix(X)))
-       return (X)
-}
-
-require(MASS)
-EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
-{
-  #matrix dimensions
-  n = dim(X)[1]
-  p = dim(X)[2]
-  m = dim(Rho)[2]
-  k = dim(Rho)[3]
-  
-  #init outputs
-  phi = array(0, dim=c(p,m,k))
-  Z = rep(1, n)
-  LLF = 0
-  
-  #local variables
-  Phi = array(0, dim=c(p,m,k))
-  deltaPhi = c()
-  sumDeltaPhi = 0.
-  deltaPhiBufferSize = 20
-  
-  #main loop
-  ite = 1
-  while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
-       {
-    #M step: Mise à jour de Beta (et donc phi)
-    for(r in 1:k)
-               {
-      Z_indice = seq_len(n)[Z==r] #indices où Z == r
-      if (length(Z_indice) == 0)
-        next
-      #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-      s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
-                               crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
-      S = s$d
-      #Set m-rank(r) singular values to zero, and recompose
-      #best rank(r) approximation of the initial product
-      if(rank[r] < length(S))
-        S[(rank[r]+1):length(S)] = 0
-      phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
-    }
-
-               #Etape E et calcul de LLF
-               sumLogLLF2 = 0
-               for(i in seq_len(n))
-               {
-                       sumLLF1 = 0
-                       maxLogGamIR = -Inf
-                       for (r in seq_len(k))
-                       {
-                               dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
-                               logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
-                               #Z[i] = index of max (gam[i,])
-                               if(logGamIR > maxLogGamIR)
-                               {
-                                       Z[i] = r
-                                       maxLogGamIR = logGamIR
-                               }
-                               sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
-                       }
-                       sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
-               }
-  
-               LLF = -1/n * sumLogLLF2
-
-               #update distance parameter to check algorithm convergence (delta(phi, Phi))
-               deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
-               if (length(deltaPhi) > deltaPhiBufferSize)
-                 deltaPhi = deltaPhi[2:length(deltaPhi)]
-               sumDeltaPhi = sum(abs(deltaPhi))
-
-               #update other local variables
-               Phi = phi
-               ite = ite+1
-  }
-  return(list("phi"=phi, "LLF"=LLF))
-}
index 32cd0a9..7e92929 100755 (executable)
@@ -24,12 +24,10 @@ if [ "$2" == 'r' ] || [ "$2" == 'c' ]; then
 fi
 
 #1) Generate data using R versions of EMGLLF/EMGrank (slow, but trusted)
-cd generate_test_data/
 echo -e "source('generateRunSaveTest_$algo.R');\n \
                # I'm happy with default values - feel free to give args\n \
                generateRunSaveTest_$algo() " \
        | R --slave
-cd ..
 
 #2) Compile test C code into an executable named "test.$algo"
 make test.$algo