#'
#' @export
EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
- mini, maxi, gamma, lambda, X, Y, tau)
+ mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
{
- #TEMPORARY: use R version
- return (EMGLLF_R(phiInit, rhoInit, piInit, gamInit,mini, maxi, gamma, lambda, X, Y, tau))
+ if (!fast)
+ {
+ # Function in R
+ return (EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
+ }
+ # Function in C
n = nrow(X) #nombre d'echantillons
p = ncol(X) #nombre de covariables
m = ncol(Y) #taille de Y (multivarié)
n, p, m, k,
PACKAGE="valse")
}
+
+# Pure-R implementation of the EM loop, called by EMGLLF() when fast=FALSE; slow but easy to read. Returns list(phi, rho, pi, llh, S, affec).
+EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+{
+ # Problem sizes, read from X and from the shape of phiInit
+ n = dim(X)[1] # number of rows of X
+ p = dim(phiInit)[1]
+ m = dim(phiInit)[2]
+ k = dim(phiInit)[3]
+
+ # Outputs (start from the supplied initial values)
+ phi = phiInit
+ rho = rhoInit
+ pi = piInit # local 'pi' shadows the base constant; base::pi is used explicitly where the constant is needed
+ llh = -Inf
+ S = array(0, dim=c(p,m,k))
+
+ # Working variables, recomputed inside the main loop
+ gam = gamInit # indexed as gam[i,r] below, i in 1:n and r in 1:k
+ Gram2 = array(0, dim=c(p,p,k))
+ ps2 = array(0, dim=c(p,m,k))
+ X2 = array(0, dim=c(n,p,k))
+ Y2 = array(0, dim=c(n,m,k))
+ EPS = 1e-15 # a row of gam is only normalised when its sum exceeds this value
+
+ for (ite in 1:maxi)
+ {
+ # Remember last pi,rho,phi values for exit condition in the end of loop
+ Phi = phi
+ Rho = rho
+ Pi = pi
+
+ # Quantities derived from Y and X: copies scaled by sqrt(gam), and their cross-products
+ for (r in 1:k)
+ {
+ for (mm in 1:m)
+ Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+ for (i in 1:n)
+ X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+ for (mm in 1:m)
+ ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) # t(X2[,,r]) %*% Y2[,mm,r]
+ for (j in 1:p)
+ {
+ for (s in 1:p)
+ Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+ }
+ }
+
+ ##########
+ # M step #
+ ##########
+
+ # Update of pi
+ b = sapply( 1:k, function(r) sum(abs(phi[,,r])) ) # sum of |phi[,,r]| per component, used in the penalty term
+ gam2 = colSums(gam)
+ a = sum(gam %*% log(pi))
+
+ # While some candidate proportions are negative, shrink the step 0.1^kk
+ kk = 0
+ pi2AllPositive = FALSE
+ while (!pi2AllPositive)
+ {
+ pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+ pi2AllPositive = all(pi2 >= 0)
+ kk = kk+1
+ }
+
+ # t(m): the largest value on the grid 0.1^k such that it is decreasing or constant
+ while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+ -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
+ {
+ pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+ kk = kk + 1
+ }
+ t = 0.1^kk
+ pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) # divided by its own sum, so the result sums to 1
+
+ # Update of rho (only the diagonal entries rho[mm,mm,r] are written), then of phi
+ for (r in 1:k)
+ {
+ for (mm in 1:m)
+ {
+ ps = 0
+ for (i in 1:n)
+ ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+ nY2 = sum(Y2[,mm,r]^2)
+ rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+ }
+ }
+
+ for (r in 1:k)
+ {
+ for (j in 1:p)
+ {
+ for (mm in 1:m)
+ {
+ S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r]) # -j: every row except j; phi is updated in place, so order matters
+ if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
+ phi[j,mm,r]=0
+ else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
+ phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
+ else
+ phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
+ }
+ }
+ }
+
+ ##########
+ # E step #
+ ##########
+
+ # Precompute det(rho[,,r]) for r in 1...k
+ detRho = sapply(1:k, function(r) det(rho[,,r]))
+
+ sumLogLLH = 0
+ for (i in 1:n)
+ {
+ # Update gam[i,]: unnormalised values first, divided by their sum afterwards
+ sumGamI = 0
+ for (r in 1:k)
+ {
+ gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
+ sumGamI = sumGamI + gam[i,r]
+ }
+ sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2)) # base::pi is the constant, not the local proportions
+ if (sumGamI > EPS) #else: gam[i,] is already ~=0
+ gam[i,] = gam[i,] / sumGamI
+ }
+
+ sumPen = sum(pi^gamma * b)
+ last_llh = llh
+ llh = -sumLogLLH/n + lambda*sumPen
+ dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) ) # first iteration has no previous llh to compare with
+ Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
+ Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+ Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
+ dist2 = max(Dist1,Dist2,Dist3)
+
+ if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau))) # NOTE(review): this leaves the loop while the change is still >= tau; confirm against the C implementation that the test is not inverted
+ break
+ }
+
+ affec = apply(gam, 1, which.max) # for each row of gam, index of its largest entry
+ list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
+}
+++ /dev/null
-EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
-{
- # Matrix dimensions
- n = dim(X)[1]
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
-
- # Outputs
- phi = phiInit
- rho = rhoInit
- pi = piInit
- llh = -Inf
- S = array(0, dim=c(p,m,k))
-
- # Algorithm variables
- gam = gamInit
- Gram2 = array(0, dim=c(p,p,k))
- ps2 = array(0, dim=c(p,m,k))
- X2 = array(0, dim=c(n,p,k))
- Y2 = array(0, dim=c(n,m,k))
- EPS = 1e-15
-
- for (ite in 1:maxi)
- {
- # Remember last pi,rho,phi values for exit condition in the end of loop
- Phi = phi
- Rho = rho
- Pi = pi
-
- # Calcul associé à Y et X
- for (r in 1:k)
- {
- for (mm in 1:m)
- Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
- for (i in 1:n)
- X2[i,,r] = sqrt(gam[i,r]) * X[i,]
- for (mm in 1:m)
- ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
- for (j in 1:p)
- {
- for (s in 1:p)
- Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
- }
- }
-
- ##########
- #Etape M #
- ##########
-
- # Pour pi
- b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
- gam2 = colSums(gam)
- a = sum(gam %*% log(pi))
-
- # Tant que les props sont negatives
- kk = 0
- pi2AllPositive = FALSE
- while (!pi2AllPositive)
- {
- pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
- pi2AllPositive = all(pi2 >= 0)
- kk = kk+1
- }
-
- # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
- while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
- -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
- {
- pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
- kk = kk + 1
- }
- t = 0.1^kk
- pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
- #Pour phi et rho
- for (r in 1:k)
- {
- for (mm in 1:m)
- {
- ps = 0
- for (i in 1:n)
- ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
- nY2 = sum(Y2[,mm,r]^2)
- rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
- }
- }
-
- for (r in 1:k)
- {
- for (j in 1:p)
- {
- for (mm in 1:m)
- {
- S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
- if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
- phi[j,mm,r]=0
- else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
- phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
- else
- phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
- }
- }
- }
-
- ##########
- #Etape E #
- ##########
-
- # Precompute det(rho[,,r]) for r in 1...k
- detRho = sapply(1:k, function(r) det(rho[,,r]))
-
- sumLogLLH = 0
- for (i in 1:n)
- {
- # Update gam[,]
- sumGamI = 0
- for (r in 1:k)
- {
- gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
- sumGamI = sumGamI + gam[i,r]
- }
- sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
- if (sumGamI > EPS) #else: gam[i,] is already ~=0
- gam[i,] = gam[i,] / sumGamI
- }
-
- sumPen = sum(pi^gamma * b)
- last_llh = llh
- llh = -sumLogLLH/n + lambda*sumPen
- dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
- Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
- Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
- Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
- dist2 = max(Dist1,Dist2,Dist3)
-
- if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
- break
- }
-
- affec = apply(gam, 1, which.max)
- list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
-}
#' LLF : log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres
#'
#' @export
-EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast=TRUE)
{
- #TEMPORARY: use R version
- return (EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+ if (!fast)
+ {
+ # Function in R
+ return (EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+ }
+ # Function in C
n = nrow(X) #nombre d'echantillons
p = ncol(X) #nombre de covariables
m = ncol(Y) #taille de Y (multivarié)
n, p, m, k,
PACKAGE="valse")
}
+
+# Helper to always have matrices as arg: a matrix is returned unchanged, anything else becomes t(as.matrix(X)) (TODO: put this elsewhere? improve?)
+# --> Yes, we should use by-columns storage everywhere... [later!]
+matricize <- function(X)
+{
+ if (!is.matrix(X))
+ return (t(as.matrix(X))) # for a plain vector, as.matrix gives one column, so the transpose is a single-row matrix
+ return (X)
+}
+
+# Pure-R implementation, called by EMGrank() when fast=FALSE; slow but easy to read. Returns list(phi, LLF).
+EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+{
+ #matrix dimensions
+ n = dim(X)[1]
+ p = dim(X)[2]
+ m = dim(Rho)[2]
+ k = dim(Rho)[3]
+
+ #init outputs
+ phi = array(0, dim=c(p,m,k))
+ Z = rep(1, n) # one label per row of X; every row starts with label 1
+ LLF = 0
+
+ #local variables
+ Phi = array(0, dim=c(p,m,k)) # value of phi at the previous iteration
+ deltaPhi = c()
+ sumDeltaPhi = 0.
+ deltaPhiBufferSize = 20 # maximum number of recent deltas kept in deltaPhi
+
+ #main loop: always runs mini iterations, then continues up to maxi while sumDeltaPhi > tau
+ ite = 1
+ while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
+ {
+ #M step: update of Beta (and therefore of phi)
+ for(r in 1:k)
+ {
+ Z_indice = seq_len(n)[Z==r] #indices where Z == r
+ if (length(Z_indice) == 0)
+ next # no row carries label r: phi[,,r] keeps its current value
+ #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+ s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*% # ginv is not defined in this block; presumably MASS::ginv, confirm it is imported
+ crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
+ S = s$d
+ #Set m-rank(r) singular values to zero, and recompose
+ #best rank(r) approximation of the initial product
+ if(rank[r] < length(S))
+ S[(rank[r]+1):length(S)] = 0
+ phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r] # NOTE(review): when length(S) == 1, diag(S) is read by R as a matrix size rather than a 1x1 diagonal; confirm that case cannot occur
+ }
+
+ #E step and computation of LLF
+ sumLogLLF2 = 0
+ for(i in seq_len(n))
+ {
+ sumLLF1 = 0
+ maxLogGamIR = -Inf
+ for (r in seq_len(k))
+ {
+ dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r]) # row vector times its transpose: sum of squares, as a 1x1 matrix
+ logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
+ #Z[i] = index of max (gam[i,])
+ if(logGamIR > maxLogGamIR)
+ {
+ Z[i] = r
+ maxLogGamIR = logGamIR
+ }
+ sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) # no local 'pi' is assigned in this function (the argument is Pi)
+ }
+ sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
+ }
+
+ LLF = -1/n * sumLogLLF2
+
+ #update distance parameter to check algorithm convergence (delta(phi, Phi))
+ deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
+ if (length(deltaPhi) > deltaPhiBufferSize)
+ deltaPhi = deltaPhi[2:length(deltaPhi)] # drop the oldest entry
+ sumDeltaPhi = sum(abs(deltaPhi))
+
+ #update other local variables
+ Phi = phi
+ ite = ite+1
+ }
+ return(list("phi"=phi, "LLF"=LLF))
+}
+++ /dev/null
-#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
-# --> Yes, we should use by-columns storage everywhere... [later!]
-matricize <- function(X)
-{
- if (!is.matrix(X))
- return (t(as.matrix(X)))
- return (X)
-}
-
-require(MASS)
-EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
-{
- #matrix dimensions
- n = dim(X)[1]
- p = dim(X)[2]
- m = dim(Rho)[2]
- k = dim(Rho)[3]
-
- #init outputs
- phi = array(0, dim=c(p,m,k))
- Z = rep(1, n)
- LLF = 0
-
- #local variables
- Phi = array(0, dim=c(p,m,k))
- deltaPhi = c()
- sumDeltaPhi = 0.
- deltaPhiBufferSize = 20
-
- #main loop
- ite = 1
- while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
- {
- #M step: Mise à jour de Beta (et donc phi)
- for(r in 1:k)
- {
- Z_indice = seq_len(n)[Z==r] #indices où Z == r
- if (length(Z_indice) == 0)
- next
- #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
- s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
- crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
- S = s$d
- #Set m-rank(r) singular values to zero, and recompose
- #best rank(r) approximation of the initial product
- if(rank[r] < length(S))
- S[(rank[r]+1):length(S)] = 0
- phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
- }
-
- #Etape E et calcul de LLF
- sumLogLLF2 = 0
- for(i in seq_len(n))
- {
- sumLLF1 = 0
- maxLogGamIR = -Inf
- for (r in seq_len(k))
- {
- dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
- logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
- #Z[i] = index of max (gam[i,])
- if(logGamIR > maxLogGamIR)
- {
- Z[i] = r
- maxLogGamIR = logGamIR
- }
- sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
- }
- sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
- }
-
- LLF = -1/n * sumLogLLF2
-
- #update distance parameter to check algorithm convergence (delta(phi, Phi))
- deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
- if (length(deltaPhi) > deltaPhiBufferSize)
- deltaPhi = deltaPhi[2:length(deltaPhi)]
- sumDeltaPhi = sum(abs(deltaPhi))
-
- #update other local variables
- Phi = phi
- ite = ite+1
- }
- return(list("phi"=phi, "LLF"=LLF))
-}
#'
#' @export
computeGridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y,
- gamma, mini, maxi, tau)
+ gamma, mini, maxi, tau, fast=TRUE)
{
n = nrow(X)
p = dim(phiInit)[1]
k = dim(phiInit)[3]
list_EMG = EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi,
- gamma, lambda=0, X, Y, tau)
+ gamma, lambda=0, X, Y, tau, fast)
grid = array(0, dim=c(p,m,k))
for (i in 1:p)
{
#'
#' export
constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, maxi,
- gamma, X, Y, thresh, tau, S, ncores=3, artefact = 1e3, verbose=FALSE)
+ gamma, X, Y, thresh, tau, S, ncores=3, artefact = 1e3, fast=TRUE, verbose=FALSE)
{
if (ncores > 1)
{
# lambda == 0 because we compute the EMV: no penalization here
res = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0,
- X[,col.sel],Y,tau)
+ X[,col.sel], Y, tau, fast)
# Eval dimension from the result + selected
phiLambda2 = res$phi
#'
#' export
constructionModelesLassoRank = function(pi, rho, mini, maxi, X, Y, tau, A1, rangmin,
- rangmax, ncores, verbose=FALSE)
+ rangmax, ncores, fast=TRUE, verbose=FALSE)
{
n = dim(X)[1]
p = dim(X)[2]
for (j in 1:Size)
{
res = EMGrank(Pi[,lambdaIndex], Rho[,,,lambdaIndex], mini, maxi,
- X[,active], Y, tau, Rank[j,])
+ X[,active], Y, tau, Rank[j,], fast)
llh = rbind(llh,
c( res$LLF, sum(Rank[j,] * (length(active)- Rank[j,] + m)) ) )
phi[active,,,] = rbind(phi[active,,,], res$phi)
#' @export
#' @importFrom methods new
#' @importFrom stats cutree dist hclust runif
-initSmallEM = function(k,X,Y)
+initSmallEM = function(k,X,Y, fast=TRUE)
{
n = nrow(Y)
m = ncol(Y)
maxiInit = 11
new_EMG = EMGLLF(phiInit1[,,,repet], rhoInit1[,,,repet], piInit1[repet,],
- gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, tau=1e-4)
+ gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, tau=1e-4, fast)
LLFEessai = new_EMG$LLF
LLFinit1[repet] = LLFEessai[length(LLFEessai)]
}
#' @param kmax integer, maximum number of clusters, by default = 10
#' @param rang.min integer, minimum rank in the low rank procedure, by default = 1
#' @param rang.max integer, maximum rank in the
+#' @param ncores_outer Number of cores for the outer loop on k
+#' @param ncores_inner Number of cores for the inner loop on lambda
+#' @param size_coll_mod (Maximum) size of a collection of models
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
#'
#' @return a list with estimators of parameters
#'
#' #TODO: a few examples
#' @export
valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50,
- eps=1e-4, kmin=2, kmax=4, rang.min=1, rang.max=10, ncores_outer=1, ncores_inner=1, size_coll_mod = 50,
- verbose=FALSE)
+ eps=1e-4, kmin=2, kmax=4, rang.min=1, rang.max=10, ncores_outer=1, ncores_inner=1,
+ size_coll_mod=50, fast=TRUE, verbose=FALSE)
{
p = dim(X)[2]
m = dim(Y)[2]
#iterations of the EM algorithm.
P = initSmallEM(k, X, Y)
grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y,
- gamma, mini, maxi, eps)
+ gamma, mini, maxi, eps, fast)
if (length(grid_lambda)>size_coll_mod)
grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
#select variables according to each regularization parameter
#from the grid: S$selected corresponding to selected variables
S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma,
- grid_lambda, X, Y, 1e-8, eps, ncores_inner) #TODO: 1e-8 as arg?! eps?
+ grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
if (procedure == 'LassoMLE')
{
#compute parameter estimations, with the Maximum Likelihood
#Estimator, restricted on selected variables.
models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
- mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, artefact = 1e3, verbose)
+ mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, artefact=1e3, fast, verbose)
}
else
{
#compute parameter estimations, with the Low Rank
#Estimator, restricted on selected variables.
models <- constructionModelesLassoRank(S$Pi, S$Rho, mini, maxi, X, Y, eps, A1,
- rank.min, rank.max, ncores_inner, verbose)
+ rank.min, rank.max, ncores_inner, fast, verbose)
}
#attention certains modeles sont NULL après selectVariables
models = models[sapply(models, function(cell) !is.null(cell))]
#' @export
#'
selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
- X,Y,thresh,tau, ncores=3)
+ X,Y,thresh,tau, ncores=3, fast=TRUE)
{
if (ncores > 1)
{
# Calcul pour un lambda
computeCoefs <- function(lambda)
{
- params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+ params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
p = dim(phiInit)[1]
m = dim(phiInit)[2]
-source("EMGLLF.R")
source("helper.R")
generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
-source("EMGrank.R")
source("helper.R")
generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0,
+++ /dev/null
-EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
-{
- # Matrix dimensions
- n = dim(X)[1]
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
-
- # Outputs
- phi = phiInit
- rho = rhoInit
- pi = piInit
- llh = -Inf
- S = array(0, dim=c(p,m,k))
-
- # Algorithm variables
- gam = gamInit
- Gram2 = array(0, dim=c(p,p,k))
- ps2 = array(0, dim=c(p,m,k))
- X2 = array(0, dim=c(n,p,k))
- Y2 = array(0, dim=c(n,m,k))
- EPS = 1e-15
-
- for (ite in 1:maxi)
- {
- # Remember last pi,rho,phi values for exit condition in the end of loop
- Phi = phi
- Rho = rho
- Pi = pi
-
- # Calcul associé à Y et X
- for (r in 1:k)
- {
- for (mm in 1:m)
- Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
- for (i in 1:n)
- X2[i,,r] = sqrt(gam[i,r]) * X[i,]
- for (mm in 1:m)
- ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
- for (j in 1:p)
- {
- for (s in 1:p)
- Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
- }
- }
-
- ##########
- #Etape M #
- ##########
-
- # Pour pi
- b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
- gam2 = colSums(gam)
- a = sum(gam %*% log(pi))
-
- # Tant que les props sont negatives
- kk = 0
- pi2AllPositive = FALSE
- while (!pi2AllPositive)
- {
- pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
- pi2AllPositive = all(pi2 >= 0)
- kk = kk+1
- }
-
- # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
- while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
- -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
- {
- pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
- kk = kk + 1
- }
- t = 0.1^kk
- pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
- #Pour phi et rho
- for (r in 1:k)
- {
- for (mm in 1:m)
- {
- ps = 0
- for (i in 1:n)
- ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
- nY2 = sum(Y2[,mm,r]^2)
- rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
- }
- }
-
- for (r in 1:k)
- {
- for (j in 1:p)
- {
- for (mm in 1:m)
- {
- S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
- if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
- phi[j,mm,r]=0
- else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
- phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
- else
- phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
- }
- }
- }
-
- ##########
- #Etape E #
- ##########
-
- # Precompute det(rho[,,r]) for r in 1...k
- detRho = sapply(1:k, function(r) det(rho[,,r]))
-
- sumLogLLH = 0
- for (i in 1:n)
- {
- # Update gam[,]
- sumGamI = 0
- for (r in 1:k)
- {
- gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
- sumGamI = sumGamI + gam[i,r]
- }
- sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
- if (sumGamI > EPS) #else: gam[i,] is already ~=0
- gam[i,] = gam[i,] / sumGamI
- }
-
- sumPen = sum(pi^gamma * b)
- last_llh = llh
- llh = -sumLogLLH/n + lambda*sumPen
- dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
- Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
- Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
- Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
- dist2 = max(Dist1,Dist2,Dist3)
-
- if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
- break
- }
-
- affec = apply(gam, 1, which.max)
- list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
-}
+++ /dev/null
-#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
-# --> Yes, we should use by-columns storage everywhere... [later!]
-matricize <- function(X)
-{
- if (!is.matrix(X))
- return (t(as.matrix(X)))
- return (X)
-}
-
-require(MASS)
-EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
-{
- #matrix dimensions
- n = dim(X)[1]
- p = dim(X)[2]
- m = dim(Rho)[2]
- k = dim(Rho)[3]
-
- #init outputs
- phi = array(0, dim=c(p,m,k))
- Z = rep(1, n)
- LLF = 0
-
- #local variables
- Phi = array(0, dim=c(p,m,k))
- deltaPhi = c()
- sumDeltaPhi = 0.
- deltaPhiBufferSize = 20
-
- #main loop
- ite = 1
- while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
- {
- #M step: Mise à jour de Beta (et donc phi)
- for(r in 1:k)
- {
- Z_indice = seq_len(n)[Z==r] #indices où Z == r
- if (length(Z_indice) == 0)
- next
- #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
- s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
- crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
- S = s$d
- #Set m-rank(r) singular values to zero, and recompose
- #best rank(r) approximation of the initial product
- if(rank[r] < length(S))
- S[(rank[r]+1):length(S)] = 0
- phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
- }
-
- #Etape E et calcul de LLF
- sumLogLLF2 = 0
- for(i in seq_len(n))
- {
- sumLLF1 = 0
- maxLogGamIR = -Inf
- for (r in seq_len(k))
- {
- dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
- logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
- #Z[i] = index of max (gam[i,])
- if(logGamIR > maxLogGamIR)
- {
- Z[i] = r
- maxLogGamIR = logGamIR
- }
- sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
- }
- sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
- }
-
- LLF = -1/n * sumLogLLF2
-
- #update distance parameter to check algorithm convergence (delta(phi, Phi))
- deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
- if (length(deltaPhi) > deltaPhiBufferSize)
- deltaPhi = deltaPhi[2:length(deltaPhi)]
- sumDeltaPhi = sum(abs(deltaPhi))
-
- #update other local variables
- Phi = phi
- ite = ite+1
- }
- return(list("phi"=phi, "LLF"=LLF))
-}
fi
#1) Generate data using R versions of EMGLLF/EMGrank (slow, but trusted)
-cd generate_test_data/
echo -e "source('generateRunSaveTest_$algo.R');\n \
# I'm happy with default values - feel free to give args\n \
generateRunSaveTest_$algo() " \
| R --slave
-cd ..
#2) Compile test C code into an executable named "test.$algo"
make test.$algo