-#' EMGLLF
+#' EMGLLF
#'
#' Description of EMGLLF
#'
#' S : ... affec : ...
#'
#' @export
-EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
- mini, maxi, gamma, lambda, X, Y, eps, fast=TRUE)
-{
+EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps, fast = TRUE)
+ {
if (!fast)
{
# Function in R
- return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps))
+ return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps))
}
# Function in C
- n = nrow(X) #nombre d'echantillons
- p = ncol(X) #nombre de covariables
- m = ncol(Y) #taille de Y (multivarié)
- k = length(piInit) #nombre de composantes dans le mélange
- .Call("EMGLLF",
- phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, eps,
- phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi),
- S=double(p*m*k), affec=integer(n),
- n, p, m, k,
- PACKAGE="valse")
+  n <- nrow(X)  # number of samples
+  p <- ncol(X)  # number of covariates
+  m <- ncol(Y)  # size of Y (multivariate)
+  k <- length(piInit)  # number of mixture components
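+  # The double()/integer() arguments below are preallocated output buffers:
+  # the native routine "EMGLLF" (in src/) is expected to fill phi, rho, pi,
+  # LLF, S and affec, and to return them in its result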
+ .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k),
+ LLF = double(maxi), S = double(p * m * k), affec = integer(n), n, p, m, k,
+ PACKAGE = "valse")
}
# R version - slow but easy to read
-.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X2,Y,eps)
-{
+.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X2, Y, eps)
+ {
# Matrix dimensions
- n = dim(Y)[1]
- if (length(dim(phiInit)) == 2){
- p = 1
- m = dim(phiInit)[1]
- k = dim(phiInit)[2]
- } else {
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
+ n <- dim(Y)[1]
+ if (length(dim(phiInit)) == 2)
+ {
+ p <- 1
+ m <- dim(phiInit)[1]
+ k <- dim(phiInit)[2]
+ } else
+ {
+ p <- dim(phiInit)[1]
+ m <- dim(phiInit)[2]
+ k <- dim(phiInit)[3]
}
- X = matrix(nrow = n, ncol = p)
- X[1:n,1:p] = X2
+ X <- matrix(nrow = n, ncol = p)
+ X[1:n, 1:p] <- X2
# Outputs
- phi = array(NA, dim = c(p,m,k))
- phi[1:p,,] = phiInit
- rho = rhoInit
- pi = piInit
- llh = -Inf
- S = array(0, dim=c(p,m,k))
+ phi <- array(NA, dim = c(p, m, k))
+ phi[1:p, , ] <- phiInit
+ rho <- rhoInit
+ pi <- piInit
+ llh <- -Inf
+ S <- array(0, dim = c(p, m, k))
# Algorithm variables
- gam = gamInit
- Gram2 = array(0, dim=c(p,p,k))
- ps2 = array(0, dim=c(p,m,k))
- X2 = array(0, dim=c(n,p,k))
- Y2 = array(0, dim=c(n,m,k))
- EPS = 1e-15
+ gam <- gamInit
+ Gram2 <- array(0, dim = c(p, p, k))
+ ps2 <- array(0, dim = c(p, m, k))
+ X2 <- array(0, dim = c(n, p, k))
+ Y2 <- array(0, dim = c(n, m, k))
+ EPS <- 1e-15
for (ite in 1:maxi)
{
# Remember last pi,rho,phi values for exit condition in the end of loop
- Phi = phi
- Rho = rho
- Pi = pi
+ Phi <- phi
+ Rho <- rho
+ Pi <- pi
# Computations associated to X and Y
for (r in 1:k)
{
- for (mm in 1:m)
- Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
- for (i in 1:n)
- X2[i,,r] = sqrt(gam[i,r]) * X[i,]
- for (mm in 1:m)
- ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+ for (mm in 1:m) Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
+ for (i in 1:n) X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
+ for (mm in 1:m) ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
for (j in 1:p)
{
- for (s in 1:p)
- Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+ for (s in 1:p) Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
}
}
- #########
- #M step #
- #########
+    ######### M step #########
# For pi
- b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
- gam2 = colSums(gam)
- a = sum(gam %*% log(pi))
+ b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
+ gam2 <- colSums(gam)
+ a <- sum(gam %*% log(pi))
# While the proportions are nonpositive
- kk = 0
- pi2AllPositive = FALSE
+ kk <- 0
+ pi2AllPositive <- FALSE
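+    # Backtracking: move pi toward the unpenalized EM update gam2/n, shrinking
+    # the step 0.1^kk until all proportions remain nonnegative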
while (!pi2AllPositive)
{
- pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
- pi2AllPositive = all(pi2 >= 0)
- kk = kk+1
+ pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi)
+ pi2AllPositive <- all(pi2 >= 0)
+ kk <- kk + 1
}
    # t is the largest value in the grid 0.1^kk such that the penalized criterion is nonincreasing
- while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
- -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
- {
- pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
- kk = kk + 1
+ while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < -sum(gam2 * log(pi2))/n +
+ lambda * sum(pi2^gamma * b))
+ {
+ pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
+ kk <- kk + 1
}
- t = 0.1^kk
- pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+ t <- 0.1^kk
+ pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
- #For phi and rho
+ # For phi and rho
for (r in 1:k)
{
for (mm in 1:m)
{
- ps = 0
- for (i in 1:n)
- ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
- nY2 = sum(Y2[,mm,r]^2)
- rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+ ps <- 0
+        for (i in 1:n)
+          ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
+ nY2 <- sum(Y2[, mm, r]^2)
+ rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
}
}
    for (r in 1:k)
    {
      for (j in 1:p)
      {
for (mm in 1:m)
{
- S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
- if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
- phi[j,mm,r]=0
- else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
- phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
- else
- phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
+          S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] +
+            sum(phi[-j, mm, r] * Gram2[j, -j, r])
+          if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma))
+          {
+            phi[j, mm, r] <- 0
+          } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma))
+          {
+            phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
+          } else
+          {
+            phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
+          }
}
}
}
- ########
- #E step#
- ########
+    ######## E step ########
# Precompute det(rho[,,r]) for r in 1...k
- detRho = sapply(1:k, function(r) det(rho[,,r]))
- gam1 = matrix(0, nrow = n, ncol = k)
+ detRho <- sapply(1:k, function(r) det(rho[, , r]))
+ gam1 <- matrix(0, nrow = n, ncol = k)
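+    # Unnormalized posterior weights: rho[, , r] plays the role of a
+    # square-root precision matrix (with phi = beta %*% rho), so the density of
+    # Y|X in component r is proportional to
+    #   det(rho) * exp(-0.5 * ||Y %*% rho - X %*% phi||^2);
+    # the common (2*pi)^(m/2) factor is reinstated in sumLogLLH below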
for (i in 1:n)
{
# Update gam[,]
for (r in 1:k)
{
- gam1[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
+        gam1[i, r] <- pi[r] * detRho[r] *
+          exp(-0.5 * sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2))
}
}
- gam = gam1 / rowSums(gam1)
- sumLogLLH = sum(log(rowSums(gam)) - log((2*base::pi)^(m/2)))
- sumPen = sum(pi^gamma * b)
- last_llh = llh
- llh = -sumLogLLH/n + lambda*sumPen
- dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
- Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
- Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
- Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
- dist2 = max(Dist1,Dist2,Dist3)
+ gam <- gam1/rowSums(gam1)
+    # NOTE: sum the weights *before* normalization (rowSums(gam) is identically
+    # 1 after the line above), hence gam1 here
+    sumLogLLH <- sum(log(rowSums(gam1)) - log((2 * base::pi)^(m/2)))
+ sumPen <- sum(pi^gamma * b)
+ last_llh <- llh
+ llh <- -sumLogLLH/n + lambda * sumPen
+ dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh)))
+ Dist1 <- max((abs(phi - Phi))/(1 + abs(phi)))
+ Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
+ Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
+ dist2 <- max(Dist1, Dist2, Dist3)
- if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
+ if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
break
}
- list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S)
+ list(phi = phi, rho = rho, pi = pi, llh = llh, S = S)
}
-#' EMGrank
+#' EMGrank
#'
#' Description of EMGrank
#'
#' LLF: log-likelihood associated with this sample, for the estimated parameter values
#'
#' @export
-EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast=TRUE)
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE)
{
- if (!fast)
- {
- # Function in R
- return (.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
- }
-
- # Function in C
- n = nrow(X) #nombre d'echantillons
- p = ncol(X) #nombre de covariables
- m = ncol(Y) #taille de Y (multivarié)
- k = length(Pi) #nombre de composantes dans le mélange
- .Call("EMGrank",
- Pi, Rho, mini, maxi, X, Y, tau, rank,
- phi=double(p*m*k), LLF=double(1),
- n, p, m, k,
- PACKAGE="valse")
+ if (!fast)
+ {
+ # Function in R
+ return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+ }
+
+ # Function in C
+  n <- nrow(X)  # number of samples
+  p <- ncol(X)  # number of covariates
+  m <- ncol(Y)  # size of Y (multivariate)
+  k <- length(Pi)  # number of mixture components
+ .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, tau, rank, phi = double(p * m * k),
+ LLF = double(1), n, p, m, k, PACKAGE = "valse")
}
-#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
-# --> Yes, we should use by-columns storage everywhere... [later!]
+# helper to always have matrices as arg (TODO: put this elsewhere? improve?) -->
+# Yes, we should use by-columns storage everywhere... [later!]
matricize <- function(X)
{
- if (!is.matrix(X))
- return (t(as.matrix(X)))
- return (X)
+ if (!is.matrix(X))
+ return(t(as.matrix(X)))
+ return(X)
}
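+# Quick illustration (sketch) of why matricize() is needed: single-row
+# subsetting in R drops the matrix class unless drop = FALSE is given.
+#   M <- matrix(1:6, nrow = 3)
+#   is.matrix(M[1, ])             # FALSE: dimensions were dropped
+#   is.matrix(matricize(M[1, ]))  # TRUE: restored as a 1 x ncol(M) matrix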
# R version - slow but easy to read
-.EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
{
- #matrix dimensions
- n = dim(X)[1]
- p = dim(X)[2]
- m = dim(Rho)[2]
- k = dim(Rho)[3]
-
- #init outputs
- phi = array(0, dim=c(p,m,k))
- Z = rep(1, n)
- LLF = 0
-
- #local variables
- Phi = array(0, dim=c(p,m,k))
- deltaPhi = c()
- sumDeltaPhi = 0.
- deltaPhiBufferSize = 20
-
- #main loop
- ite = 1
- while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
- {
- #M step: update for Beta ( and then phi)
- for(r in 1:k)
- {
- Z_indice = seq_len(n)[Z==r] #indices where Z == r
- if (length(Z_indice) == 0)
+ # matrix dimensions
+ n <- dim(X)[1]
+ p <- dim(X)[2]
+ m <- dim(Rho)[2]
+ k <- dim(Rho)[3]
+
+ # init outputs
+ phi <- array(0, dim = c(p, m, k))
+ Z <- rep(1, n)
+ LLF <- 0
+
+ # local variables
+ Phi <- array(0, dim = c(p, m, k))
+ deltaPhi <- c()
+ sumDeltaPhi <- 0
+ deltaPhiBufferSize <- 20
+
+ # main loop
+ ite <- 1
+ while (ite <= mini || (ite <= maxi && sumDeltaPhi > tau))
+ {
+ # M step: update for Beta ( and then phi)
+ for (r in 1:k)
+ {
+ Z_indice <- seq_len(n)[Z == r] #indices where Z == r
+ if (length(Z_indice) == 0)
next
- #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
- s = svd( MASS::ginv(crossprod(matricize(X[Z_indice,]))) %*%
- crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
- S = s$d
- #Set m-rank(r) singular values to zero, and recompose
- #best rank(r) approximation of the initial product
- if(rank[r] < length(S))
- S[(rank[r]+1):length(S)] = 0
- phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
+ # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+      s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*%
+        crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
+ S <- s$d
+ # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation
+ # of the initial product
+ if (rank[r] < length(S))
+ S[(rank[r] + 1):length(S)] <- 0
+ phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
}
-
- #Step E and computation of the loglikelihood
- sumLogLLF2 = 0
- for(i in seq_len(n))
- {
- sumLLF1 = 0
- maxLogGamIR = -Inf
- for (r in seq_len(k))
- {
- dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
- logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
- #Z[i] = index of max (gam[i,])
- if(logGamIR > maxLogGamIR)
- {
- Z[i] = r
- maxLogGamIR = logGamIR
- }
- sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
- }
- sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
- }
-
- LLF = -1/n * sumLogLLF2
-
- #update distance parameter to check algorithm convergence (delta(phi, Phi))
- deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
- if (length(deltaPhi) > deltaPhiBufferSize)
- deltaPhi = deltaPhi[2:length(deltaPhi)]
- sumDeltaPhi = sum(abs(deltaPhi))
-
- #update other local variables
- Phi = phi
- ite = ite+1
+
+ # Step E and computation of the loglikelihood
+ sumLogLLF2 <- 0
+ for (i in seq_len(n))
+ {
+ sumLLF1 <- 0
+ maxLogGamIR <- -Inf
+ for (r in seq_len(k))
+ {
+          dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
+ logGamIR <- log(Pi[r]) + log(det(Rho[, , r])) - 0.5 * dotProduct
+ # Z[i] = index of max (gam[i,])
+ if (logGamIR > maxLogGamIR)
+ {
+ Z[i] <- r
+ maxLogGamIR <- logGamIR
+ }
+ sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2)
+ }
+ sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
+ }
+
+ LLF <- -1/n * sumLogLLF2
+
+ # update distance parameter to check algorithm convergence (delta(phi, Phi))
+ deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
+ if (length(deltaPhi) > deltaPhiBufferSize)
+ deltaPhi <- deltaPhi[2:length(deltaPhi)]
+ sumDeltaPhi <- sum(abs(deltaPhi))
+
+ # update other local variables
+ Phi <- phi
+ ite <- ite + 1
}
- return(list("phi"=phi, "LLF"=LLF))
+ return(list(phi = phi, LLF = LLF))
}
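+# Sketch of the rank-truncation step used in the M step above, on a toy matrix
+# (illustrative only):
+#   B <- matrix(rnorm(12), nrow = 4)   # a 4 x 3 coefficient matrix
+#   s <- svd(B)
+#   d <- s$d; d[2:length(d)] <- 0      # keep only the leading singular value
+#   B1 <- s$u %*% diag(d) %*% t(s$v)   # best rank-1 approximation (Eckart-Young)
+#   qr(B1)$rank                        # 1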
-#' computeGridLambda
+#' computeGridLambda
#'
#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
#'
#' @param phiInit value for phi
-#' @param rhoInit value for rho
-#' @param piInit value for pi
+#' @param rhoInit value for rho
+#' @param piInit value for pi
#' @param gamInit value for gamma
#' @param X matrix of covariates (of size n*p)
#' @param Y matrix of responses (of size n*m)
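+#' @param gamma power in the penalty
+#' @param mini minimum number of iterations in EM algorithm
+#' @param maxi maximum number of iterations in EM algorithm
+#' @param tau threshold to say that EM algorithm has converged
+#' @param fast TRUE to use compiled C code, FALSE for the R version only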
#' @return the grid of regularization parameters
#'
#' @export
-computeGridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y,
- gamma, mini, maxi, tau, fast=TRUE)
-{
- n = nrow(X)
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
-
- list_EMG = EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi,
- gamma, lambda=0, X, Y, tau, fast)
- grid = array(0, dim=c(p,m,k))
- for (i in 1:p)
- {
- for (j in 1:m)
- grid[i,j,] = abs(list_EMG$S[i,j,]) / (n*list_EMG$pi^gamma)
- }
- sort( unique(grid) )
+computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini,
+ maxi, tau, fast = TRUE)
+ {
+ n <- nrow(X)
+ p <- dim(phiInit)[1]
+ m <- dim(phiInit)[2]
+ k <- dim(phiInit)[3]
+
+ list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0,
+ X, Y, tau, fast)
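+  # Each grid value is the smallest lambda at which EMGLLF's soft-thresholding
+  # step would zero the corresponding coefficient (phi[j,mm,r] is set to 0
+  # whenever |S[j,mm,r]| <= n * lambda * pi[r]^gamma), so the sorted unique
+  # values form a data-driven path of regularization parameters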
+ grid <- array(0, dim = c(p, m, k))
+ for (i in 1:p)
+ {
+ for (j in 1:m) grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma)
+ }
+ sort(unique(grid))
}
-#' constructionModelesLassoMLE
+#' constructionModelesLassoMLE
#'
#' Construct a collection of models with the Lasso-MLE procedure.
#'
#' @return a list with several models, defined by phi, rho, pi, llh
#'
#' @export
-constructionModelesLassoMLE = function( phiInit, rhoInit, piInit, gamInit, mini, maxi,gamma, X, Y,
- eps, S, ncores=3, fast=TRUE, verbose=FALSE)
-{
- if (ncores > 1)
- {
- cl = parallel::makeCluster(ncores, outfile='')
- parallel::clusterExport( cl, envir=environment(),
- varlist=c("phiInit","rhoInit","gamInit","mini","maxi","gamma","X","Y","eps",
- "S","ncores","fast","verbose") )
- }
-
- # Individual model computation
- computeAtLambda <- function(lambda)
- {
- if (ncores > 1)
- require("valse") #nodes start with an empty environment
-
- if (verbose)
- print(paste("Computations for lambda=",lambda))
-
- n = dim(X)[1]
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
- sel.lambda = S[[lambda]]$selected
-# col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
- col.sel <- which( sapply(sel.lambda,length) > 0 ) #if list of selected vars
- if (length(col.sel) == 0)
- return (NULL)
-
- # lambda == 0 because we compute the EMV: no penalization here
- res = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0,
- X[,col.sel], Y, eps, fast)
-
- # Eval dimension from the result + selected
- phiLambda2 = res$phi
- rhoLambda = res$rho
- piLambda = res$pi
- phiLambda = array(0, dim = c(p,m,k))
- for (j in seq_along(col.sel))
- phiLambda[col.sel[j],sel.lambda[[j]],] = phiLambda2[j,sel.lambda[[j]],]
- dimension = length(unlist(sel.lambda))
-
- # Computation of the loglikelihood
- densite = vector("double",n)
- for (r in 1:k)
- {
- if (length(col.sel)==1){
- delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%t(phiLambda[col.sel,,r])))
- } else delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r]))
- densite = densite + piLambda[r] *
- det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-diag(tcrossprod(delta))/2.0)
- }
- llhLambda = c( sum(log(densite)), (dimension+m+1)*k-1 )
- list("phi"= phiLambda, "rho"= rhoLambda, "pi"= piLambda, "llh" = llhLambda)
- }
-
- # For each lambda, computation of the parameters
- out =
- if (ncores > 1)
- parLapply(cl, 1:length(S), computeAtLambda)
- else
- lapply(1:length(S), computeAtLambda)
-
- if (ncores > 1)
- parallel::stopCluster(cl)
-
- out
+constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
+ maxi, gamma, X, Y, eps, S, ncores = 3, fast = TRUE, verbose = FALSE)
+ {
+ if (ncores > 1)
+ {
+ cl <- parallel::makeCluster(ncores, outfile = "")
+    # piInit is exported too: computeAtLambda needs it on the worker nodes
+    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit",
+      "rhoInit", "piInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps",
+      "S", "ncores", "fast", "verbose"))
+ }
+
+ # Individual model computation
+ computeAtLambda <- function(lambda)
+ {
+ if (ncores > 1)
+ require("valse") #nodes start with an empty environment
+
+ if (verbose)
+ print(paste("Computations for lambda=", lambda))
+
+ n <- dim(X)[1]
+ p <- dim(phiInit)[1]
+ m <- dim(phiInit)[2]
+ k <- dim(phiInit)[3]
+ sel.lambda <- S[[lambda]]$selected
+ # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
+ col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars
+ if (length(col.sel) == 0)
+ return(NULL)
+
+    # lambda == 0 because we compute the MLE: no penalization here
+ res <- EMGLLF(phiInit[col.sel, , ], rhoInit, piInit, gamInit, mini, maxi,
+ gamma, 0, X[, col.sel], Y, eps, fast)
+
+ # Eval dimension from the result + selected
+ phiLambda2 <- res$phi
+ rhoLambda <- res$rho
+ piLambda <- res$pi
+ phiLambda <- array(0, dim = c(p, m, k))
+    for (j in seq_along(col.sel))
+      phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
+ dimension <- length(unlist(sel.lambda))
+
+ # Computation of the loglikelihood
+ densite <- vector("double", n)
+ for (r in 1:k)
+ {
+      if (length(col.sel) == 1)
+      {
+        delta <- Y %*% rhoLambda[, , r] - X[, col.sel] %*% t(phiLambda[col.sel, , r])
+      } else
+      {
+        delta <- Y %*% rhoLambda[, , r] - X[, col.sel] %*% phiLambda[col.sel, , r]
+      }
+ densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m *
+ exp(-diag(tcrossprod(delta))/2)
+ }
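+    # llh value paired with the model dimension: per component, 'dimension'
+    # selected phi coefficients, m diagonal entries of rho and one proportion,
+    # minus 1 for the sum-to-one constraint on the proportions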
+ llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1)
+ list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda)
+ }
+
+ # For each lambda, computation of the parameters
+  out <- if (ncores > 1)
+  {
+    parLapply(cl, 1:length(S), computeAtLambda)
+  } else
+  {
+    lapply(1:length(S), computeAtLambda)
+  }
+
+ if (ncores > 1)
+ parallel::stopCluster(cl)
+
+ out
}
-#' generateXY
+#' generateXY
#'
#' Generate a sample of (X,Y) of size n
#'
#' @return list with X and Y
#'
#' @export
-generateXY = function(n, π, meanX, β, covX, covY)
+generateXY <- function(n, π, meanX, β, covX, covY)
{
- p <- dim(covX)[1]
- m <- dim(covY)[1]
- k <- dim(covY)[3]
-
- X <- matrix(nrow=0, ncol=p)
- Y <- matrix(nrow=0, ncol=m)
-
- #random generation of the size of each population in X~Y (unordered)
- sizePop <- rmultinom(1, n, π)
- class <- c() #map i in 1:n --> index of class in 1:k
-
- for (i in 1:k)
- {
- class <- c(class, rep(i, sizePop[i]))
- newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
- X <- rbind( X, newBlockX )
- Y <- rbind( Y, t(apply( newBlockX, 1, function(row)
- MASS::mvrnorm(1, row %*% β[,,i], covY[,,i]) )) )
- }
-
- shuffle = sample(n)
- list("X"=X[shuffle,], "Y"=Y[shuffle,], "class"=class[shuffle])
+ p <- dim(covX)[1]
+ m <- dim(covY)[1]
+ k <- dim(covY)[3]
+
+ X <- matrix(nrow = 0, ncol = p)
+ Y <- matrix(nrow = 0, ncol = m)
+
+ # random generation of the size of each population in X~Y (unordered)
+ sizePop <- rmultinom(1, n, π)
+ class <- c() #map i in 1:n --> index of class in 1:k
+
+ for (i in 1:k)
+ {
+ class <- c(class, rep(i, sizePop[i]))
+ newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
+ X <- rbind(X, newBlockX)
+ Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*%
+ β[, , i], covY[, , i]))))
+ }
+
+ shuffle <- sample(n)
+ list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
}
-#' initialization of the EM algorithm
+#' Initialization of the EM algorithm
#'
#' @param k number of components
#' @param X matrix of covariates (of size n*p)
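+#' @param Y matrix of responses (of size n*m)
+#' @param fast TRUE to use compiled C code, FALSE for the R version only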
#' @export
#' @importFrom methods new
#' @importFrom stats cutree dist hclust runif
-initSmallEM = function(k,X,Y, fast=TRUE)
+initSmallEM <- function(k, X, Y, fast = TRUE)
{
- n = nrow(Y)
- m = ncol(Y)
- p = ncol(X)
- nIte = 20
- Zinit1 = array(0, dim=c(n,nIte))
- betaInit1 = array(0, dim=c(p,m,k,nIte))
- sigmaInit1 = array(0, dim = c(m,m,k,nIte))
- phiInit1 = array(0, dim = c(p,m,k,nIte))
- rhoInit1 = array(0, dim = c(m,m,k,nIte))
- Gam = matrix(0, n, k)
- piInit1 = matrix(0,nIte,k)
- gamInit1 = array(0, dim=c(n,k,nIte))
- LLFinit1 = list()
-
- #require(MASS) #Moore-Penrose generalized inverse of matrix
- for(repet in 1:nIte)
- {
- distance_clus = dist(cbind(X,Y))
- tree_hier = hclust(distance_clus)
- Zinit1[,repet] = cutree(tree_hier, k)
-
- for(r in 1:k)
- {
- Z = Zinit1[,repet]
- Z_indice = seq_len(n)[Z == r] #renvoit les indices où Z==r
- if (length(Z_indice) == 1) {
- betaInit1[,,r,repet] = MASS::ginv(crossprod(t(X[Z_indice,]))) %*%
- crossprod(t(X[Z_indice,]), Y[Z_indice,])
- } else {
- betaInit1[,,r,repet] = MASS::ginv(crossprod(X[Z_indice,])) %*%
- crossprod(X[Z_indice,], Y[Z_indice,])
- }
- sigmaInit1[,,r,repet] = diag(m)
- phiInit1[,,r,repet] = betaInit1[,,r,repet] #/ sigmaInit1[,,r,repet]
- rhoInit1[,,r,repet] = solve(sigmaInit1[,,r,repet])
- piInit1[repet,r] = mean(Z == r)
- }
-
- for(i in 1:n)
- {
- for(r in 1:k)
- {
- dotProduct = tcrossprod(Y[i,]%*%rhoInit1[,,r,repet]-X[i,]%*%phiInit1[,,r,repet])
- Gam[i,r] = piInit1[repet,r]*det(rhoInit1[,,r,repet])*exp(-0.5*dotProduct)
- }
- sumGamI = sum(Gam[i,])
- gamInit1[i,,repet]= Gam[i,] / sumGamI
- }
-
- miniInit = 10
- maxiInit = 11
-
- init_EMG = EMGLLF(phiInit1[,,,repet], rhoInit1[,,,repet], piInit1[repet,],
- gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, eps=1e-4, fast)
- LLFEessai = init_EMG$LLF
- LLFinit1[repet] = LLFEessai[length(LLFEessai)]
- }
- b = which.min(LLFinit1)
- phiInit = phiInit1[,,,b]
- rhoInit = rhoInit1[,,,b]
- piInit = piInit1[b,]
- gamInit = gamInit1[,,b]
-
- return (list(phiInit=phiInit, rhoInit=rhoInit, piInit=piInit, gamInit=gamInit))
+ n <- nrow(Y)
+ m <- ncol(Y)
+ p <- ncol(X)
+ nIte <- 20
+ Zinit1 <- array(0, dim = c(n, nIte))
+ betaInit1 <- array(0, dim = c(p, m, k, nIte))
+ sigmaInit1 <- array(0, dim = c(m, m, k, nIte))
+ phiInit1 <- array(0, dim = c(p, m, k, nIte))
+ rhoInit1 <- array(0, dim = c(m, m, k, nIte))
+ Gam <- matrix(0, n, k)
+ piInit1 <- matrix(0, nIte, k)
+ gamInit1 <- array(0, dim = c(n, k, nIte))
+ LLFinit1 <- list()
+
+ # require(MASS) #Moore-Penrose generalized inverse of matrix
+ for (repet in 1:nIte)
+ {
+ distance_clus <- dist(cbind(X, Y))
+ tree_hier <- hclust(distance_clus)
+ Zinit1[, repet] <- cutree(tree_hier, k)
+
+ for (r in 1:k)
+ {
+ Z <- Zinit1[, repet]
+      Z_indice <- seq_len(n)[Z == r]  # indices where Z == r
+ if (length(Z_indice) == 1)
+ {
+ betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*%
+ crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
+ } else
+ {
+ betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*%
+ crossprod(X[Z_indice, ], Y[Z_indice, ])
+ }
+ sigmaInit1[, , r, repet] <- diag(m)
+ phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet]
+ rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
+ piInit1[repet, r] <- mean(Z == r)
+ }
+
+ for (i in 1:n)
+ {
+ for (r in 1:k)
+ {
+        dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] -
+          X[i, ] %*% phiInit1[, , r, repet])
+        Gam[i, r] <- piInit1[repet, r] * det(rhoInit1[, , r, repet]) *
+          exp(-0.5 * dotProduct)
+ }
+ sumGamI <- sum(Gam[i, ])
+ gamInit1[i, , repet] <- Gam[i, ]/sumGamI
+ }
+
+ miniInit <- 10
+ maxiInit <- 11
+
+    init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
+      gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
+      eps = 1e-04, fast)
+    # The C backend returns the log-likelihood trace as $LLF, whereas .EMGLLF_R
+    # returns a scalar $llh; take whichever is present
+    LLFEessai <- if (is.null(init_EMG$LLF)) init_EMG$llh else init_EMG$LLF
+    LLFinit1[repet] <- LLFEessai[length(LLFEessai)]
+ }
+ b <- which.min(LLFinit1)
+ phiInit <- phiInit1[, , , b]
+ rhoInit <- rhoInit1[, , , b]
+ piInit <- piInit1[b, ]
+ gamInit <- gamInit1[, , b]
+
+ return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
}
-#' valse
+#' valse
#'
#' Main function
#'
#' @examples
#' #TODO: a few examples
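+#' # A minimal illustrative sketch (not a tested example; sizes kept small):
+#' \donttest{
+#' beta <- array(0, dim = c(3, 2, 2))
+#' beta[, , 1] <- 1; beta[, , 2] <- -2
+#' data <- generateXY(100, c(0.5, 0.5), rep(0, 3), beta,
+#'   diag(3), array(diag(2), dim = c(2, 2, 2)))
+#' res <- valse(data$X, data$Y, plot = FALSE, verbose = TRUE)
+#' }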
#' @export
-valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50,
- eps=1e-4, kmin=2, kmax=3, rank.min=1, rank.max=5, ncores_outer=1, ncores_inner=1,
- thresh=1e-8,
- size_coll_mod=10, fast=TRUE, verbose=FALSE, plot = TRUE)
-{
- p = dim(X)[2]
- m = dim(Y)[2]
- n = dim(X)[1]
+valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10,
+ maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1,
+ ncores_inner = 1, thresh = 1e-08, size_coll_mod = 10, fast = TRUE, verbose = FALSE,
+ plot = TRUE)
+ {
+ p <- dim(X)[2]
+ m <- dim(Y)[2]
+ n <- dim(X)[1]
- if (verbose)
+ if (verbose)
print("main loop: over all k and all lambda")
if (ncores_outer > 1)
{
- cl = parallel::makeCluster(ncores_outer, outfile='')
- parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure",
- "selecMod","gamma","mini","maxi","eps","kmin","kmax","rank.min","rank.max",
- "ncores_outer","ncores_inner","thresh","size_coll_mod","verbose","p","m") )
+ cl <- parallel::makeCluster(ncores_outer, outfile = "")
+    # 'fast' is exported too: computeModels passes it down to the workers
+    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X",
+      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin",
+      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh",
+      "size_coll_mod", "fast", "verbose", "p", "m"))
}
# Compute models with k components
computeModels <- function(k)
{
- if (ncores_outer > 1)
- require("valse") #nodes start with an empty environment
+ if (ncores_outer > 1)
+ require("valse") #nodes start with an empty environment
- if (verbose)
- print(paste("Parameters initialization for k =",k))
- #smallEM initializes parameters by k-means and regression model in each component,
- #doing this 20 times, and keeping the values maximizing the likelihood after 10
- #iterations of the EM algorithm.
- P = initSmallEM(k, X, Y)
- grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y,
- gamma, mini, maxi, eps, fast)
- if (length(grid_lambda)>size_coll_mod)
- grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
+ if (verbose)
+ print(paste("Parameters initialization for k =", k))
+ # smallEM initializes parameters by k-means and regression model in each
+ # component, doing this 20 times, and keeping the values maximizing the
+ # likelihood after 10 iterations of the EM algorithm.
+ P <- initSmallEM(k, X, Y)
+ grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
+ X, Y, gamma, mini, maxi, eps, fast)
+ if (length(grid_lambda) > size_coll_mod)
+ grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
- if (verbose)
+ if (verbose)
print("Compute relevant parameters")
- #select variables according to each regularization parameter
- #from the grid: S$selected corresponding to selected variables
- S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma,
- grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
+ # select variables according to each regularization parameter from the grid:
+ # S$selected corresponding to selected variables
+ S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi,
+ gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
- if (procedure == 'LassoMLE')
+ if (procedure == "LassoMLE")
{
- if (verbose)
- print('run the procedure Lasso-MLE')
- #compute parameter estimations, with the Maximum Likelihood
- #Estimator, restricted on selected variables.
- models <- constructionModelesLassoMLE( P$phiInit, P$rhoInit, P$piInit, P$gamInit,
- mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
+ if (verbose)
+ print("run the procedure Lasso-MLE")
+ # compute parameter estimations, with the Maximum Likelihood Estimator,
+ # restricted on selected variables.
+ models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit,
+ P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
- }
- else
+ } else
{
- if (verbose)
- print('run the procedure Lasso-Rank')
- #compute parameter estimations, with the Low Rank
- #Estimator, restricted on selected variables.
- models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps,
- rank.min, rank.max, ncores_inner, fast, verbose)
+ if (verbose)
+ print("run the procedure Lasso-Rank")
+ # compute parameter estimations, with the Low Rank Estimator, restricted on
+ # selected variables.
+ models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min,
+ rank.max, ncores_inner, fast, verbose)
}
- #warning! Some models are NULL after running selectVariables
- models = models[sapply(models, function(cell) !is.null(cell))]
+ # warning! Some models are NULL after running selectVariables
+ models <- models[sapply(models, function(cell) !is.null(cell))]
models
}
# List (index k) of lists (index lambda) of models
- models_list <-
- if (ncores_outer > 1)
- parLapply(cl, kmin:kmax, computeModels)
- else
- lapply(kmin:kmax, computeModels)
- if (ncores_outer > 1)
+  models_list <- if (ncores_outer > 1)
+  {
+    parLapply(cl, kmin:kmax, computeModels)
+  } else
+  {
+    lapply(kmin:kmax, computeModels)
+  }
+ if (ncores_outer > 1)
parallel::stopCluster(cl)
- if (! requireNamespace("capushe", quietly=TRUE))
+ if (!requireNamespace("capushe", quietly = TRUE))
{
warning("'capushe' not available: returning all models")
- return (models_list)
+ return(models_list)
}
- # Get summary "tableauRecap" from models
- tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) {
+ # Get summary 'tableauRecap' from models
+ tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
+ {
models <- models_list[[i]]
- #For a collection of models (same k, several lambda):
- LLH <- sapply( models, function(model) model$llh[1] )
- k = length(models[[1]]$pi)
- sumPen = sapply(models, function(model)
- k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1)
- data.frame(model=paste(i,".",seq_along(models),sep=""),
- pen=sumPen/n, complexity=sumPen, contrast=-LLH)
- } ) )
+ # For a collection of models (same k, several lambda):
+ LLH <- sapply(models, function(model) model$llh[1])
+ k <- length(models[[1]]$pi)
+    sumPen <- sapply(models, function(model)
+      k * (dim(model$rho)[1] + sum(model$phi[, , 1] != 0) + 1) - 1)
+ data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n,
+ complexity = sumPen, contrast = -LLH)
+ }))
print(tableauRecap)
- tableauRecap = tableauRecap[which(tableauRecap[,4]!= Inf),]
+ tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
- modSel = capushe::capushe(tableauRecap, n)
- indModSel <-
- if (selecMod == 'DDSE')
- as.numeric(modSel@DDSE@model)
- else if (selecMod == 'Djump')
- as.numeric(modSel@Djump@model)
- else if (selecMod == 'BIC')
- modSel@BIC_capushe$model
- else if (selecMod == 'AIC')
+ modSel <- capushe::capushe(tableauRecap, n)
+  indModSel <- if (selecMod == "DDSE")
+  {
+    as.numeric(modSel@DDSE@model)
+  } else if (selecMod == "Djump")
+  {
+    as.numeric(modSel@Djump@model)
+  } else if (selecMod == "BIC")
+  {
+    modSel@BIC_capushe$model
+  } else if (selecMod == "AIC")
+  {
+    modSel@AIC_capushe$model
+  }
-
- mod = as.character(tableauRecap[indModSel,1])
- listMod = as.integer(unlist(strsplit(mod, "[.]")))
- modelSel = models_list[[listMod[1]]][[listMod[2]]]
-
- ##Affectations
- Gam = matrix(0, ncol = length(modelSel$pi), nrow = n)
- for (i in 1:n){
- for (r in 1:length(modelSel$pi)){
- sqNorm2 = sum( (Y[i,]%*%modelSel$rho[,,r]-X[i,]%*%modelSel$phi[,,r])^2 )
- Gam[i,r] = modelSel$pi[r] * exp(-0.5*sqNorm2)* det(modelSel$rho[,,r])
+
+ mod <- as.character(tableauRecap[indModSel, 1])
+ listMod <- as.integer(unlist(strsplit(mod, "[.]")))
+ modelSel <- models_list[[listMod[1]]][[listMod[2]]]
+
+ ## Affectations
+ Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n)
+ for (i in 1:n)
+ {
+ for (r in 1:length(modelSel$pi))
+ {
+      sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] -
+        X[i, ] %*% modelSel$phi[, , r])^2)
+      Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[, , r])
}
}
- Gam = Gam/rowSums(Gam)
- modelSel$affec = apply(Gam, 1,which.max)
- modelSel$proba = Gam
-
- if (plot){
- print(plot_valse(X,Y,modelSel,n))
+ Gam <- Gam/rowSums(Gam)
+ modelSel$affec <- apply(Gam, 1, which.max)
+ modelSel$proba <- Gam
+
+ if (plot)
+ {
+ print(plot_valse(X, Y, modelSel, n))
}
-
+
return(modelSel)
}
-#' Plot
+#' Plot
#'
#' Plots the relevant parameters of an estimated model
#'
#'
#' @export
#'
-plot_valse = function(X,Y,model,n, comp = FALSE, k1 = NA, k2 = NA){
+plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
+{
require("gridExtra")
require("ggplot2")
require("reshape2")
require("cowplot")
- K = length(model$pi)
+ K <- length(model$pi)
## regression matrices
- gReg = list()
- for (r in 1:K){
- Melt = melt(t((model$phi[,,r])))
- gReg[[r]] = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
- scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") +
- ggtitle(paste("Regression matrices in cluster",r))
+ gReg <- list()
+ for (r in 1:K)
+ {
+ Melt <- melt(t((model$phi[, , r])))
+ gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
+ geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
+ midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster",
+ r))
}
print(gReg)
## Differences between two clusters
- if (comp){
- if (is.na(k1) || is.na(k)){print('k1 and k2 must be integers, representing the clusters you want to compare')}
- Melt = melt(t(model$phi[,,k1]-model$phi[,,k2]))
- gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
- scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") +
- ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2))
+ if (comp)
+ {
+    if (is.na(k1) || is.na(k2))
+      stop("k1 and k2 must be integers, representing the clusters you want to compare")
+ Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
+ gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + geom_tile() +
+ scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+ space = "Lab") + ggtitle(paste("Difference between regression matrices in cluster",
+ k1, "and", k2))
print(gDiff)
}
### Covariance matrices
- matCov = matrix(NA, nrow = dim(model$rho[,,1])[1], ncol = K)
- for (r in 1:K){
- matCov[,r] = diag(model$rho[,,r])
+ matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
+ for (r in 1:K)
+ {
+ matCov[, r] <- diag(model$rho[, , r])
}
- MeltCov = melt(matCov)
- gCov = ggplot(data =MeltCov, aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
- scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") +
- ggtitle("Covariance matrices")
- print(gCov )
+ MeltCov <- melt(matCov)
+ gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() +
+ scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+ space = "Lab") + ggtitle("Covariance matrices")
+ print(gCov)
### Proportions
- gam2 = matrix(NA, ncol = K, nrow = n)
- for (i in 1:n){
- gam2[i, ] = c(model$proba[i, model$affec[i]], model$affec[i])
+ gam2 <- matrix(NA, ncol = K, nrow = n)
+ for (i in 1:n)
+ {
+ gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
}
- bp <- ggplot(data.frame(gam2), aes(x=X2, y=X1, color=X2, group = X2)) +
- geom_boxplot() + theme(legend.position = "none")+ background_grid(major = "xy", minor = "none")
+ bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) +
+ geom_boxplot() + theme(legend.position = "none") + background_grid(major = "xy",
+ minor = "none")
print(bp)
### Mean in each cluster
- XY = cbind(X,Y)
- XY_class= list()
- meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2])
- for (r in 1:K){
- XY_class[[r]] = XY[model$affec == r, ]
- if (sum(model$affec==r) == 1){
- meanPerClass[,r] = XY_class[[r]]
- } else {
- meanPerClass[,r] = apply(XY_class[[r]], 2, mean)
+ XY <- cbind(X, Y)
+ XY_class <- list()
+ meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2])
+ for (r in 1:K)
+ {
+ XY_class[[r]] <- XY[model$affec == r, ]
+ if (sum(model$affec == r) == 1)
+ {
+ meanPerClass[, r] <- XY_class[[r]]
+ } else
+ {
+ meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
}
}
- data = data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2],K))
- g = ggplot(data, aes(x=time, y = mean, group = cluster, color = cluster))
- print(g + geom_line(aes(linetype=cluster, color=cluster))+ geom_point(aes(color=cluster)) + ggtitle('Mean per cluster'))
+  data <- data.frame(mean = as.vector(meanPerClass),
+    cluster = as.character(rep(1:K, each = dim(XY)[2])),
+    time = rep(1:dim(XY)[2], K))
+ g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
+ print(g + geom_line(aes(linetype = cluster, color = cluster)) + geom_point(aes(color = cluster)) +
+ ggtitle("Mean per cluster"))
-}
\ No newline at end of file
+}
-#' selectVariables
+#' selectVariables
#'
#' Constructs, for a given lambda, the sets of relevant variables.
#'
#' @param phiInit an initial estimator for phi (size: p*m*k)
#' @param rhoInit an initial estimator for rho (size: m*m*k)
-#' @param piInit an initial estimator for pi (size : k)
+#' @param piInit an initial estimator for pi (size: k)
#' @param gamInit an initial estimator for gamma
-#' @param mini minimum number of iterations in EM algorithm
-#' @param maxi maximum number of iterations in EM algorithm
-#' @param gamma power in the penalty
+#' @param mini minimum number of iterations in EM algorithm
+#' @param maxi maximum number of iterations in EM algorithm
+#' @param gamma power in the penalty
#' @param glambda grid of regularization parameters
-#' @param X matrix of regressors
-#' @param Y matrix of responses
+#' @param X matrix of regressors
+#' @param Y matrix of responses
#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
-#' @param eps threshold to say that EM algorithm has converged
+#' @param eps threshold to say that EM algorithm has converged
#' @param ncores Number of cores for parallel execution (1 to disable)
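+#' @param fast TRUE to use compiled C code, FALSE for the R version only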
#'
#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
#'
#' @export
#'
-selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
- X,Y,thresh=1e-8,eps, ncores=3, fast=TRUE)
-{
+selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma,
+ glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast = TRUE)
+ {
if (ncores > 1)
{
- cl = parallel::makeCluster(ncores, outfile='')
- parallel::clusterExport(cl=cl,
- varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","eps"),
- envir=environment())
+ cl <- parallel::makeCluster(ncores, outfile = "")
+    # piInit, gamma and fast are exported too: computeCoefs needs them on the
+    # worker nodes
+    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "piInit",
+      "gamInit", "mini", "maxi", "gamma", "glambda", "X", "Y", "thresh", "eps",
+      "fast"), envir = environment())
}
# Computation for a fixed lambda
computeCoefs <- function(lambda)
{
- params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,fast)
+ params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps, fast)
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
+ p <- dim(phiInit)[1]
+ m <- dim(phiInit)[2]
- #selectedVariables: list where element j contains vector of selected variables in [1,m]
- selectedVariables = lapply(1:p, function(j) {
- #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
- #and finally return the corresponding indices
- seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
+ # selectedVariables: list where element j contains vector of selected variables
+ # in [1,m]
+ selectedVariables <- lapply(1:p, function(j)
+ {
+ # from boolean matrix mxk of selected variables obtain the corresponding boolean
+ # m-vector, and finally return the corresponding indices
+ seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
})
- list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
+ list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
}
# For each lambda in the grid, we compute the coefficients
- out <-
- if (ncores > 1)
- parLapply(cl, glambda, computeCoefs)
- else
- lapply(glambda, computeCoefs)
- if (ncores > 1)
+  out <- if (ncores > 1)
+  {
+    parLapply(cl, glambda, computeCoefs)
+  } else
+  {
+    lapply(glambda, computeCoefs)
+  }
+ if (ncores > 1)
parallel::stopCluster(cl)
- # Suppress models which are computed twice
- #En fait, ca ca fait la comparaison de tous les parametres
- #On veut juste supprimer ceux qui ont les memes variables sélectionnées
- #sha1_array <- lapply(out, digest::sha1)
- #out[ duplicated(sha1_array) ]
- selec = lapply(out, function(model) model$selected)
- ind_dup = duplicated(selec)
- ind_uniq = which(!ind_dup)
- out2 = list()
- for (l in 1:length(ind_uniq)){
- out2[[l]] = out[[ind_uniq[l]]]
+  # Remove models which are computed twice. Hashing whole models, e.g.
+  #   sha1_array <- lapply(out, digest::sha1); out[duplicated(sha1_array)]
+  # would compare all parameters; we only want to drop those sharing the same
+  # selected variables.
+ selec <- lapply(out, function(model) model$selected)
+ ind_dup <- duplicated(selec)
+ ind_uniq <- which(!ind_dup)
+ out2 <- list()
+ for (l in 1:length(ind_uniq))
+ {
+ out2[[l]] <- out[[ind_uniq[l]]]
}
out2
}