License: MIT + file LICENSE
RoxygenNote: 5.0.1
Collate:
- 'plot.R'
+ 'plot_valse.R'
'main.R'
'selectVariables.R'
'constructionModelesLassoRank.R'
'EMGLLF.R'
'generateXY.R'
'A_NAMESPACE.R'
- 'plot_valse.R'
#' @include constructionModelesLassoRank.R
#' @include selectVariables.R
#' @include main.R
-#' @include plot.R
+#' @include plot_valse.R
#'
#' @useDynLib valse
#'
#'
#' Description of EMGLLF
#'
-#' @param phiInit Parametre initial de moyenne renormalisé
-#' @param rhoInit Parametre initial de variance renormalisé
-#' @param piInit Parametre initial des proportions
-#' @param gamInit Paramètre initial des probabilités a posteriori de chaque échantillon
-#' @param mini Nombre minimal d'itérations dans l'algorithme EM
-#' @param maxi Nombre maximal d'itérations dans l'algorithme EM
-#' @param gamma Puissance des proportions dans la pénalisation pour un Lasso adaptatif
-#' @param lambda Valeur du paramètre de régularisation du Lasso
-#' @param X Régresseurs
-#' @param Y Réponse
-#' @param tau Seuil pour accepter la convergence
+#' @param phiInit an initialization for phi
+#' @param rhoInit an initialization for rho
+#' @param piInit an initialization for pi
+#' @param gamInit an initialization for the a posteriori probabilities of each sample
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param gamma integer, power of the proportions in the penalty (adaptive Lasso), by default = 1
+#' @param lambda regularization parameter in the Lasso estimation
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
#'
#' @return A list ... phi,rho,pi,LLF,S,affec:
#' phi : rescaled mean parameter, estimated by the EM algorithm
#'
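+#' @examples
+#' # Minimal sketch on simulated data (not run). The initial values below are
+#' # illustrative placeholders only; in practice they would come from an
+#' # initialization step, so this call is not guaranteed to give a useful fit.
+#' \dontrun{
+#' n <- 50; p <- 4; m <- 2; k <- 2
+#' X <- matrix(rnorm(n*p), n, p)
+#' Y <- matrix(rnorm(n*m), n, m)
+#' phiInit <- array(0, dim = c(p, m, k))
+#' rhoInit <- array(rep(diag(m), k), dim = c(m, m, k))
+#' piInit <- rep(1/k, k)
+#' gamInit <- matrix(1/k, n, k)
+#' res <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini = 10, maxi = 100,
+#'   gamma = 1, lambda = 0.1, X, Y, eps = 1e-4)
+#' }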
#' @export
EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
- mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
+ mini, maxi, gamma, lambda, X, Y, eps, fast=TRUE)
{
if (!fast)
{
#'
#' Description of EMGrank
#'
-#' @param phiInit ...
#' @param Pi proportion parameter
#' @param Rho initial rescaled variance parameter
#' @param mini minimum number of iterations in the EM algorithm
# R version - slow but easy to read
.EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
{
+ require(MASS)
#matrix dimensions
n = dim(X)[1]
p = dim(X)[2]
ite = 1
while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
{
- #M step: Mise à jour de Beta (et donc phi)
+ # M step: update of Beta (and then phi)
for(r in 1:k)
{
- Z_indice = seq_len(n)[Z==r] #indices où Z == r
+ Z_indice = seq_len(n)[Z==r] #indices where Z == r
if (length(Z_indice) == 0)
next
#U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
}
- #Etape E et calcul de LLF
+ # E step and computation of the log-likelihood
sumLogLLF2 = 0
for(i in seq_len(n))
{
#'
#' Construct a collection of models with the Lasso-MLE procedure.
#'
+#' @param phiInit an initialization for phi, obtained from initSmallEM.R
+#' @param rhoInit an initialization for rho, obtained from initSmallEM.R
+#' @param piInit an initialization for pi, obtained from initSmallEM.R
+#' @param gamInit an initialization for gam, obtained from initSmallEM.R
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param gamma integer, power of the proportions in the penalty (adaptive Lasso), by default = 1
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param S output of selectVariables.R
+#' @param ncores Number of cores, by default = 3
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#'
+#' @return a list with several models, defined by phi, rho, pi, llh
#'
-#' @param ...
-#'
-#' @return ...
-#'
-#' export
-constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, maxi,
- gamma, X, Y, thresh, tau, S, ncores=3, fast=TRUE, verbose=FALSE)
+#' @export
+constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, maxi,
+ gamma, X, Y, eps, S, ncores=3, fast=TRUE, verbose=FALSE)
{
if (ncores > 1)
{
cl = parallel::makeCluster(ncores, outfile='')
parallel::clusterExport( cl, envir=environment(),
- varlist=c("phiInit","rhoInit","gamInit","mini","maxi","gamma","X","Y","thresh",
- "tau","S","ncores","verbose") )
+ varlist=c("phiInit","rhoInit","gamInit","mini","maxi","gamma","X","Y","eps",
+ "S","ncores","fast","verbose") )
}
# Individual model computation
# lambda == 0 because we compute the MLE: no penalization here
res = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0,
- X[,col.sel], Y, tau, fast)
+ X[,col.sel], Y, eps, fast)
# Eval dimension from the result + selected
phiLambda2 = res$phi
#' constructionModelesLassoRank
#'
-#' TODO: description
+#' Construct a collection of models with the Lasso-Rank procedure.
+#'
+#' @param S output of selectVariables.R
+#' @param k number of components
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
+#' @param ncores Number of cores, by default = 3
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#'
+#' @return a list with several models, defined by phi, rho, pi, llh
#'
-#' @param ...
-#'
-#' @return ...
-#'
-#' export
-constructionModelesLassoRank = function(pi, rho, mini, maxi, X, Y, tau, A1, rangmin,
- rangmax, ncores, fast=TRUE, verbose=FALSE)
+#' @export
+constructionModelesLassoRank = function(S, k, mini, maxi, X, Y, eps, rank.min,
+ rank.max, ncores, fast=TRUE, verbose=FALSE)
{
n = dim(X)[1]
p = dim(X)[2]
- m = dim(rho)[2]
- k = dim(rho)[3]
- L = dim(A1)[2]
-
- # On cherche les rangs possiblement intéressants
- deltaRank = rangmax - rangmin + 1
+ m = dim(Y)[2]
+ L = length(S)
+
+ # Possibly interesting ranks
+ deltaRank = rank.max - rank.min + 1
Size = deltaRank^k
- Rank = matrix(0, nrow=Size, ncol=k)
+ RankLambda = matrix(0, nrow=Size*L, ncol=k+1)
for (r in 1:k)
- {
- # On veut le tableau de toutes les combinaisons de rangs possibles
- # Dans la première colonne : on répète (rangmax-rangmin)^(k-1) chaque chiffre :
- # ça remplit la colonne
- # Dans la deuxieme : on répète (rangmax-rangmin)^(k-2) chaque chiffre,
- # et on fait ça (rangmax-rangmin)^2 fois
- # ...
- # Dans la dernière, on répète chaque chiffre une fois,
- # et on fait ça (rangmin-rangmax)^(k-1) fois.
- Rank[,r] = rangmin + rep(0:(deltaRank-1), deltaRank^(r-1), each=deltaRank^(k-r))
+ {
+ # We want the table of all possible rank combinations, paired with the lambda indices.
+ # In the first column, each rank value is repeated deltaRank^(k-1) times: this fills the column.
+ # In the second, each value is repeated deltaRank^(k-2) times, and the pattern is repeated deltaRank times.
+ # ...
+ # In the last column, each value is repeated once, and the pattern is repeated deltaRank^(k-1) times.
+ # Finally, each row of ranks is repeated L times (each = L), once per lambda index.
+ RankLambda[,r] = rep(rank.min + rep(0:(deltaRank-1), deltaRank^(r-1), each=deltaRank^(k-r)), each = L)
}
-
+ RankLambda[,k+1] = rep(1:L, times = Size)
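+ # Illustration (small values chosen only to make the layout visible): with k = 2,
+ # rank.min = 1, rank.max = 2 and L = 2 lambdas, RankLambda has Size*L = 8 rows:
+ # (1,1,1), (1,1,2), (1,2,1), (1,2,2), (2,1,1), (2,1,2), (2,2,1), (2,2,2),
+ # i.e. each of the 4 rank combinations paired with each of the 2 lambda indices.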
+
if (ncores > 1)
- {
+ {
cl = parallel::makeCluster(ncores, outfile='')
parallel::clusterExport( cl, envir=environment(),
- varlist=c("A1","Size","Pi","Rho","mini","maxi","X","Y","tau",
- "Rank","m","phi","ncores","verbose") )
- }
-
- computeAtLambda <- function(lambdaIndex)
- {
- if (ncores > 1)
- require("valse") #workers start with an empty environment
-
- # on ne garde que les colonnes actives
- # 'active' sera l'ensemble des variables informatives
- active = A1[,lambdaIndex]
- active = active[-(active==0)]
- phi = array(0, dim=c(p,m,k,Size))
- llh = matrix(0, Size, 2) #log-likelihood
- if (length(active) > 0)
- {
- for (j in 1:Size)
- {
- res = EMGrank(Pi[,lambdaIndex], Rho[,,,lambdaIndex], mini, maxi,
- X[,active], Y, tau, Rank[j,], fast)
- llh = rbind(llh,
- c( res$LLF, sum(Rank[j,] * (length(active)- Rank[j,] + m)) ) )
- phi[active,,,] = rbind(phi[active,,,], res$phi)
+ varlist=c("A1","Size","Pi","Rho","mini","maxi","X","Y","eps",
+ "Rank","m","phi","ncores","verbose") )
+ }
+
+ computeAtLambda <- function(index)
+ {
+ lambdaIndex = RankLambda[index,k+1]
+ rankIndex = RankLambda[index,1:k]
+ if (ncores > 1)
+ require("valse") #workers start with an empty environment
+
+ # 'relevant' will be the set of relevant columns
+ selected = S[[lambdaIndex]]$selected
+ relevant = c()
+ for (j in 1:p){
+ if (length(selected[[j]])>0){
+ relevant = c(relevant,j)
}
}
- list("llh"=llh, "phi"=phi)
- }
-
- #Pour chaque lambda de la grille, on calcule les coefficients
+ if (max(rankIndex)<length(relevant)){
+ phi = array(0, dim=c(p,m,k))
+ if (length(relevant) > 0)
+ {
+ res = EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi,
+ X[,relevant], Y, eps, rankIndex, fast)
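+ # Model dimension: a rank-R coefficient matrix of size length(relevant) x m
+ # has R*(length(relevant) + m - R) free parameters; we sum this over the k components.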
+ llh = c( res$LLF, sum(rankIndex * (length(relevant)- rankIndex + m)) )
+ phi[relevant,,] = res$phi
+ }
+ list("llh"=llh, "phi"=phi, "pi" = S[[lambdaIndex]]$Pi, "rho" = S[[lambdaIndex]]$Rho)
+
+ }
+ }
+
+ # For each (lambda, rank combination) pair in the grid, we compute the estimators
out =
- if (ncores > 1)
- parLapply(cl, seq_along(glambda), computeAtLambda)
- else
- lapply(seq_along(glambda), computeAtLambda)
-
- if (ncores > 1)
+ if (ncores > 1)
+ parLapply(cl, seq_len(length(S)*Size), computeAtLambda)
+ else
+ lapply(seq_len(length(S)*Size), computeAtLambda)
+
+ if (ncores > 1)
parallel::stopCluster(cl)
-
- # TODO: this is a bit ugly. Better use bigmemory and fill llh/phi in-place
- # (but this also adds a dependency...)
- llh <- do.call( rbind, lapply(out, function(model) model$llh) )
- phi <- do.call( rbind, lapply(out, function(model) model$phi) )
- list("llh"=llh, "phi"=phi)
+
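+ # Note: entries of 'out' are NULL when the requested rank combination is too large
+ # for the number of relevant columns (see the condition in computeAtLambda).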
+ out
}
maxiInit = 11
new_EMG = EMGLLF(phiInit1[,,,repet], rhoInit1[,,,repet], piInit1[repet,],
- gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, tau=1e-4, fast)
+ gamInit1[,,repet], miniInit, maxiInit, gamma=1, lambda=0, X, Y, eps=1e-4, fast)
LLFEessai = new_EMG$LLF
LLFinit1[repet] = LLFEessai[length(LLFEessai)]
}
#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
#' @param kmin integer, minimum number of clusters, by default = 2
#' @param kmax integer, maximum number of clusters, by default = 3
-#' @param rang.min integer, minimum rank in the low rank procedure, by default = 1
-#' @param rang.max integer, maximum rank in the
+#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
#' @param ncores_outer Number of cores for the outer loop on k
#' @param ncores_inner Number of cores for the inner loop on lambda
+#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
#' @param size_coll_mod (Maximum) size of a collection of models
#' @param fast TRUE to use compiled C code, FALSE for R code only
#' @param verbose TRUE to show some execution traces
#' #TODO: a few examples
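+#' @examples
+#' # Illustrative sketch (not run) on simulated data; the data-generating choices
+#' # below are arbitrary placeholders, not a recommended simulation design.
+#' \dontrun{
+#' n <- 100; p <- 5; m <- 2
+#' X <- matrix(rnorm(n*p), n, p)
+#' Y <- X %*% matrix(rnorm(p*m), p, m) + matrix(rnorm(n*m), n, m)
+#' res <- valse(X, Y, procedure = 'LassoMLE', kmin = 2, kmax = 2,
+#'   ncores_outer = 1, ncores_inner = 1, verbose = TRUE)
+#' }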
#' @export
valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50,
- eps=1e-4, kmin=2, kmax=2, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1,
+ eps=1e-4, kmin=2, kmax=3, rank.min=1, rank.max=5, ncores_outer=1, ncores_inner=1,
+ thresh=1e-8,
size_coll_mod=10, fast=TRUE, verbose=FALSE, plot = TRUE)
{
p = dim(X)[2]
{
cl = parallel::makeCluster(ncores_outer, outfile='')
parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure",
- "selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max",
- "ncores_outer","ncores_inner","verbose","p","m") )
+ "selecMod","gamma","mini","maxi","eps","kmin","kmax","rank.min","rank.max",
+ "ncores_outer","ncores_inner","thresh","size_coll_mod","verbose","p","m") )
}
# Compute models with k components
#select variables according to each regularization parameter
#from the grid: S$selected corresponding to selected variables
S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma,
- grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
+ grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
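+ # S is a list over the lambda grid; each entry provides $selected, $Pi and $Rho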
if (procedure == 'LassoMLE')
{
print('run the procedure Lasso-MLE')
#compute parameter estimates, with the Maximum Likelihood
#Estimator, restricted to the selected variables.
- models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
- mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose)
+ models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
+ mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
+
}
else
{
print('run the procedure Lasso-Rank')
#compute parameter estimates, with the Low Rank
#Estimator, restricted to the selected variables.
- models <- constructionModelesLassoRank(S$Pi, S$Rho, mini, maxi, X, Y, eps, S,
+ models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps,
rank.min, rank.max, ncores_inner, fast, verbose)
}
#warning! Some models are NULL after running selectVariables
#' @param glambda grid of regularization parameters
#' @param X matrix of regressors
#' @param Y matrix of responses
-#' @param thres threshold to consider a coefficient to be equal to 0
-#' @param tau threshold to say that EM algorithm has converged
+#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
+#' @param eps threshold to say that EM algorithm has converged
#' @param ncores Number or cores for parallel execution (1 to disable)
#'
#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
#' @export
#'
selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
- X,Y,thresh,tau, ncores=3, fast=TRUE)
+ X, Y, thresh=1e-8, eps, ncores=3, fast=TRUE)
{
if (ncores > 1)
{
cl = parallel::makeCluster(ncores, outfile='')
parallel::clusterExport(cl=cl,
- varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
+ varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","eps"),
envir=environment())
}
# Computation for a fixed lambda
computeCoefs <- function(lambda)
{
- params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
+ params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,fast)
p = dim(phiInit)[1]
m = dim(phiInit)[2]