#'
#' @export
EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
- mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
+ mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE)
{
- if (!fast)
- {
- # Function in R
- return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
- }
-
- # Function in C
- n = nrow(X) #nombre d'echantillons
- p = ncol(X) #nombre de covariables
- m = ncol(Y) #taille de Y (multivarié)
- k = length(piInit) #nombre de composantes dans le mélange
- .Call("EMGLLF",
- phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau,
- phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi),
- S=double(p*m*k), affec=integer(n),
- n, p, m, k,
- PACKAGE="valse")
+ if (!fast)
+ {
+ # Function in R
+ return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
+ }
+
+ # Function in C
+ n = nrow(X) #nombre d'echantillons
+ p = ncol(X) #nombre de covariables
+ m = ncol(Y) #taille de Y (multivarié)
+ k = length(piInit) #nombre de composantes dans le mélange
+ .Call("EMGLLF",
+ phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau,
+ phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi),
+ S=double(p*m*k), affec=integer(n),
+ n, p, m, k,
+ PACKAGE="valse")
}
# R version - slow but easy to read
-.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
+.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X2,Y,tau)
{
- # Matrix dimensions
- n = dim(X)[1]
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
-
- # Outputs
- phi = phiInit
- rho = rhoInit
- pi = piInit
- llh = -Inf
- S = array(0, dim=c(p,m,k))
-
- # Algorithm variables
- gam = gamInit
- Gram2 = array(0, dim=c(p,p,k))
- ps2 = array(0, dim=c(p,m,k))
- X2 = array(0, dim=c(n,p,k))
- Y2 = array(0, dim=c(n,m,k))
- EPS = 1e-15
-
- for (ite in 1:maxi)
- {
- # Remember last pi,rho,phi values for exit condition in the end of loop
- Phi = phi
- Rho = rho
- Pi = pi
-
- # Calcul associé à Y et X
- for (r in 1:k)
- {
- for (mm in 1:m)
- Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
- for (i in 1:n)
- X2[i,,r] = sqrt(gam[i,r]) * X[i,]
- for (mm in 1:m)
- ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
- for (j in 1:p)
- {
- for (s in 1:p)
- Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
- }
- }
-
- ##########
- #Etape M #
- ##########
-
- # Pour pi
- b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
- gam2 = colSums(gam)
- a = sum(gam %*% log(pi))
-
- # Tant que les props sont negatives
- kk = 0
- pi2AllPositive = FALSE
- while (!pi2AllPositive)
- {
- pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
- pi2AllPositive = all(pi2 >= 0)
- kk = kk+1
- }
-
- # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
- while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
- -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
- {
- pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
- kk = kk + 1
- }
- t = 0.1^kk
- pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
- #Pour phi et rho
- for (r in 1:k)
- {
- for (mm in 1:m)
- {
- ps = 0
- for (i in 1:n)
- ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
- nY2 = sum(Y2[,mm,r]^2)
- rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
- }
- }
-
- for (r in 1:k)
- {
- for (j in 1:p)
- {
- for (mm in 1:m)
- {
- S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
- if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
- phi[j,mm,r]=0
- else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
- phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
- else
- phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
- }
- }
- }
-
- ##########
- #Etape E #
- ##########
-
- # Precompute det(rho[,,r]) for r in 1...k
- detRho = sapply(1:k, function(r) det(rho[,,r]))
-
- sumLogLLH = 0
- for (i in 1:n)
- {
- # Update gam[,]
- sumGamI = 0
- for (r in 1:k)
- {
- gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
- sumGamI = sumGamI + gam[i,r]
- }
- sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2))
- if (sumGamI > EPS) #else: gam[i,] is already ~=0
- gam[i,] = gam[i,] / sumGamI
- }
-
- sumPen = sum(pi^gamma * b)
- last_llh = llh
- llh = -sumLogLLH/n + lambda*sumPen
- dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
- Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
- Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
- Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
- dist2 = max(Dist1,Dist2,Dist3)
-
- if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
- break
- }
-
- affec = apply(gam, 1, which.max)
- list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec )
+ # Matrix dimensions
+ n = dim(Y)[1]
+ if (length(dim(phiInit)) == 2){
+ p = 1
+ m = dim(phiInit)[1]
+ k = dim(phiInit)[2]
+ } else {
+ p = dim(phiInit)[1]
+ m = dim(phiInit)[2]
+ k = dim(phiInit)[3]
+ }
+ X = matrix(nrow = n, ncol = p)
+ X[1:n,1:p] = X2
+ # Outputs
+ phi = array(NA, dim = c(p,m,k))
+ phi[1:p,,] = phiInit
+ rho = rhoInit
+ pi = piInit
+ llh = -Inf
+ S = array(0, dim=c(p,m,k))
+
+ # Algorithm variables
+ gam = gamInit
+ Gram2 = array(0, dim=c(p,p,k))
+ ps2 = array(0, dim=c(p,m,k))
+ X2 = array(0, dim=c(n,p,k))
+ Y2 = array(0, dim=c(n,m,k))
+ EPS = 1e-15
+
+ for (ite in 1:maxi)
+ {
+ # Remember last pi,rho,phi values for exit condition in the end of loop
+ Phi = phi
+ Rho = rho
+ Pi = pi
+
+ # Computations associated to X and Y
+ for (r in 1:k)
+ {
+ for (mm in 1:m)
+ Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+ for (i in 1:n)
+ X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+ for (mm in 1:m)
+ ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+ for (j in 1:p)
+ {
+ for (s in 1:p)
+ Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+ }
+ }
+
+ #########
+ #M step #
+ #########
+
+ # For pi
+ b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
+ gam2 = colSums(gam)
+ a = sum(gam %*% log(pi))
+
+ # While the proportions are nonpositive
+ kk = 0
+ pi2AllPositive = FALSE
+ while (!pi2AllPositive)
+ {
+ pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+ pi2AllPositive = all(pi2 >= 0)
+ kk = kk+1
+ }
+
+ # t(m) is the largest value in the grid O.1^k such that it is nonincreasing
+ while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+ -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
+ {
+ pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+ kk = kk + 1
+ }
+ t = 0.1^kk
+ pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+
+ #For phi and rho
+ for (r in 1:k)
+ {
+ for (mm in 1:m)
+ {
+ ps = 0
+ for (i in 1:n)
+ ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+ nY2 = sum(Y2[,mm,r]^2)
+ rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+ }
+ }
+
+ for (r in 1:k)
+ {
+ for (j in 1:p)
+ {
+ for (mm in 1:m)
+ {
+ S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
+ if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
+ phi[j,mm,r]=0
+ else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
+ phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
+ else
+ phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
+ }
+ }
+ }
+
+ ########
+ #E step#
+ ########
+
+ # Precompute det(rho[,,r]) for r in 1...k
+ detRho = sapply(1:k, function(r) det(rho[,,r]))
+ gam1 = matrix(0, nrow = n, ncol = k)
+ for (i in 1:n)
+ {
+ # Update gam[,]
+ for (r in 1:k)
+ {
+ gam1[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
+ }
+ }
+ gam = gam1 / rowSums(gam1)
+ sumLogLLH = sum(log(rowSums(gam)) - log((2*base::pi)^(m/2)))
+ sumPen = sum(pi^gamma * b)
+ last_llh = llh
+ llh = -sumLogLLH/n + lambda*sumPen
+ dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
+ Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
+ Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+ Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
+ dist2 = max(Dist1,Dist2,Dist3)
+
+ if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
+ break
+ }
+
+ list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S)
}
p = dim(phiInit)[1]
m = dim(phiInit)[2]
k = dim(phiInit)[3]
-
sel.lambda = S[[lambda]]$selected
# col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
col.sel <- which( sapply(sel.lambda,length) > 0 ) #if list of selected vars
-
if (length(col.sel) == 0)
return (NULL)
piLambda = res$pi
phiLambda = array(0, dim = c(p,m,k))
for (j in seq_along(col.sel))
- phiLambda[col.sel[j],,] = phiLambda2[j,,]
+ phiLambda[col.sel[j],sel.lambda[[j]],] = phiLambda2[j,sel.lambda[[j]],]
dimension = length(unlist(sel.lambda))
# Computation of the loglikelihood
densite = vector("double",n)
for (r in 1:k)
{
- delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r]))
+ if (length(col.sel)==1){
+ delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%t(phiLambda[col.sel,,r])))
+ } else delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r]))
densite = densite + piLambda[r] *
det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-diag(tcrossprod(delta))/2.0)
}
#' #TODO: a few examples
#' @export
valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50,
- eps=1e-4, kmin=2, kmax=4, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1,
- size_coll_mod=50, fast=TRUE, verbose=FALSE, plot = TRUE)
+ eps=1e-4, kmin=2, kmax=2, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1,
+ size_coll_mod=10, fast=TRUE, verbose=FALSE, plot = TRUE)
{
p = dim(X)[2]
m = dim(Y)[2]
n = dim(X)[1]
-
+
if (verbose)
- print("main loop: over all k and all lambda")
-
- if (ncores_outer > 1)
- {
- cl = parallel::makeCluster(ncores_outer, outfile='')
- parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure",
- "selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max",
- "ncores_outer","ncores_inner","verbose","p","m") )
- }
-
- # Compute models with k components
- computeModels <- function(k)
- {
- if (ncores_outer > 1)
- require("valse") #nodes start with an empty environment
-
- if (verbose)
- print(paste("Parameters initialization for k =",k))
- #smallEM initializes parameters by k-means and regression model in each component,
+ print("main loop: over all k and all lambda")
+
+ if (ncores_outer > 1)
+ {
+ cl = parallel::makeCluster(ncores_outer, outfile='')
+ parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure",
+ "selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max",
+ "ncores_outer","ncores_inner","verbose","p","m") )
+ }
+
+ # Compute models with k components
+ computeModels <- function(k)
+ {
+ if (ncores_outer > 1)
+ require("valse") #nodes start with an empty environment
+
+ if (verbose)
+ print(paste("Parameters initialization for k =",k))
+ #smallEM initializes parameters by k-means and regression model in each component,
#doing this 20 times, and keeping the values maximizing the likelihood after 10
#iterations of the EM algorithm.
P = initSmallEM(k, X, Y)
grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y,
- gamma, mini, maxi, eps, fast)
+ gamma, mini, maxi, eps, fast)
if (length(grid_lambda)>size_coll_mod)
grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
-
- if (verbose)
- print("Compute relevant parameters")
+
+ if (verbose)
+ print("Compute relevant parameters")
#select variables according to each regularization parameter
#from the grid: S$selected corresponding to selected variables
S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma,
- grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
+ grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps?
if (procedure == 'LassoMLE')
- {
+ {
if (verbose)
- print('run the procedure Lasso-MLE')
+ print('run the procedure Lasso-MLE')
#compute parameter estimations, with the Maximum Likelihood
#Estimator, restricted on selected variables.
models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
- mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose)
+ mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose)
}
- else
- {
+ else
+ {
if (verbose)
- print('run the procedure Lasso-Rank')
+ print('run the procedure Lasso-Rank')
#compute parameter estimations, with the Low Rank
#Estimator, restricted on selected variables.
models <- constructionModelesLassoRank(S$Pi, S$Rho, mini, maxi, X, Y, eps, S,
- rank.min, rank.max, ncores_inner, fast, verbose)
+ rank.min, rank.max, ncores_inner, fast, verbose)
}
- #attention certains modeles sont NULL après selectVariables
- models = models[sapply(models, function(cell) !is.null(cell))]
+ #warning! Some models are NULL after running selectVariables
+ models = models[sapply(models, function(cell) !is.null(cell))]
models
}
-
- # List (index k) of lists (index lambda) of models
- models_list <-
- if (ncores_outer > 1)
- parLapply(cl, kmin:kmax, computeModels)
- else
- lapply(kmin:kmax, computeModels)
- if (ncores_outer > 1)
- parallel::stopCluster(cl)
-
- if (! requireNamespace("capushe", quietly=TRUE))
- {
- warning("'capushe' not available: returning all models")
- return (models_list)
- }
-
- # Get summary "tableauRecap" from models
- tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) {
- models <- models_list[[i]]
- #Pour un groupe de modeles (même k, différents lambda):
- LLH <- sapply( models, function(model) model$llh[1] )
- k = length(models[[1]]$pi)
- sumPen = sapply(models, function(model)
- k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1)
- data.frame(model=paste(i,".",seq_along(models),sep=""),
- pen=sumPen/n, complexity=sumPen, contrast=-LLH)
- } ) )
-print(tableauRecap)
+
+ # List (index k) of lists (index lambda) of models
+ models_list <-
+ if (ncores_outer > 1)
+ parLapply(cl, kmin:kmax, computeModels)
+ else
+ lapply(kmin:kmax, computeModels)
+ if (ncores_outer > 1)
+ parallel::stopCluster(cl)
+
+ if (! requireNamespace("capushe", quietly=TRUE))
+ {
+ warning("'capushe' not available: returning all models")
+ return (models_list)
+ }
+
+ # Get summary "tableauRecap" from models
+ tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) {
+ models <- models_list[[i]]
+ #For a collection of models (same k, several lambda):
+ LLH <- sapply( models, function(model) model$llh[1] )
+ k = length(models[[1]]$pi)
+ sumPen = sapply(models, function(model)
+ k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1)
+ data.frame(model=paste(i,".",seq_along(models),sep=""),
+ pen=sumPen/n, complexity=sumPen, contrast=-LLH)
+ } ) )
+
+ print(tableauRecap)
+ tableauRecap = tableauRecap[which(tableauRecap[,4]!= Inf),]
modSel = capushe::capushe(tableauRecap, n)
indModSel <-
- if (selecMod == 'DDSE')
- as.numeric(modSel@DDSE@model)
- else if (selecMod == 'Djump')
- as.numeric(modSel@Djump@model)
- else if (selecMod == 'BIC')
- modSel@BIC_capushe$model
- else if (selecMod == 'AIC')
- modSel@AIC_capushe$model
-
+ if (selecMod == 'DDSE')
+ as.numeric(modSel@DDSE@model)
+ else if (selecMod == 'Djump')
+ as.numeric(modSel@Djump@model)
+ else if (selecMod == 'BIC')
+ modSel@BIC_capushe$model
+ else if (selecMod == 'AIC')
+ modSel@AIC_capushe$model
+
mod = as.character(tableauRecap[indModSel,1])
listMod = as.integer(unlist(strsplit(mod, "[.]")))
modelSel = models_list[[listMod[1]]][[listMod[2]]]
Gam = Gam/rowSums(Gam)
modelSel$affec = apply(Gam, 1,which.max)
modelSel$proba = Gam
-
- if (plot){
+
+ if (plot){
print(plot_valse(modelSel,n))
}
#'
#' @export
#'
-plot_valse = function(model,n){
+plot_valse = function(model,n, comp = FALSE, k1 = NA, k2 = NA){
require("gridExtra")
require("ggplot2")
require("reshape2")
print(gReg)
## Differences between two clusters
- k1 = 1
- k2 = 2
- Melt = melt(t(model$phi[,,k1]-model$phi[,,k2]))
- gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
- scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") +
- ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2))
- print(gDiff)
+ if (comp){
+ if (is.na(k1) || is.na(k)){print('k1 and k2 must be integers, representing the clusters you want to compare')}
+ Melt = melt(t(model$phi[,,k1]-model$phi[,,k2]))
+ gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
+ scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") +
+ ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2))
+ print(gDiff)
+
+ }
### Covariance matrices
matCov = matrix(NA, nrow = dim(model$rho[,,1])[1], ncol = K)
ggtitle("Covariance matrices")
print(gCov )
- ### proportions
+ ### Proportions
gam2 = matrix(NA, ncol = K, nrow = n)
for (i in 1:n){
- gam2[i, ] = c(model$Gam[i, model$affec[i]], model$affec[i])
+ gam2[i, ] = c(model$proba[i, model$affec[i]], model$affec[i])
}
bp <- ggplot(data.frame(gam2), aes(x=X2, y=X1, color=X2, group = X2)) +
geom_boxplot() + theme(legend.position = "none")+ background_grid(major = "xy", minor = "none")
- print(bp )
+ print(bp)
### Mean in each cluster
XY = cbind(X,Y)
XY_class= list()
meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2])
for (r in 1:K){
- XY_class[[r]] = XY[affec == r, ]
- meanPerClass[,r] = apply(XY_class[[r]], 2, mean)
+ XY_class[[r]] = XY[model$affec == r, ]
+ if (sum(model$affec==r) == 1){
+ meanPerClass[,r] = XY_class[[r]]
+ } else {
+ meanPerClass[,r] = apply(XY_class[[r]], 2, mean)
+ }
}
data = data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2],K))
g = ggplot(data, aes(x=time, y = mean, group = cluster, color = cluster))
#' @export
#'
selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
- X,Y,thresh,tau, ncores=3, fast=TRUE)
+ X,Y,thresh,tau, ncores=3, fast=TRUE)
{
- if (ncores > 1)
- {
- cl = parallel::makeCluster(ncores, outfile='')
- parallel::clusterExport(cl=cl,
- varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
- envir=environment())
- }
-
- # Calcul pour un lambda
- computeCoefs <- function(lambda)
- {
- params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
-
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
-
- #selectedVariables: list where element j contains vector of selected variables in [1,m]
- selectedVariables = lapply(1:p, function(j) {
- #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
- #and finally return the corresponding indices
- seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
- })
-
- list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
- }
-
- # Pour chaque lambda de la grille, on calcule les coefficients
- out <-
- if (ncores > 1)
- parLapply(cl, glambda, computeCoefs)
- else
- lapply(glambda, computeCoefs)
- if (ncores > 1)
- parallel::stopCluster(cl)
-
- # Suppression doublons
- sha1_array <- lapply(out, digest::sha1)
- out[ !duplicated(sha1_array) ]
-
- out
+ if (ncores > 1)
+ {
+ cl = parallel::makeCluster(ncores, outfile='')
+ parallel::clusterExport(cl=cl,
+ varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
+ envir=environment())
+ }
+
+ # Computation for a fixed lambda
+ computeCoefs <- function(lambda)
+ {
+ params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast)
+
+ p = dim(phiInit)[1]
+ m = dim(phiInit)[2]
+
+ #selectedVariables: list where element j contains vector of selected variables in [1,m]
+ selectedVariables = lapply(1:p, function(j) {
+ #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
+ #and finally return the corresponding indices
+ seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
+ })
+
+ list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
+ }
+
+ # For each lambda in the grid, we compute the coefficients
+ out <-
+ if (ncores > 1)
+ parLapply(cl, glambda, computeCoefs)
+ else
+ lapply(glambda, computeCoefs)
+ if (ncores > 1)
+ parallel::stopCluster(cl)
+ # Suppress models which are computed twice
+ #En fait, ca ca fait la comparaison de tous les parametres
+ #On veut juste supprimer ceux qui ont les memes variables sélectionnées
+ #sha1_array <- lapply(out, digest::sha1)
+ #out[ duplicated(sha1_array) ]
+ selec = lapply(out, function(model) model$selected)
+ ind_dup = duplicated(selec)
+ ind_uniq = which(!ind_dup)
+ out2 = list()
+ for (l in 1:length(ind_uniq)){
+ out2[[l]] = out[[ind_uniq[l]]]
+ }
+ out2
}
## Modele
###########
K = 2
- p = 20
+ p = 48
T = seq(0,1.5,length.out = p)
T2 = seq(0,3, length.out = 2*p)
- n = 30
+ n = 100
x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1
sigmaX = 0.12
sigmaY = 0.12
###########
require(wavelets)
XY = array(0, dim = c(2*p,n))
- Xproj = array(0, dim=c(48,n))
- Yproj = array(0, dim=c(48,n))
XYproj = array(0, dim=c(96,n))
x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n)
affec = sample(c(1,2), n, replace = TRUE)
Ay = dwt(y[,i], filter='haar')@V
Ay = rev(unlist(Ay))
Ay = Ay[2:(1+3)]
- Xproj[,i] = c(Ax,Dx)
- Yproj[,i] = c(Ay,Dy)
XYproj[,i] = c(Ax,Dx,Ay,Dy)
}
- res_valse = valse(x,y)
- res_valse_proj = valse(Xproj, Yproj)
+ res_valse = valse(x,y, kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 200)
+ res_valse_proj = valse(XYproj[1:p,],XYproj[(p+1):(2*p),], kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 200)
- save(res_valse,file=paste("./Out/Res_",ite, ".RData",sep=""))
- save(res_valse_proj,file=paste("./Out/ResProj_",ite, ".RData",sep=""))
+ save(res_valse,file=paste("Res_",ite, ".RData",sep=""))
+ save(res_valse_proj,file=paste("ResProj_",ite, ".RData",sep=""))
}