From fb6e49cb85308c3f99cc98fe955aa7c36839c819 Mon Sep 17 00:00:00 2001 From: emilie Date: Wed, 12 Apr 2017 17:57:28 +0200 Subject: [PATCH] fix many problems (models appearing twice, irrelevant coefficients in a relevant column among others) --- pkg/R/EMGLLF.R | 320 ++++++++++++++-------------- pkg/R/constructionModelesLassoMLE.R | 8 +- pkg/R/main.R | 152 ++++++------- pkg/R/plot_valse.R | 32 +-- pkg/R/selectVariables.R | 89 ++++---- reports/simulData_17mars.R | 16 +- 6 files changed, 315 insertions(+), 302 deletions(-) diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R index 1739204..5a69a52 100644 --- a/pkg/R/EMGLLF.R +++ b/pkg/R/EMGLLF.R @@ -23,168 +23,170 @@ #' #' @export EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, - mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE) + mini, maxi, gamma, lambda, X, Y, tau, fast=TRUE) { - if (!fast) - { - # Function in R - return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)) - } - - # Function in C - n = nrow(X) #nombre d'echantillons - p = ncol(X) #nombre de covariables - m = ncol(Y) #taille de Y (multivarié) - k = length(piInit) #nombre de composantes dans le mélange - .Call("EMGLLF", - phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau, - phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi), - S=double(p*m*k), affec=integer(n), - n, p, m, k, - PACKAGE="valse") + if (!fast) + { + # Function in R + return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)) + } + + # Function in C + n = nrow(X) #nombre d'echantillons + p = ncol(X) #nombre de covariables + m = ncol(Y) #taille de Y (multivarié) + k = length(piInit) #nombre de composantes dans le mélange + .Call("EMGLLF", + phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau, + phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi), + S=double(p*m*k), affec=integer(n), + n, p, m, k, + PACKAGE="valse") } # R version - slow but easy to read -.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau) +.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X2,Y,tau) { - # Matrix dimensions - n = dim(X)[1] - p = dim(phiInit)[1] - m = dim(phiInit)[2] - k = dim(phiInit)[3] - - # Outputs - phi = phiInit - rho = rhoInit - pi = piInit - llh = -Inf - S = array(0, dim=c(p,m,k)) - - # Algorithm variables - gam = gamInit - Gram2 = array(0, dim=c(p,p,k)) - ps2 = array(0, dim=c(p,m,k)) - X2 = array(0, dim=c(n,p,k)) - Y2 = array(0, dim=c(n,m,k)) - EPS = 1e-15 - - for (ite in 1:maxi) - { - # Remember last pi,rho,phi values for exit condition in the end of loop - Phi = phi - Rho = rho - Pi = pi - - # Calcul associé à Y et X - for (r in 1:k) - { - for (mm in 1:m) - Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] - for (i in 1:n) - X2[i,,r] = sqrt(gam[i,r]) * X[i,] - for (mm in 1:m) - ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) - for (j in 1:p) - { - for (s in 1:p) - Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r]) - } - } - - ########## - #Etape M # - ########## - - # Pour pi - b = sapply( 1:k, function(r) sum(abs(phi[,,r])) ) - gam2 = colSums(gam) - a = sum(gam %*% log(pi)) - - # Tant que les props sont negatives - kk = 0 - pi2AllPositive = FALSE - while (!pi2AllPositive) - { - pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi) - pi2AllPositive = all(pi2 >= 0) - kk = kk+1 - } - - # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante - while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < - -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) ) - { - pi2 = pi + 0.1^kk * (1/n*gam2 - pi) - kk = kk + 1 - } - t = 0.1^kk - pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) - - #Pour phi et rho - for (r in 1:k) - { - for (mm in 1:m) - { - ps = 0 - for (i in 1:n) - ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r]) - nY2 = sum(Y2[,mm,r]^2) - rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2) - } - } - - for (r in 1:k) - { - for (j in 1:p) - { - for (mm in 1:m) - { - S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r]) - if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma)) - phi[j,mm,r]=0 - else if(S[j,mm,r] > n*lambda*(pi[r]^gamma)) - phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r] - else - phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r] - } - } - } - - ########## - #Etape E # - ########## - - # Precompute det(rho[,,r]) for r in 1...k - detRho = sapply(1:k, function(r) det(rho[,,r])) - - sumLogLLH = 0 - for (i in 1:n) - { - # Update gam[,] - sumGamI = 0 - for (r in 1:k) - { - gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r] - sumGamI = sumGamI + gam[i,r] - } - sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2)) - if (sumGamI > EPS) #else: gam[i,] is already ~=0 - gam[i,] = gam[i,] / sumGamI - } - - sumPen = sum(pi^gamma * b) - last_llh = llh - llh = -sumLogLLH/n + lambda*sumPen - dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) ) - Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) ) - Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) - Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) ) - dist2 = max(Dist1,Dist2,Dist3) - - if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau))) - break - } - - affec = apply(gam, 1, which.max) - list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec ) + # Matrix dimensions + n = dim(Y)[1] + if (length(dim(phiInit)) == 2){ + p = 1 + m = dim(phiInit)[1] + k = dim(phiInit)[2] + } else { + p = dim(phiInit)[1] + m = dim(phiInit)[2] + k = dim(phiInit)[3] + } + X = matrix(nrow = n, ncol = p) + X[1:n,1:p] = X2 + # Outputs + phi = array(NA, dim = c(p,m,k)) + phi[1:p,,] = phiInit + rho = rhoInit + pi = piInit + llh = -Inf + S = array(0, dim=c(p,m,k)) + + # Algorithm variables + gam = gamInit + Gram2 = array(0, dim=c(p,p,k)) + ps2 = array(0, dim=c(p,m,k)) + X2 = array(0, dim=c(n,p,k)) + Y2 = array(0, dim=c(n,m,k)) + EPS = 1e-15 + + for (ite in 1:maxi) + { + # Remember last pi,rho,phi values for exit condition in the end of loop + Phi = phi + Rho = rho + Pi = pi + + # Computations associated to X and Y + for (r in 1:k) + { + for (mm in 1:m) + Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] + for (i in 1:n) + X2[i,,r] = sqrt(gam[i,r]) * X[i,] + for (mm in 1:m) + ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) + for (j in 1:p) + { + for (s in 1:p) + Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r]) + } + } + + ######### + #M step # + ######### + + # For pi + b = sapply( 1:k, function(r) sum(abs(phi[,,r])) ) + gam2 = colSums(gam) + a = sum(gam %*% log(pi)) + + # While the proportions are nonpositive + kk = 0 + pi2AllPositive = FALSE + while (!pi2AllPositive) + { + pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi) + pi2AllPositive = all(pi2 >= 0) + kk = kk+1 + } + + # t(m) is the largest value in the grid O.1^k such that it is nonincreasing + while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) ) + { + pi2 = pi + 0.1^kk * (1/n*gam2 - pi) + kk = kk + 1 + } + t = 0.1^kk + pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) + + #For phi and rho + for (r in 1:k) + { + for (mm in 1:m) + { + ps = 0 + for (i in 1:n) + ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r]) + nY2 = sum(Y2[,mm,r]^2) + rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2) + } + } + + for (r in 1:k) + { + for (j in 1:p) + { + for (mm in 1:m) + { + S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r]) + if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma)) + phi[j,mm,r]=0 + else if(S[j,mm,r] > n*lambda*(pi[r]^gamma)) + phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r] + else + phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r] + } + } + } + + ######## + #E step# + ######## + + # Precompute det(rho[,,r]) for r in 1...k + detRho = sapply(1:k, function(r) det(rho[,,r])) + gam1 = matrix(0, nrow = n, ncol = k) + for (i in 1:n) + { + # Update gam[,] + for (r in 1:k) + { + gam1[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r] + } + } + gam = gam1 / rowSums(gam1) + sumLogLLH = sum(log(rowSums(gam)) - log((2*base::pi)^(m/2))) + sumPen = sum(pi^gamma * b) + last_llh = llh + llh = -sumLogLLH/n + lambda*sumPen + dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) ) + Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) ) + Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) + Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) ) + dist2 = max(Dist1,Dist2,Dist3) + + if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau))) + break + } + + list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S) } diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R index 227dfdc..b864985 100644 --- a/pkg/R/constructionModelesLassoMLE.R +++ b/pkg/R/constructionModelesLassoMLE.R @@ -31,11 +31,9 @@ constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, p = dim(phiInit)[1] m = dim(phiInit)[2] k = dim(phiInit)[3] - sel.lambda = S[[lambda]]$selected # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix col.sel <- which( sapply(sel.lambda,length) > 0 ) #if list of selected vars - if (length(col.sel) == 0) return (NULL) @@ -49,14 +47,16 @@ constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, piLambda = res$pi phiLambda = array(0, dim = c(p,m,k)) for (j in seq_along(col.sel)) - phiLambda[col.sel[j],,] = phiLambda2[j,,] + phiLambda[col.sel[j],sel.lambda[[j]],] = phiLambda2[j,sel.lambda[[j]],] dimension = length(unlist(sel.lambda)) # Computation of the loglikelihood densite = vector("double",n) for (r in 1:k) { - delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r])) + if (length(col.sel)==1){ + delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%t(phiLambda[col.sel,,r]))) + } else delta = (Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r])) densite = densite + piLambda[r] * det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-diag(tcrossprod(delta))/2.0) } diff --git a/pkg/R/main.R b/pkg/R/main.R index 89c4bcd..72ee724 100644 --- a/pkg/R/main.R +++ b/pkg/R/main.R @@ -26,109 +26,111 @@ #' #TODO: a few examples #' @export valse = function(X, Y, procedure='LassoMLE', selecMod='DDSE', gamma=1, mini=10, maxi=50, - eps=1e-4, kmin=2, kmax=4, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1, - size_coll_mod=50, fast=TRUE, verbose=FALSE, plot = TRUE) + eps=1e-4, kmin=2, kmax=2, rank.min=1, rank.max=10, ncores_outer=1, ncores_inner=1, + size_coll_mod=10, fast=TRUE, verbose=FALSE, plot = TRUE) { p = dim(X)[2] m = dim(Y)[2] n = dim(X)[1] - + if (verbose) - print("main loop: over all k and all lambda") - - if (ncores_outer > 1) - { - cl = parallel::makeCluster(ncores_outer, outfile='') - parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure", - "selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max", - "ncores_outer","ncores_inner","verbose","p","m") ) - } - - # Compute models with k components - computeModels <- function(k) - { - if (ncores_outer > 1) - require("valse") #nodes start with an empty environment - - if (verbose) - print(paste("Parameters initialization for k =",k)) - #smallEM initializes parameters by k-means and regression model in each component, + print("main loop: over all k and all lambda") + + if (ncores_outer > 1) + { + cl = parallel::makeCluster(ncores_outer, outfile='') + parallel::clusterExport( cl=cl, envir=environment(), varlist=c("X","Y","procedure", + "selecMod","gamma","mini","maxi","eps","kmin","kmax","rang.min","rang.max", + "ncores_outer","ncores_inner","verbose","p","m") ) + } + + # Compute models with k components + computeModels <- function(k) + { + if (ncores_outer > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Parameters initialization for k =",k)) + #smallEM initializes parameters by k-means and regression model in each component, #doing this 20 times, and keeping the values maximizing the likelihood after 10 #iterations of the EM algorithm. P = initSmallEM(k, X, Y) grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y, - gamma, mini, maxi, eps, fast) + gamma, mini, maxi, eps, fast) if (length(grid_lambda)>size_coll_mod) grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)] - - if (verbose) - print("Compute relevant parameters") + + if (verbose) + print("Compute relevant parameters") #select variables according to each regularization parameter #from the grid: S$selected corresponding to selected variables S = selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma, - grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps? + grid_lambda, X, Y, 1e-8, eps, ncores_inner, fast) #TODO: 1e-8 as arg?! eps? if (procedure == 'LassoMLE') - { + { if (verbose) - print('run the procedure Lasso-MLE') + print('run the procedure Lasso-MLE') #compute parameter estimations, with the Maximum Likelihood #Estimator, restricted on selected variables. models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit, - mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose) + mini, maxi, gamma, X, Y, thresh, eps, S, ncores_inner, fast, verbose) } - else - { + else + { if (verbose) - print('run the procedure Lasso-Rank') + print('run the procedure Lasso-Rank') #compute parameter estimations, with the Low Rank #Estimator, restricted on selected variables. models <- constructionModelesLassoRank(S$Pi, S$Rho, mini, maxi, X, Y, eps, S, - rank.min, rank.max, ncores_inner, fast, verbose) + rank.min, rank.max, ncores_inner, fast, verbose) } - #attention certains modeles sont NULL après selectVariables - models = models[sapply(models, function(cell) !is.null(cell))] + #warning! Some models are NULL after running selectVariables + models = models[sapply(models, function(cell) !is.null(cell))] models } - - # List (index k) of lists (index lambda) of models - models_list <- - if (ncores_outer > 1) - parLapply(cl, kmin:kmax, computeModels) - else - lapply(kmin:kmax, computeModels) - if (ncores_outer > 1) - parallel::stopCluster(cl) - - if (! requireNamespace("capushe", quietly=TRUE)) - { - warning("'capushe' not available: returning all models") - return (models_list) - } - - # Get summary "tableauRecap" from models - tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) { - models <- models_list[[i]] - #Pour un groupe de modeles (même k, différents lambda): - LLH <- sapply( models, function(model) model$llh[1] ) - k = length(models[[1]]$pi) - sumPen = sapply(models, function(model) - k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1) - data.frame(model=paste(i,".",seq_along(models),sep=""), - pen=sumPen/n, complexity=sumPen, contrast=-LLH) - } ) ) -print(tableauRecap) + + # List (index k) of lists (index lambda) of models + models_list <- + if (ncores_outer > 1) + parLapply(cl, kmin:kmax, computeModels) + else + lapply(kmin:kmax, computeModels) + if (ncores_outer > 1) + parallel::stopCluster(cl) + + if (! requireNamespace("capushe", quietly=TRUE)) + { + warning("'capushe' not available: returning all models") + return (models_list) + } + + # Get summary "tableauRecap" from models + tableauRecap = do.call( rbind, lapply( seq_along(models_list), function(i) { + models <- models_list[[i]] + #For a collection of models (same k, several lambda): + LLH <- sapply( models, function(model) model$llh[1] ) + k = length(models[[1]]$pi) + sumPen = sapply(models, function(model) + k*(dim(model$rho)[1]+sum(model$phi[,,1]!=0)+1)-1) + data.frame(model=paste(i,".",seq_along(models),sep=""), + pen=sumPen/n, complexity=sumPen, contrast=-LLH) + } ) ) + + print(tableauRecap) + tableauRecap = tableauRecap[which(tableauRecap[,4]!= Inf),] modSel = capushe::capushe(tableauRecap, n) indModSel <- - if (selecMod == 'DDSE') - as.numeric(modSel@DDSE@model) - else if (selecMod == 'Djump') - as.numeric(modSel@Djump@model) - else if (selecMod == 'BIC') - modSel@BIC_capushe$model - else if (selecMod == 'AIC') - modSel@AIC_capushe$model - + if (selecMod == 'DDSE') + as.numeric(modSel@DDSE@model) + else if (selecMod == 'Djump') + as.numeric(modSel@Djump@model) + else if (selecMod == 'BIC') + modSel@BIC_capushe$model + else if (selecMod == 'AIC') + modSel@AIC_capushe$model + mod = as.character(tableauRecap[indModSel,1]) listMod = as.integer(unlist(strsplit(mod, "[.]"))) modelSel = models_list[[listMod[1]]][[listMod[2]]] @@ -144,8 +146,8 @@ print(tableauRecap) Gam = Gam/rowSums(Gam) modelSel$affec = apply(Gam, 1,which.max) modelSel$proba = Gam - - if (plot){ + + if (plot){ print(plot_valse(modelSel,n)) } diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R index 120196d..0963946 100644 --- a/pkg/R/plot_valse.R +++ b/pkg/R/plot_valse.R @@ -10,7 +10,7 @@ #' #' @export #' -plot_valse = function(model,n){ +plot_valse = function(model,n, comp = FALSE, k1 = NA, k2 = NA){ require("gridExtra") require("ggplot2") require("reshape2") @@ -28,13 +28,15 @@ plot_valse = function(model,n){ print(gReg) ## Differences between two clusters - k1 = 1 - k2 = 2 - Melt = melt(t(model$phi[,,k1]-model$phi[,,k2])) - gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) + geom_tile() + - scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") + - ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2)) - print(gDiff) + if (comp){ + if (is.na(k1) || is.na(k)){print('k1 and k2 must be integers, representing the clusters you want to compare')} + Melt = melt(t(model$phi[,,k1]-model$phi[,,k2])) + gDiff = ggplot(data = Melt, aes(x=Var1, y=Var2, fill=value)) + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") + + ggtitle(paste("Difference between regression matrices in cluster",k1, "and", k2)) + print(gDiff) + + } ### Covariance matrices matCov = matrix(NA, nrow = dim(model$rho[,,1])[1], ncol = K) @@ -47,23 +49,27 @@ plot_valse = function(model,n){ ggtitle("Covariance matrices") print(gCov ) - ### proportions + ### Proportions gam2 = matrix(NA, ncol = K, nrow = n) for (i in 1:n){ - gam2[i, ] = c(model$Gam[i, model$affec[i]], model$affec[i]) + gam2[i, ] = c(model$proba[i, model$affec[i]], model$affec[i]) } bp <- ggplot(data.frame(gam2), aes(x=X2, y=X1, color=X2, group = X2)) + geom_boxplot() + theme(legend.position = "none")+ background_grid(major = "xy", minor = "none") - print(bp ) + print(bp) ### Mean in each cluster XY = cbind(X,Y) XY_class= list() meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2]) for (r in 1:K){ - XY_class[[r]] = XY[affec == r, ] - meanPerClass[,r] = apply(XY_class[[r]], 2, mean) + XY_class[[r]] = XY[model$affec == r, ] + if (sum(model$affec==r) == 1){ + meanPerClass[,r] = XY_class[[r]] + } else { + meanPerClass[,r] = apply(XY_class[[r]], 2, mean) + } } data = data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2],K)) g = ggplot(data, aes(x=time, y = mean, group = cluster, color = cluster)) diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R index b23eac2..65fbde5 100644 --- a/pkg/R/selectVariables.R +++ b/pkg/R/selectVariables.R @@ -23,46 +23,53 @@ #' @export #' selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda, - X,Y,thresh,tau, ncores=3, fast=TRUE) + X,Y,thresh,tau, ncores=3, fast=TRUE) { - if (ncores > 1) - { - cl = parallel::makeCluster(ncores, outfile='') - parallel::clusterExport(cl=cl, - varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"), - envir=environment()) - } - - # Calcul pour un lambda - computeCoefs <- function(lambda) - { - params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast) - - p = dim(phiInit)[1] - m = dim(phiInit)[2] - - #selectedVariables: list where element j contains vector of selected variables in [1,m] - selectedVariables = lapply(1:p, function(j) { - #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector, - #and finally return the corresponding indices - seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ] - }) - - list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi) - } - - # Pour chaque lambda de la grille, on calcule les coefficients - out <- - if (ncores > 1) - parLapply(cl, glambda, computeCoefs) - else - lapply(glambda, computeCoefs) - if (ncores > 1) - parallel::stopCluster(cl) - - # Suppression doublons - sha1_array <- lapply(out, digest::sha1) - out[ !duplicated(sha1_array) ] - - out + if (ncores > 1) + { + cl = parallel::makeCluster(ncores, outfile='') + parallel::clusterExport(cl=cl, + varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"), + envir=environment()) + } + + # Computation for a fixed lambda + computeCoefs <- function(lambda) + { + params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,fast) + + p = dim(phiInit)[1] + m = dim(phiInit)[2] + + #selectedVariables: list where element j contains vector of selected variables in [1,m] + selectedVariables = lapply(1:p, function(j) { + #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector, + #and finally return the corresponding indices + seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ] + }) + + list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi) + } + + # For each lambda in the grid, we compute the coefficients + out <- + if (ncores > 1) + parLapply(cl, glambda, computeCoefs) + else + lapply(glambda, computeCoefs) + if (ncores > 1) + parallel::stopCluster(cl) + # Suppress models which are computed twice + #En fait, ca ca fait la comparaison de tous les parametres + #On veut juste supprimer ceux qui ont les memes variables sélectionnées + #sha1_array <- lapply(out, digest::sha1) + #out[ duplicated(sha1_array) ] + selec = lapply(out, function(model) model$selected) + ind_dup = duplicated(selec) + ind_uniq = which(!ind_dup) + out2 = list() + for (l in 1:length(ind_uniq)){ + out2[[l]] = out[[ind_uniq[l]]] + } + out2 } diff --git a/reports/simulData_17mars.R b/reports/simulData_17mars.R index 252e660..52148fd 100644 --- a/reports/simulData_17mars.R +++ b/reports/simulData_17mars.R @@ -5,10 +5,10 @@ simulData_17mars = function(ite){ ## Modele ########### K = 2 - p = 20 + p = 48 T = seq(0,1.5,length.out = p) T2 = seq(0,3, length.out = 2*p) - n = 30 + n = 100 x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1 sigmaX = 0.12 sigmaY = 0.12 @@ -24,8 +24,6 @@ simulData_17mars = function(ite){ ########### require(wavelets) XY = array(0, dim = c(2*p,n)) - Xproj = array(0, dim=c(48,n)) - Yproj = array(0, dim=c(48,n)) XYproj = array(0, dim=c(96,n)) x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n) affec = sample(c(1,2), n, replace = TRUE) @@ -47,14 +45,12 @@ simulData_17mars = function(ite){ Ay = dwt(y[,i], filter='haar')@V Ay = rev(unlist(Ay)) Ay = Ay[2:(1+3)] - Xproj[,i] = c(Ax,Dx) - Yproj[,i] = c(Ay,Dy) XYproj[,i] = c(Ax,Dx,Ay,Dy) } - res_valse = valse(x,y) - res_valse_proj = valse(Xproj, Yproj) + res_valse = valse(x,y, kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 200) + res_valse_proj = valse(XYproj[1:p,],XYproj[(p+1):(2*p),], kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 200) - save(res_valse,file=paste("./Out/Res_",ite, ".RData",sep="")) - save(res_valse_proj,file=paste("./Out/ResProj_",ite, ".RData",sep="")) + save(res_valse,file=paste("Res_",ite, ".RData",sep="")) + save(res_valse_proj,file=paste("ResProj_",ite, ".RData",sep="")) } -- 2.44.0