From: Benjamin Auder Date: Fri, 3 Mar 2017 10:21:57 +0000 (+0100) Subject: draft of constructionModelesLassoMLE.R X-Git-Url: https://git.auder.net/variants/current/doc/css/config.php?a=commitdiff_plain;h=07848d25af9f342f7d8e2dd103f2502d945afe54;p=valse.git draft of constructionModelesLassoMLE.R --- diff --git a/R/constructionModelesLassoMLE.R b/R/constructionModelesLassoMLE.R index 346339e..55e9419 100644 --- a/R/constructionModelesLassoMLE.R +++ b/R/constructionModelesLassoMLE.R @@ -1,57 +1,37 @@ constructionModelesLassoMLE = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda, - X,Y,seuil,tau,A1,A2) + X,Y,seuil,tau,selected) { - n = dim(X)[1]; - p = dim(phiInit)[1] - m = dim(phiInit)[2] - k = dim(phiInit)[3] - L = length(glambda) - - #output parameters - phi = array(0, dim=c(p,m,k,L)) - rho = array(0, dim=c(m,m,k,L)) - pi = matrix(0, k, L) - llh = matrix(0, L, 2) #log-likelihood - - for(lambdaIndex in 1:L) + #TODO: parameter ncores (chaque tâche peut aussi demander du parallélisme...) + cl = parallel::makeCluster( parallel::detectCores() / 4 ) + parallel::clusterExport(cl=cl, + varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","seuil","tau"), + envir=environment()) + #Pour chaque lambda de la grille, on calcule les coefficients + out = parLapply( seq_along(glambda), function(lambdaindex) { - a = A1[,1,lambdaIndex] - a = a[a!=0] - if(length(a)==0) - next + n = dim(X)[1] + p = dim(phiInit)[1] + m = dim(phiInit)[2] + k = dim(phiInit)[3] - res = EMGLLF(phiInit[a,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[,a],Y,tau) + #TODO: phiInit[selected] et X[selected] sont bien sûr faux; par quoi remplacer ? + #lambda == 0 c'est normal ? + res = EMGLLF(phiInit[selected],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[selected],Y,tau) - #TODO: supprimer ça et utiliser parLapply(...) - for (j in 1:length(a)) - phi[a[j],,,lambdaIndex] = res$phi[j,,] - rho[,,,lambdaIndex] = res$rho - pi[,lambdaIndex] = res$pi - - dimension = 0 - for (j in 1:p) #TODO: doit pouvoir être fait beaucoup plus simplement - { - b = A2[j,2:dim(A2)[2],lambdaIndex] - b = b[b!=0] - if (length(b) > 0) - phi[A2[j,1,lambdaIndex],b,,lambdaIndex] = 0. - c = A1[j,2:dim(A1)[2],lambdaIndex] - dimension = dimension + sum(c!=0) - } + #comment évaluer la dimension à partir du résultat et de [not]selected ? + #dimension = ... #on veut calculer l'EMV avec toutes nos estimations - densite = matrix(0, nrow=n, ncol=L) - for (i in 1:n) #TODO: pas besoin de cette boucle (vectorize) + densite = vector("double",n) + for (r in 1:k) { - for (r in 1:k) - { - delta = Y[i,]%*%rho[,,r,lambdaIndex] - (X[i,a]%*%phi[a,,r,lambdaIndex]); - densite[i,lambdaIndex] = densite[i,lambdaIndex] + pi[r,lambdaIndex] * - det(rho[,,r,lambdaIndex])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0) - } + delta = Y%*%rho[,,r] - (X[selected]%*%res$phi[selected,,r]) + densite = densite + pi[r] * + det(rho[,,r])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0) } - llh[lambdaIndex,1] = sum(log(densite[,lambdaIndex])) - llh[lambdaIndex,2] = (dimension+m+1)*k-1 - } - return (list("phi"=phi, "rho"=rho, "pi"=pi, "llh" = llh)) + llh = c( sum(log(densite[,lambdaIndex])), (dimension+m+1)*k-1 ) + list("phi"=res$phi, "rho"=res$rho, "pi"=res$pi, "llh" = llh) + }) + parallel::stopCluster(cl) + out } diff --git a/R/selectVariables.R b/R/selectVariables.R index 03578c9..46fb3f3 100644 --- a/R/selectVariables.R +++ b/R/selectVariables.R @@ -21,17 +21,19 @@ #' @export selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau) { + #TODO: parameter ncores (chaque tâche peut aussi demander du parallélisme...) cl = parallel::makeCluster( parallel::detectCores() / 4 ) parallel::clusterExport(cl=cl, varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","seuil","tau"), envir=environment()) #Pour chaque lambda de la grille, on calcule les coefficients - out = parLapply( 1:L, function(lambdaindex) + out = parLapply( seq_along(glambda), function(lambdaindex) { - params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda[lambdaIndex],X,Y,tau) - p = dim(phiInit)[1] m = dim(phiInit)[2] + + params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda[lambdaIndex],X,Y,tau) + #selectedVariables: list where element j contains vector of selected variables in [1,m] selectedVariables = lapply(1:p, function(j) { #from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,