From: Benjamin Auder Date: Sun, 4 Dec 2016 12:14:37 +0000 (+0100) Subject: R: cosmetics + start translation of (selmix.m into) main.R X-Git-Url: https://git.auder.net/DESCRIPTION?a=commitdiff_plain;h=39046da6016f15d625bd99cf0303ea8beb838c79;p=valse.git R: cosmetics + start translation of (selmix.m into) main.R --- diff --git a/R/basicInitParameters.R b/R/basicInitParameters.R index bc7e88b..6090d0a 100644 --- a/R/basicInitParameters.R +++ b/R/basicInitParameters.R @@ -1,23 +1,18 @@ -basic_Init_Parameters = function(n,p,m,k){ - phiInit = array(0, dim=c(p,m,k)) - - piInit = (1.0/k)*rep.int(1,k) - - rhoInit = array(0, dim=c(m,m,k)) - - for(i in 1:k){ - rhoInit[,,i] = diag(m) - } - - gamInit = 0.1*array(1, dim=c(n,k)) - - R = sample(1:k,n, replace= TRUE) - - for(i in 1:n){ - gamInit[i,R[i]] = 0.9 - } - gamInit = gamInit/sum(gamInit[1,]) - - - return(list(phiInit, rhoInit, piInit, gamInit)) +basic_Init_Parameters = function(n,p,m,k) +{ + phiInit = array(0, dim=c(p,m,k)) + + piInit = (1./k)*rep.int(1,k) + + rhoInit = array(0, dim=c(m,m,k)) + for(i in 1:k) + rhoInit[,,i] = diag(m) + + gamInit = 0.1*array(1, dim=c(n,k)) + R = sample(1:k,n, replace=TRUE) + for(i in 1:n) + gamInit[i,R[i]] = 0.9 + gamInit = gamInit/sum(gamInit[1,]) + + return (list(phiInit, rhoInit, piInit, gamInit)) } diff --git a/R/generateIO.R b/R/generateIO.R index 9e84af5..f8c8194 100644 --- a/R/generateIO.R +++ b/R/generateIO.R @@ -1,25 +1,26 @@ -library(MASS) #simulate from a multivariate normal distribution +generateIO = function(covX, covY, pi, beta, n) +{ + size_covX = dim(covX) + p = size_covX[1] + k = size_covX[3] -generateIO = function(meanX, covX, covY, pi, beta, n){ #don't need meanX - size_covX = dim(covX) - p = size_covX[1] - k = size_covX[3] - - size_covY = dim(covY) - m = size_covY[1] - - Y = matrix(0,n,m) - BX = array(0, dim=c(n,m,k)) - - for(i in 1:n){ - for(r in 1:k){ - BXir = rep(0,m) - for(mm in 1:m){ - Bxir[[mm]] = X[i,] %*% beta[,mm,r] - } - Y[i,]=Y[i,] + pi[[r]] * mvrnorm(1,BXir, covY[,,r]) - } - } - - return(list(X,Y)) -} \ No newline at end of file + size_covY = dim(covY) + m = size_covY[1] + + Y = matrix(0,n,m) + BX = array(0, dim=c(n,m,k)) + + require(MASS) #simulate from a multivariate normal distribution + for (i in 1:n) + { + for (r in 1:k) + { + BXir = rep(0,m) + for (mm in 1:m) + Bxir[[mm]] = X[i,] %*% beta[,mm,r] + Y[i,] = Y[i,] + pi[r] * mvrnorm(1,BXir, covY[,,r]) + } + } + + return (list(X=X,Y=Y)) +} diff --git a/R/generateIOdefault.R b/R/generateIOdefault.R index 1d5c160..fc01cd8 100644 --- a/R/generateIOdefault.R +++ b/R/generateIOdefault.R @@ -1,25 +1,22 @@ -generateIOdefault = function(n, p, m, k){ - rangeX = 100 - meanX = rangeX*(1-matrix(runif(k*p),ncol = p)) - - covX = array(0, dim=c(p,p,k)) - covY = array(0, dim=c(m,m,k)) - - for(r in 1:k){ - covX[,,r] = diag(p) - covY[,,r] = diag(m) - } - - pi = (1/k) * rep(1,k) - - beta = array(0, dim=c(p,m,k)) - - for(j in 1:p){ - nonZeroCount = ceiling(m * runif(1)) - beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k),ncol = k) - } - - generate = generateIO(meanX, covX, covY, pi, beta, n) - - return(list(generate[[1]],generate[[2]])) -} \ No newline at end of file +generateIOdefault = function(n, p, m, k) +{ + covX = array(0, dim=c(p,p,k)) + covY = array(0, dim=c(m,m,k)) + for(r in 1:k) + { + covX[,,r] = diag(p) + covY[,,r] = diag(m) + } + + pi = rep(1./k,k) + + beta = array(0, dim=c(p,m,k)) + for(j in 1:p) + { + nonZeroCount = ceiling(m * runif(1)) + beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) + } + + sample_IO = generateIO(covX, covY, pi, beta, n) + return (list(X=sample_IO$X,Y=sample_IO$Y)) +} diff --git a/R/gridLambda.R b/R/gridLambda.R index 87e9299..66b6cc2 100644 --- a/R/gridLambda.R +++ b/R/gridLambda.R @@ -1,29 +1,20 @@ -gridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau){ - n = nrow(X) - p = dimension(phiInit)[1] - m = dimension(phiInit)[2] - k = dimension(phiInit)[3] - list_EMG = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,1,0,X,Y,tau) - #.C("EMGLLF", phiInit = phiInit, rhoInit = rhoInit, ...) - phi = list_EMG[[1]] - rho = list_EMG[[2]] - pi = list_EMG[[3]] - S = list_EMG[[5]] - - grid = array(0, dim=c(p,m,k)) - for(i in 1:p){ - for(j in 1:m){ - grid[i,j,] = abs(S[i,j,]) / (n*pi^gamma) - } - } - grid = unique(grid) - grid = grid[grid <=1 ] - - return(grid) -} +gridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau) +{ + n = nrow(X) + p = dim(phiInit)[1] + m = dim(phiInit)[2] + k = dim(phiInit)[3] + + list_EMG = .Call("EMGLLF",phiInit,rhoInit,piInit,gamInit,mini,maxi,1,0,X,Y,tau) + grid = array(0, dim=c(p,m,k)) + for (i in 1:p) + { + for (j in 1:m) + grid[i,j,] = abs(list_EMG$S[i,j,]) / (n*list_EMG$pi^gamma) + } + grid = unique(grid) + grid = grid[grid <=1] -#test pour voir si formatage à la fin de grid ok -grid= array(mvrnorm(5*5*2,1,1), dim=c(5,5,2)) -grid = unique(grid) -grid = grid[grid<= 1 ] + return(grid) +} diff --git a/R/initSmallEM.R b/R/initSmallEM.R index 8f3c86b..d519766 100644 --- a/R/initSmallEM.R +++ b/R/initSmallEM.R @@ -1,80 +1,84 @@ -library(MASS) #generalized inverse of matrix Monroe-Penrose - -vec_bin = function(X,r){ - Z = c() - indice = c() - j=1 - for(i in 1:length(X)){ - if(X[i] == r){ - Z[i] = 1 - indice[j] = i - j=j+1 - } - else{ - Z[i] = 0 - } - } - return(list(Z,indice)) +vec_bin = function(X,r) +{ + Z = c() + indice = c() + j = 1 + for (i in 1:length(X)) + { + if(X[i] == r) + { + Z[i] = 1 + indice[j] = i + j=j+1 + } else + Z[i] = 0 + } + return (list(Z=Z,indice=indice)) } -initSmallEM = function(k,X,Y,tau){ - n = nrow(Y) - m = ncol(Y) - p = ncol(X) +initSmallEM = function(k,X,Y,tau) +{ + n = nrow(Y) + m = ncol(Y) + p = ncol(X) - betaInit1 = array(0, dim=c(p,m,k,20)) - sigmaInit1 = array(0, dim = c(m,m,k,20)) - phiInit1 = array(0, dim = c(p,m,k,20)) - rhoInit1 = array(0, dim = c(m,m,k,20)) - piInit1 = matrix(0,20,k) - gamInit1 = array(0, dim=c(n,k,20)) - LLFinit1 = list() - - - for(repet in 1:20){ - clusters = hclust(dist(y)) #default distance : euclidean - clusterCut = cutree(clusters,k) - Zinit1[,repet] = clusterCut #retourne les indices (à quel cluster indiv_i appartient) d'un clustering hierarchique (nb de cluster = k) - - for(r in 1:k){ - Z = Zinit1[,repet] - Z_bin = vec_bin(Z,r) - Z_vec = Z_bin[[1]] #vecteur 0 et 1 aux endroits où Z==r - Z_indice = Z_bin[[2]] #renvoit les indices où Z==r - - betaInit1[,,r,repet] = ginv(t(x[Z_indice,])%*%x[Z_indice,])%*%t(x[Z_indice,])%*%y[Z_indice,] - sigmaInit1[,,r,repet] = diag(m) - phiInit1[,,r,repet] = betaInit1[,,r,repet]/sigmaInit1[,,r,repet] - rhoInit1[,,r,repet] = solve(sigmaInit1[,,r,repet]) - piInit1[repet,r] = sum(Z_vec)/n - } - - for(i in 1:n){ - for(r in 1:k){ - dotProduct = (y[i,]%*%rhoInit1[,,r,repet]-x[i,]%*%phiInit1[,,r,repet]) %*% (y[i,]%*%rhoInit1[,,r,repet]-x[i,]%*%phiInit1[,,r,repet]) - Gam[i,r] = piInit1[repet,r]*det(rhoInit1[,,r,repet])*exp(-0.5*dotProduct) - } - sumGamI = sum(gam[i,]) - gamInit1[i,,repet]= Gam[i,] / sumGamI - } - - miniInit = 10 - maxiInit = 11 - - new_EMG = EMGLLF(phiInit1[,,,repet],rhoInit1[,,,repet],piInit1[repet,],gamInit1[,,repet],miniInit,maxiInit,1,0,x,y,tau) - ##.C("EMGLLF", phiInit = phiInit, rhoInit = rhoInit, ...) - LLFEessai = new_EMG[[4]] - LLFinit1[[repet]] = LLFEessai[[length(LLFEessai)]] - } - - b = which.max(LLFinit1) - - phiInit = phiInit1[,,,b] - rhoInit = rhoInit1[,,,b] - piInit = piInit1[b,] - gamInit = gamInit1[,,b] - - return(list(phiInit, rhoInit, piInit, gamInit)) -} + betaInit1 = array(0, dim=c(p,m,k,20)) + sigmaInit1 = array(0, dim = c(m,m,k,20)) + phiInit1 = array(0, dim = c(p,m,k,20)) + rhoInit1 = array(0, dim = c(m,m,k,20)) + piInit1 = matrix(0,20,k) + gamInit1 = array(0, dim=c(n,k,20)) + LLFinit1 = list() + + require(MASS) #Moore-Penrose generalized inverse of matrix + for(repet in 1:20) + { + clusters = hclust(dist(y)) #default distance : euclidean + #cutree retourne les indices (à quel cluster indiv_i appartient) d'un clustering hierarchique + clusterCut = cutree(clusters,k) + Zinit1[,repet] = clusterCut + + for(r in 1:k) + { + Z = Zinit1[,repet] + Z_bin = vec_bin(Z,r) + Z_vec = Z_bin$Z #vecteur 0 et 1 aux endroits où Z==r + Z_indice = Z_bin$indice #renvoit les indices où Z==r + + betaInit1[,,r,repet] = + ginv(t(x[Z_indice,])%*%x[Z_indice,])%*%t(x[Z_indice,])%*%y[Z_indice,] + sigmaInit1[,,r,repet] = diag(m) + phiInit1[,,r,repet] = betaInit1[,,r,repet]/sigmaInit1[,,r,repet] + rhoInit1[,,r,repet] = solve(sigmaInit1[,,r,repet]) + piInit1[repet,r] = sum(Z_vec)/n + } + for(i in 1:n) + { + for(r in 1:k) + { + dotProduct = (y[i,]%*%rhoInit1[,,r,repet]-x[i,]%*%phiInit1[,,r,repet]) %*% + (y[i,]%*%rhoInit1[,,r,repet]-x[i,]%*%phiInit1[,,r,repet]) + Gam[i,r] = piInit1[repet,r]*det(rhoInit1[,,r,repet])*exp(-0.5*dotProduct) + } + sumGamI = sum(gam[i,]) + gamInit1[i,,repet]= Gam[i,] / sumGamI + } + miniInit = 10 + maxiInit = 11 + + new_EMG = .Call("EMGLLF",phiInit1[,,,repet],rhoInit1[,,,repet],piInit1[repet,], + gamInit1[,,repet],miniInit,maxiInit,1,0,x,y,tau) + LLFEessai = new_EMG$LLF + LLFinit1[repet] = LLFEessai[length(LLFEessai)] + } + + b = which.max(LLFinit1) + phiInit = phiInit1[,,,b] + rhoInit = rhoInit1[,,,b] + piInit = piInit1[b,] + gamInit = gamInit1[,,b] + + return (list(phiInit=phiInit, rhoInit=rhoInit, piInit=piInit, gamInit=gamInit)) +} diff --git a/R/main.R b/R/main.R index b5b5144..00b1be9 100644 --- a/R/main.R +++ b/R/main.R @@ -1,196 +1,210 @@ -## TODO: turn this code into R - -classdef selmix < handle - - properties (SetAccess = private) - %always user defined - X; % regression data (size n*p, where n is the number of observations, and p is the number of regressors) - Y; % response data (size n*m, where n is the number of observations, and m is the number of responses) - - %optionally user defined some default values - gamma; % power in the penalty - mini; % minimum number of iterations for EM algorithm - maxi; % maximum number of iterations for EM algorithm - eps; % threshold for stopping EM algorithm - kmin; % minimum number of components in the mixture - kmax; % maximum number of components in the mixture - rangmin; - rangmax; - - %computed through the workflow - phiInit; % initialisation for the reparametrized conditional mean parameter - rhoInit; % initialisation for the reparametrized variance parameter - piInit; % initialisation for the proportions - tauInit; % initialisation for the allocations probabilities in each component - gridLambda = []; % values for the regularization parameter grid - A1; %je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ... - A2; - Phi; % collection of estimations for the reparametrized conditional mean parameters - Rho; % collection of estimations for the reparametrized variance parameters - Pi; % collection of estimations for the proportions parameters - end - - properties (Constant) - %immutable - seuil = 1e-15; - end - - methods - %%%%%%%%%%%%%%%%%%%%%%% - %initialize main object - %%%%%%%%%%%%%%%%%%%%%%% - - function sx = selmix(X,Y,varargin) - %set defaults for optional inputs - optargs = {1.0 5 10 1e-6 2 3 2 3}; - %replace defaults by user parameters - optargs(1:length(varargin)) = varargin; - sx.X = X; - sx.Y = Y; - [sx.gamma,sx.mini,sx.maxi,sx.eps,sx.kmin,sx.kmax,sx.rangmin,sx.rangmax] = optargs{:}; - % z = somme(sx.X,sx.Y); - % sx.Z=z; - end - - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %core workflow: compute all models - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - function initParameters(sx,k) - [phi0,rho0,pi0,tau0] = initSmallEM(k,sx.X,sx.Y,sx.eps); %smallEM initializes parameters by k-means and regression model in each component, doing this 20 times, and keeping the values maximizing the likelihood after 10 iterations of the EM algorithm. - sx.phiInit = phi0; - sx.rhoInit = rho0; - sx.piInit = pi0; - sx.tauInit = tau0; - end - - function computeGridLambda(sx) - [sx.gridLambda] = grillelambda(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.X,sx.Y,sx.gamma,sx.mini,sx.maxi,sx.eps); - % computation of the regularization grid, according to explicit - % formulae given by EM algorithm. - end - - function computeRelevantParameters(sx) - [sx.A1,sx.A2,sx.Rho,sx.Pi] = selectiontotale(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps); - %select variables according to each regularization parameter - %from the grid: sx.A1 corresponding to selected variables, and - %sx.A2 corresponding to unselected variables. - end - - function [sx,phi,rho,pi]=runProcedure1(sx) - [phi,rho,pi,~] = constructionModelesLassoMLE(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps,sx.A1,sx.A2); - %compute parameter estimations, with the Maximum Likelihood - %Estimator, restricted on selected variables. - end - - function [phi] =runProcedure2(sx) - [phi,~] = constructionModelesLassoRank(sx.Pi,sx.Rho,sx.mini,sx.maxi,sx.X,sx.Y,sx.eps,sx.A1,sx.rangmin,sx.rangmax); - %compute parameter estimations, with the Low Rank - %Estimator, restricted on selected variables. - end - - % main loop: over all k and all lambda - function run(sx,procedure) % Run the all procedure, 1 with the - %maximum likelihood refitting, and 2 with the Low Rank refitting. - [p,m,~]=size(sx.phiInit); - for k=sx.kmin:sx.kmax - k - initParameters(sx,k); - computeGridLambda(sx); - computeRelevantParameters(sx); - if (procedure == 1) - [~,phi,rho,pi] = runProcedure1(sx); - Phi2 = sx.Phi; - Rho2 = sx.Rho; - Pi2 = sx.Pi; - p = size(sx.X,2); - m = size(sx.Y,2); - if size(Phi2) == 0 - sx.Phi(:,:,1:k,:) = phi; - sx.Rho(:,:,1:k,:) = rho; - sx.Pi(1:k,:) = pi; - else - sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4)); - sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2; - sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi; - sx.Rho = zeros(m,m,sx.kmax,size(Rho2,4)+size(rho,4)); - sx.Rho(:,:,1:size(Rho2,3),1:size(Rho2,4)) = Rho2; - sx.Rho(:,:,1:k,size(Rho2,4)+1:end) = rho; - sx.Pi = zeros(sx.kmax,size(Pi2,2)+size(pi,2)); - sx.Pi(1:size(Pi2,1),1:size(Pi2,2)) = Pi2; - sx.Pi(1:k,size(Pi2,2)+1:end) = pi; - end - else - [phi] = runProcedure2(sx); - phi - Phi2 = sx.Phi; - if size(Phi2,1) == 0 - sx.Phi(:,:,1:k,:) = phi; - else - size(Phi2) - sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4)); - size(sx.Phi) - sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2; - sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi; - end - - end - - - end - end - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %pruning: select only one (or a few best ?!) model - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % - % function[model] selectModel(sx) - % %TODO - % %model = sxModel(...); - % end - - end - -end - - -%%%%%%%%%%%%% -%OLD VERSION: - -%~ for k=K -%~ % On initialise -%~ initialisation -%~ disp('Initialisé') -%~ % On construit la grille des lambdas : variables informatives -%~ [glambda,phiEmv,rhoEmv,piEmv]=grillelambda(phiInit,rhoInit,piInit,tauInit,x,y,gamma,mini,maxi,tau); -%~ glambda=glambda(1:3); -%~ disp('glambda construite') -%~ % On trouve les variables informatives pour chaque lambda : S est la -%~ % matrice des coefficients des variables informatives -%~ % on parallelise à l interieur du selectiontotale() -%~ [B1,B2,rhoLasso,piLasso]=selectiontotale(phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,glambda,x,y,10^-15,tau); -%~ %S1 les variables informatives, S2 celles qui ne le sont pas -%~ [B1bis,B2bis,glambda2bis,ind,rhoLasso,piLasso]=suppressionmodelesegaux(B1,B2,glambda,rhoLasso,piLasso); -%~ dessinVariablesSelectionnees; -%~ disp('Le Lasso est fait') -%~ % Pour chaque lambda ainsi construit, on veut calculer l'EMV pour la procédure Lasso-MLE -%~ %On obtient une collection de modèles pour Lasso-MLE -%~ % ICI AUSSI ON PEUT PARALLELISER a l interieur de constructionModelesLassoMLE -%~ nombreLambda=size(B1bis,3); -%~ %[phiLassoMLE,rhoLassoMLE,piLassoMLE,vraisLassoMLE]=constructionModelesLassoMLE(phiInit,rhoInit,piInit, tauInit,mini,maxi,gamma,glambda2bis,X,Y,seuil,tau,B1bis,B2bis) -%~ %Pour Lasso-Rank -%~ %on ne garde que les colonnes actives -%~ B1ter=B1bis(:,1,:); -%~ % [B1ter,ind,rhoLasso,piLasso]=suppressionmodelesegaux2(B1ter,rhoLasso,piLasso) -%~ %Pour chaque lambda, on veut calculer l'estimateur low rank pour la procédure Lasso-Rank -%~ %On obtient une collection de modèles pour Lasso-Rank -%~ %ICI AUSSI ON PEUT PARALLELISER a linterieur de constructionModelesLassoRank -%~ nombreLambda2=size(B1ter,2); -%~ [phi,lvraisemblance,Z] = constructionModelesLassoRank(... -%~ piEmv,rhoEmv,mini,maxi,X,Y,tau,B1ter,2,4); -%~ disp('On a construit les modèles pour les deux procédures') -%~ end -%~ %selectionModelesLassoMLE; -%~ %selectionModelesLassoRank; -%~ disp('On a sélectionné les modèles dans les deux procédures') +SelMix = setRefClass( + Class = "selmix", + + fields = c( + # User defined + + # regression data (size n*p, where n is the number of observations, + # and p is the number of regressors) + X = "numeric", + # response data (size n*m, where n is the number of observations, + # and m is the number of responses) + Y = "numeric", + + # Optionally user defined (some default values) + + # power in the penalty + gamma = "double", + # minimum number of iterations for EM algorithm + mini = "integer", + # maximum number of iterations for EM algorithm + maxi = "integer", + # threshold for stopping EM algorithm + eps = "double", + # minimum number of components in the mixture + kmin = "integer", + # maximum number of components in the mixture + kmax = "integer", + rangmin = "integer", + rangmax = "integer", + + # Computed through the workflow + + # initialisation for the reparametrized conditional mean parameter + phiInit, + # initialisation for the reparametrized variance parameter + rhoInit, + # initialisation for the proportions + piInit, + # initialisation for the allocations probabilities in each component + tauInit, + # values for the regularization parameter grid + gridLambda = []; + # je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit + # une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ... + A1, + A2, + # collection of estimations for the reparametrized conditional mean parameters + Phi, + # collection of estimations for the reparametrized variance parameters + Rho, + # collection of estimations for the proportions parameters + Pi, + + #immutable + seuil = 1e-15; + ), + + methods = list( + ####################### + #initialize main object + ####################### + initialize = function(X,Y,...) + { + "Initialize SelMix object" + + callSuper(...) + + X <<- X; + Y <<- Y; + gamma <<- ifelse (hasArg("gamma"), gamma, 1.) + mini <<- ifelse (hasArg("mini"), mini, as.integer(5)) + maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10)) + eps <<- ifelse (hasArg("eps"), eps, 1e-6) + kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2)) + kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3)) + rangmin <<- ifelse (hasArg("rangmin"), rangmin, as.integer(2)) + rangmax <<- ifelse (hasArg("rangmax"), rangmax, as.integer(3)) + }, + + ################################## + #core workflow: compute all models + ################################## + + initParameters = function(k) + { + "Parameters initialization" + + #smallEM initializes parameters by k-means and regression model in each component, + #doing this 20 times, and keeping the values maximizing the likelihood after 10 + #iterations of the EM algorithm. + init = initSmallEM(k,sx.X,sx.Y,sx.eps); + phiInit <<- init$phi0; + rhoInit <<- init$rho0; + piInit <<- init$pi0; + tauInit <<- init$tau0; + }, + + computeGridLambda = function() + { + "computation of the regularization grid" + #(according to explicit formula given by EM algorithm) + + gridLambda <<- grillelambda(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.X,sx.Y, + sx.gamma,sx.mini,sx.maxi,sx.eps); + }, + + computeRelevantParameters = function() + { + "Compute relevant parameters" + + #select variables according to each regularization parameter + #from the grid: sx.A1 corresponding to selected variables, and + #sx.A2 corresponding to unselected variables. + params = selectiontotale(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit,sx.mini,sx.maxi, + sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps); + A1 <<- params$A1 + A2 <<- params$A2 + Rho <<- params$Rho + Pi <<- params$Pi + }, + + runProcedure1 = function() + { + "Run procedure 1 [EMGLLF]" + + #compute parameter estimations, with the Maximum Likelihood + #Estimator, restricted on selected variables. + res = constructionModelesLassoMLE(sx.phiInit,sx.rhoInit,sx.piInit,sx.tauInit, + sx.mini,sx.maxi,sx.gamma,sx.gridLambda,sx.X,sx.Y,sx.seuil,sx.eps,sx.A1,sx.A2); + return (list( phi=res$phi, rho=res$rho, pi=res$pi)) + }, + + runProcedure2 = function() + { + "Run procedure 2 [EMGrank]" + + #compute parameter estimations, with the Low Rank + #Estimator, restricted on selected variables. + return (constructionModelesLassoRank(sx.Pi,sx.Rho,sx.mini,sx.maxi,sx.X,sx.Y,sx.eps, + sx.A1,sx.rangmin,sx.rangmax)$phi) + }, + + run = function(procedure) + { + "main loop: over all k and all lambda" + + # Run the all procedure, 1 with the + #maximum likelihood refitting, and 2 with the Low Rank refitting. + p = dim(phiInit)[1] + m = dim(phiInit)[2] + for (k in kmin:kmax) + { + print(k) + initParameters(k) + computeGridLambda() + computeRelevantParameters() + if (procedure == 1) + { + r1 = runProcedure1(sx) + Phi2 = Phi + Rho2 = Rho + Pi2 = Pi + p = ncol(X) + m = ncol(Y) + if size(Phi2) == 0 #TODO: continue translation MATLAB --> R + sx.Phi(:,:,1:k,:) = r1$phi; + sx.Rho(:,:,1:k,:) = r1$rho; + sx.Pi(1:k,:) = r1$pi; + else + sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(r1$phi,4)); + sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2; + sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = r1$phi; + sx.Rho = zeros(m,m,sx.kmax,size(Rho2,4)+size(r1$rho,4)); + sx.Rho(:,:,1:size(Rho2,3),1:size(Rho2,4)) = Rho2; + sx.Rho(:,:,1:k,size(Rho2,4)+1:end) = r1$rho; + sx.Pi = zeros(sx.kmax,size(Pi2,2)+size(r1$pi,2)); + sx.Pi(1:size(Pi2,1),1:size(Pi2,2)) = Pi2; + sx.Pi(1:k,size(Pi2,2)+1:end) = r1$pi; + end + else + [phi] = runProcedure2(sx); + phi + Phi2 = sx.Phi; + if size(Phi2,1) == 0 + sx.Phi(:,:,1:k,:) = phi; + else + size(Phi2) + sx.Phi = zeros(p,m,sx.kmax,size(Phi2,4)+size(phi,4)); + size(sx.Phi) + sx.Phi(:,:,1:size(Phi2,3),1:size(Phi2,4)) = Phi2; + sx.Phi(:,:,1:k,size(Phi2,4)+1:end) = phi; + end + + end + + + end + end + + ################################################## + #pruning: select only one (or a few best ?!) model + ################################################## + # + # function[model] selectModel(sx) + # #TODO + # #model = sxModel(...); + # end + + ) +) diff --git a/R/selectionindice.R b/R/selectionindice.R index 6782b2e..97014b1 100644 --- a/R/selectionindice.R +++ b/R/selectionindice.R @@ -1,24 +1,28 @@ -selectionindice = function(phi, seuil){ - dim_phi = dim(phi) - pp = dim_phi[1] - m = dim_phi[2] - - A = matrix(0, pp, m) - B = matrix(0, pp, m) - - for(j in 1:pp){ - cpt1 = 0 - cpt2 = 0 - for(mm in 1:m){ - if(max(phi[j,mm,])> seuil){ - cpt1 = cpt1 + 1 - A[j,cpt] = mm - } - else{ - cpt2 = cpt2+1 - B[j, cpt2] = mm - } - } - } - return(list(A,B)) -} \ No newline at end of file +selectionindice = function(phi, seuil) +{ + dim_phi = dim(phi) + pp = dim_phi[1] + m = dim_phi[2] + + A = matrix(0, pp, m) + B = matrix(0, pp, m) + + for(j in 1:pp) + { + cpt1 = 0 + cpt2 = 0 + for(mm in 1:m) + { + if(max(phi[j,mm,]) > seuil) + { + cpt1 = cpt1 + 1 + A[j,cpt] = mm + } else + { + cpt2 = cpt2+1 + B[j, cpt2] = mm + } + } + } + return (list(A,B)) +} diff --git a/R/selectionmodele.R b/R/selectionmodele.R index b92c4e4..ed32731 100644 --- a/R/selectionmodele.R +++ b/R/selectionmodele.R @@ -1,38 +1,45 @@ -vec_bin = function(X,r){ - Z = c() - indice = c() - j=1 - for(i in 1:length(X)){ - if(X[i] == r){ - Z[i] = 1 - indice[j] = i - j=j+1 - } - else{ - Z[i] = 0 - } - } - return(list(Z,indice)) +vec_bin = function(X,r) +{ + Z = c() + indice = c() + + j = 1 + for(i in 1:length(X)) + { + if(X[i] == r) + { + Z[i] = 1 + indice[j] = i + j=j+1 + } else + Z[i] = 0 + } + + return (list(Z=Z,indice=indice)) } -selectionmodele = function(vraisemblance){ - D = vraimsemblance[,2] - D1 = unique(D) - - indice = rep(1, length(D1)) - - #select argmax MLE - if(length(D1)>2){ - for(i in 1:length(D1)){ - A = c() - for(j in 1:length(D)){ - if(D[[j]]==D1[[i]]){ - a = c(a, vraimsemblance[j,1]) - } - } - b = max(a) - indice[i] = which.max(vec_bin(vraimsemblance,b)[[1]]) #retourne le premier indice du vecteur binaire où u_i ==1 - } - } - return(list(indice,D1)) -} \ No newline at end of file +selectionmodele = function(vraisemblance) +{ + D = vraimsemblance[,2] + D1 = unique(D) + + indice = rep(1, length(D1)) + #select argmax MLE + if (length(D1)>2) + { + for (i in 1:length(D1)) + { + A = c() + for (j in 1:length(D)) + { + if(D[[j]]==D1[[i]]) + a = c(a, vraimsemblance[j,1]) + } + b = max(a) + #indice[i] : premier indice du vecteur binaire où u_i ==1 + indice[i] = which.max(vec_bin(vraimsemblance,b)[[1]]) + } + } + + return (list(indice=indice,D1=D1)) +} diff --git a/R/suppressionmodelesegaux.R b/R/suppressionmodelesegaux.R index a558efb..a588062 100644 --- a/R/suppressionmodelesegaux.R +++ b/R/suppressionmodelesegaux.R @@ -1,18 +1,20 @@ -suppressionmodelesegaux = function(B1,B2,glambda,rho,pi){ - ind = c() - for(j in 1:length(glambda)){ - for(ll in 1:(l-1)){ - if(B1[,,l] == B1[,,ll]){ - ind = c(ind, l) - } - } - } - ind = unique(ind) - B1 = B1[,,-ind] - glambda = glambda[-ind] - B2 = B2[,,-ind] - rho = rho[,,,-ind] - pi = pi[,-ind] - - return(list(B1,B2,glambda,ind,rho,pi)) -} \ No newline at end of file +suppressionmodelesegaux = function(B1,B2,glambda,rho,pi) +{ + ind = c() + for (j in 1:length(glambda)) + { + for (ll in 1:(l-1)) + { + if(B1[,,l] == B1[,,ll]) + ind = c(ind, l) + } + } + ind = unique(ind) + B1 = B1[,,-ind] + glambda = glambda[-ind] + B2 = B2[,,-ind] + rho = rho[,,,-ind] + pi = pi[,-ind] + + return (list(B1=B1,B2=B2,glambda=glambda,ind=ind,rho=rho,pi=pi)) +} diff --git a/R/suppressionmodelesegaux2.R b/R/suppressionmodelesegaux2.R index 4574afc..741793b 100644 --- a/R/suppressionmodelesegaux2.R +++ b/R/suppressionmodelesegaux2.R @@ -1,26 +1,24 @@ -suppressionmodelesegaux2 = function(B1,rho,pi){ - ind = c() - dim_B1 = dim(B1) - B2 = array(0,dim=c(dim_B1[1],dim_B1[2],dim_B1[3])) - nombreLambda=dim_B1[[2]] - glambda = rep(0,nombreLambda) - - #for(j in 1:nombreLambda){ - # for(ll in 1:(l-1)){ - # if(B1[,,l] == B1[,,ll]){ - # ind = c(ind, l) - # } - # } - #} - #ind = unique(ind) - #B1 = B1[,,-ind] - #rho = rho[,,,-ind] - #pi = pi[,-ind] - - suppressmodel = suppressionmodelesegaux(B1,B2,glambda,rho,pi) - B1 = suppressmodel[[1]] - ind = suppressmodel[[4]] - rho = suppressmodel[[5]] - pi = suppressmodel[[6]] - return(list(B1,ind,rho,pi)) -} \ No newline at end of file +suppressionmodelesegaux2 = function(B1,rho,pi) +{ + ind = c() + dim_B1 = dim(B1) + B2 = array(0,dim=c(dim_B1[1],dim_B1[2],dim_B1[3])) + nombreLambda=dim_B1[[2]] + glambda = rep(0,nombreLambda) + + #for(j in 1:nombreLambda){ + # for(ll in 1:(l-1)){ + # if(B1[,,l] == B1[,,ll]){ + # ind = c(ind, l) + # } + # } + #} + #ind = unique(ind) + #B1 = B1[,,-ind] + #rho = rho[,,,-ind] + #pi = pi[,-ind] + + suppressmodel = suppressionmodelesegaux(B1,B2,glambda,rho,pi) + return (list(B1 = suppressmodel$B1, ind = suppressmodel$B2, + rho = suppressmodel$rho, pi = suppressmodel$pi)) +} diff --git a/data/TODO b/data/TODO index e69de29..c0603b4 100644 --- a/data/TODO +++ b/data/TODO @@ -0,0 +1,2 @@ +Trouver un jeu de données (+) intéressant (que les autres) ? +Ajouter toy datasets pour les tests (ou piocher dans MASS ?)