From: Benjamin Auder Date: Sun, 12 Feb 2017 21:02:14 +0000 (+0100) Subject: fix generateRunTest for constructionModelesEMGrank; remove man pages (will be roxygen... X-Git-Url: https://git.auder.net/variants/Bario/assets/img/current/gitweb.js?a=commitdiff_plain;h=ef67d338c7f28ba041abe40ca9a8ab69f8365a90;p=valse.git fix generateRunTest for constructionModelesEMGrank; remove man pages (will be roxygenized); clean code --- diff --git a/.gitignore b/.gitignore index 767ee1a..30d9a72 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ .RData *.swp *~ +/man/ +!/man/*-package.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 07caf92..cdda4e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,21 @@ Package: valse -Title: Variable selection with mixture of models +Title: VAriabLe SElection with mixture of models Date: 2016-12-01 Version: 0.1-0 Description: TODO -Authors@R: c( person("Benjamin Auder", "Developer", role=c("ctb","cre"), email="Benjamin.Auder@math.u-psud.fr"), - person("Benjamin Goehry", "User", role="aut", email = "Benjamin.Goehry@math.u-psud.fr"), - person("Emilie Devijver", "User", role="ctb", email = "Emilie.Devijver@kuleuven.be")) +Author: Benjamin Auder [aut,cre], + Benjamin Goehry [aut] + Emilie Devijver [aut] +Maintainer: Benjamin Auder Depends: - R (>= 2.15) -LazyData: yes + R (>= 3.0.0) +Imports: + MASS +Suggests: + parallel, + testthat, + knitr URL: http://git.auder.net/?p=valse.git -License: MIT +License: MIT + file LICENSE +VignetteBuilder: knitr RoxygenNote: 5.0.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..727af26 --- /dev/null +++ b/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) + 2014-2016, Emilie Devijver + 2014-2017, Benjamin Auder + 2016-2017, Benjamin Goehry + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE index 656ff8d..0f566e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,11 @@ # Generated by roxygen2: do not edit by hand -export(basic_Init_Parameters) -export(discardSimilarModels) -export(discardSimilarModels2) -export(generateIO) -export(generateIOdefault) +export(basicInitParameters) +export(discardSimilarModels_EMGLLF) +export(discardSimilarModels_EMGrank) +export(generateXY) +export(generateXYdefault) export(gridLambda) -export(indicesSelection) export(initSmallEM) export(modelSelection) export(selectVariables) diff --git a/R/basicInitParameters.R b/R/basicInitParameters.R deleted file mode 100644 index 3583e68..0000000 --- a/R/basicInitParameters.R +++ /dev/null @@ -1,28 +0,0 @@ -#----------------------------------------------------------------------- -#' Initialize the parameters in a basic way (zero for the conditional mean, -#' uniform for weights, identity for covariance matrices, and uniformly distributed forthe clustering) -#' @param n sample size -#' @param p number of covariates -#' @param m size of the response -#' @param k number of clusters -#' @return list with phiInit, rhoInit,piInit,gamInit -#' @export -#----------------------------------------------------------------------- -basic_Init_Parameters = function(n,p,m,k) -{ - phiInit = array(0, dim=c(p,m,k)) - - piInit = (1./k)*rep.int(1,k) - - rhoInit = array(0, dim=c(m,m,k)) - for(i in 1:k) - rhoInit[,,i] = diag(m) - - gamInit = 0.1*array(1, dim=c(n,k)) - R = sample(1:k,n, replace=TRUE) - for(i in 1:n) - gamInit[i,R[i]] = 0.9 - gamInit = gamInit/sum(gamInit[1,]) - - return (data = list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit)) -} diff --git a/R/discardSimilarModels.R b/R/discardSimilarModels.R index 29d8f10..5f6a8c8 100644 --- a/R/discardSimilarModels.R +++ b/R/discardSimilarModels.R @@ -1,4 +1,4 @@ -#' Discard models which have the same relevant variables +#' Discard models which have the same relevant variables - for EMGLLF #' #' @param B1 array of relevant coefficients (of size p*m*length(gridlambda)) #' @param B2 array of irrelevant coefficients (of size p*m*length(gridlambda)) @@ -9,7 +9,7 @@ #' @return a list with update B1, B2, glambda, rho and pi, and ind the vector of indices #' of selected models. #' @export -discardSimilarModels = function(B1,B2,glambda,rho,pi) +discardSimilarModels_EMGLLF = function(B1,B2,glambda,rho,pi) { ind = c() for (j in 1:length(glambda)) @@ -27,5 +27,27 @@ discardSimilarModels = function(B1,B2,glambda,rho,pi) rho = rho[,,,-ind] pi = pi[,-ind] - return (list(B1=B1,B2=B2,glambda=glambda,rho=rho,pi=pi,ind=ind)) + return (list("B1"=B1,"B2"=B2,"glambda"=glambda,"rho"=rho,"pi"=pi,"ind"=ind)) +} + +#' Discard models which have the same relevant variables +#' - for Lasso-rank procedure (focus on columns) +#' +#' @param B1 array of relevant coefficients (of size p*m*length(gridlambda)) +#' @param rho covariance matrix +#' @param pi weight parameters +#' +#' @return a list with B1, in, rho, pi +#' @export +discardSimilarModels_EMGrank = function(B1,rho,pi) +{ + ind = c() + dim_B1 = dim(B1) + B2 = array(0,dim=c(dim_B1[1],dim_B1[2],dim_B1[3])) + sizeLambda=dim_B1[3] + glambda = rep(0,sizeLambda) + + suppressmodel = discardSimilarModels_EMGLLF(B1,B2,glambda,rho,pi) + return (list("B1" = suppressmodel$B1, "ind" = suppressmodel$ind, + "rho" = suppressmodel$rho, "pi" = suppressmodel$pi)) } diff --git a/R/discardSimilarModels2.R b/R/discardSimilarModels2.R deleted file mode 100644 index b59c1ba..0000000 --- a/R/discardSimilarModels2.R +++ /dev/null @@ -1,20 +0,0 @@ -#' Similar to discardSimilarModels, for Lasso-rank procedure (focus on columns) -#' -#' @param B1 array of relevant coefficients (of size p*m*length(gridlambda)) -#' @param rho covariance matrix -#' @param pi weight parameters -#' -#' @return a list with B1, in, rho, pi -#' @export -#' -discardSimilarModels2 = function(B1,rho,pi) -{ ind = c() - dim_B1 = dim(B1) - B2 = array(0,dim=c(dim_B1[1],dim_B1[2],dim_B1[3])) - sizeLambda=dim_B1[3] - glambda = rep(0,sizeLambda) - - suppressmodel = discardSimilarModels(B1,B2,glambda,rho,pi) - return (list(B1 = suppressmodel$B1, ind = suppressmodel$ind, - rho = suppressmodel$rho, pi = suppressmodel$pi)) -} diff --git a/R/generateIO.R b/R/generateIO.R deleted file mode 100644 index 5f19488..0000000 --- a/R/generateIO.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Generate a sample of (X,Y) of size n -#' @param covX covariance for covariates (of size p*p*K) -#' @param covY covariance for the response vector (of size m*m*K) -#' @param pi proportion for each cluster -#' @param beta regression matrix -#' @param n sample size -#' -#' @return list with X and Y -#' @export -#----------------------------------------------------------------------- -generateIO = function(covX, covY, pi, beta, n) -{ - p = dim(covX)[1] - - m = dim(covY)[1] - k = dim(covY)[3] - - Y = matrix(0,n,m) - require(mvtnorm) - X = rmvnorm(n, mean = rep(0,p), sigma = covX) - - require(MASS) #simulate from a multivariate normal distribution - for (i in 1:n) - { - - for (r in 1:k) - { - BXir = rep(0,m) - for (mm in 1:m) - BXir[mm] = X[i,] %*% beta[,mm,r] - Y[i,] = Y[i,] + pi[r] * mvrnorm(1,BXir, covY[,,r]) - } - } - - return (list(X=X,Y=Y)) -} diff --git a/R/generateIOdefault.R b/R/generateIOdefault.R deleted file mode 100644 index b0d748a..0000000 --- a/R/generateIOdefault.R +++ /dev/null @@ -1,29 +0,0 @@ -#' Generate a sample of (X,Y) of size n with default values -#' @param n sample size -#' @param p number of covariates -#' @param m size of the response -#' @param k number of clusters -#' @return list with X and Y -#' @export -#----------------------------------------------------------------------- -generateIOdefault = function(n, p, m, k) -{ - covX = diag(p) - covY = array(0, dim=c(m,m,k)) - for(r in 1:k) - { - covY[,,r] = diag(m) - } - - pi = rep(1./k,k) - - beta = array(0, dim=c(p,m,k)) - for(j in 1:p) - { - nonZeroCount = ceiling(m * runif(1)) - beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) - } - - sample_IO = generateIO(covX, covY, pi, beta, n) - return (list(X=sample_IO$X,Y=sample_IO$Y)) -} diff --git a/R/generateSampleInputs.R b/R/generateSampleInputs.R new file mode 100644 index 0000000..fb67b08 --- /dev/null +++ b/R/generateSampleInputs.R @@ -0,0 +1,87 @@ +#' Generate a sample of (X,Y) of size n +#' @param meanX matrix of group means for covariates (of size p*K) +#' @param covX covariance for covariates (of size p*p*K) +#' @param covY covariance for the response vector (of size m*m*K) +#' @param pi proportion for each cluster +#' @param beta regression matrix +#' @param n sample size +#' +#' @return list with X and Y +#' @export +generateXY = function(meanX, covX, covY, pi, beta, n) +{ + p = dim(covX)[1] + m = dim(covY)[1] + k = dim(covX)[3] + + X = matrix(nrow=n,ncol=p) + Y = matrix(nrow=n,ncol=m) + + require(MASS) #simulate from a multivariate normal distribution + for (i in 1:n) + { + class = sample(1:k, 1, prob=pi) + X[i,] = mvrnorm(1, meanX[,class], covX[,,class]) + Y[i,] = mvrnorm(1, X[i,] %*% beta[,,class], covY[,,class]) + } + + return (list(X=X,Y=Y)) +} + +#' Generate a sample of (X,Y) of size n with default values +#' @param n sample size +#' @param p number of covariates +#' @param m size of the response +#' @param k number of clusters +#' @return list with X and Y +#' @export +generateXYdefault = function(n, p, m, k) +{ + rangeX = 100 + meanX = rangeX * matrix(1 - 2*runif(p*k), ncol=k) + covX = array(dim=c(p,p,k)) + covY = array(dim=c(m,m,k)) + for(r in 1:k) + { + covX[,,r] = diag(p) + covY[,,r] = diag(m) + } + pi = rep(1./k,k) + #initialize beta to a random number of non-zero random value + beta = array(0, dim=c(p,m,k)) + for (j in 1:p) + { + nonZeroCount = sample(1:m, 1) + beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) + } + + sample_IO = generateXY(meanX, covX, covY, pi, beta, n) + return (list(X=sample_IO$X,Y=sample_IO$Y)) +} + +#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for weights, +#' identity for covariance matrices, and uniformly distributed for the clustering) +#' @param n sample size +#' @param p number of covariates +#' @param m size of the response +#' @param k number of clusters +#' @return list with phiInit, rhoInit,piInit,gamInit +#' @export +basicInitParameters = function(n,p,m,k) +{ + phiInit = array(0, dim=c(p,m,k)) + + piInit = (1./k)*rep(1,k) + + rhoInit = array(dim=c(m,m,k)) + for (i in 1:k) + rhoInit[,,i] = diag(m) + + gamInit = 0.1 * matrix(1, nrow=n, ncol=k) + R = sample(1:k, n, replace=TRUE) + for (i in 1:n) + gamInit[i,R[i]] = 0.9 + gamInit = gamInit/sum(gamInit[1,]) + + return (list("phiInit" = phiInit, "rhoInit" = rhoInit, "piInit" = piInit, "gamInit" = gamInit)) +} diff --git a/R/indicesSelection.R b/R/indicesSelection.R deleted file mode 100644 index 7835430..0000000 --- a/R/indicesSelection.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Construct the set of relevant indices -> ED: je crois que cette fonction n'est pas utile -#' -#' @param phi regression matrix, of size p*m -#' @param thresh threshold to say a cofficient is equal to zero -#' -#' @return a list with A, a matrix with relevant indices (size = p*m) and B, a -#' matrix with irrelevant indices (size = p*m) -#' @export -indicesSelection = function(phi, thresh = 1e-6) -{ - dim_phi = dim(phi) - p = dim_phi[1] - m = dim_phi[2] - - A = matrix(0, p, m) - B = matrix(0, p, m) - - for(j in 1:p) - { - cpt1 = 0 - cpt2 = 0 - for(mm in 1:m) - { - if(max(phi[j,mm,]) > thresh) - { - cpt1 = cpt1 + 1 - A[j,cpt] = mm - } else - { - cpt2 = cpt2+1 - B[j, cpt2] = mm - } - } - } - return (list(A=A,B=B)) -} diff --git a/R/initSmallEM.R b/R/initSmallEM.R index 6dd7457..e2157b2 100644 --- a/R/initSmallEM.R +++ b/R/initSmallEM.R @@ -13,7 +13,7 @@ initSmallEM = function(k,X,Y,tau) m = ncol(Y) p = ncol(X) - Zinit1 = array(0, dim=c(n,20)) + Zinit1 = array(0, dim=c(n,20)) betaInit1 = array(0, dim=c(p,m,k,20)) sigmaInit1 = array(0, dim = c(m,m,k,20)) phiInit1 = array(0, dim = c(p,m,k,20)) @@ -35,7 +35,8 @@ initSmallEM = function(k,X,Y,tau) Z = Zinit1[,repet] Z_indice = seq_len(n)[Z == r] #renvoit les indices où Z==r - betaInit1[,,r,repet] = ginv(crossprod(X[Z_indice,])) %*% crossprod(X[Z_indice,], Y[Z_indice,]) + betaInit1[,,r,repet] = ginv(crossprod(X[Z_indice,])) %*% + crossprod(X[Z_indice,], Y[Z_indice,]) sigmaInit1[,,r,repet] = diag(m) phiInit1[,,r,repet] = betaInit1[,,r,repet] #/ sigmaInit1[,,r,repet] rhoInit1[,,r,repet] = solve(sigmaInit1[,,r,repet]) @@ -56,7 +57,8 @@ initSmallEM = function(k,X,Y,tau) miniInit = 10 maxiInit = 11 - new_EMG = .Call("EMGLLF_core",phiInit1[,,,repet],rhoInit1[,,,repet],piInit1[repet,],gamInit1[,,repet],miniInit,maxiInit,1,0,X,Y,tau) + new_EMG = .Call("EMGLLF_core",phiInit1[,,,repet],rhoInit1[,,,repet],piInit1[repet,], + gamInit1[,,repet],miniInit,maxiInit,1,0,X,Y,tau) LLFEessai = new_EMG$LLF LLFinit1[repet] = LLFEessai[length(LLFEessai)] } diff --git a/R/modelSelection.R b/R/modelSelection.R index 81e832a..86e2efd 100644 --- a/R/modelSelection.R +++ b/R/modelSelection.R @@ -1,11 +1,11 @@ #' Among a collection of models, this function constructs a subcollection of models with -#' models having strictly different dimensions, keeping the model which minimizes +#' models having strictly different dimensions, keeping the model which minimizes #' the likelihood if there were several with the same dimension #' #' @param LLF a matrix, the first column corresponds to likelihoods for several models #' the second column corresponds to the dimensions of the corresponding models. #' -#' @return a list with indices, a vector of indices selected models, +#' @return a list with indices, a vector of indices selected models, #' and D1, a vector of corresponding dimensions #' @export #' @@ -34,3 +34,7 @@ modelSelection = function(LLF) return (list(indices=indices,D1=D1)) } + +#TODO: +## Programme qui sélectionne un modèle +## proposer à l'utilisation différents critères (BIC, AIC, slope heuristic) diff --git a/R/modelSelectionCrit.R b/R/modelSelectionCrit.R deleted file mode 100644 index 81f373d..0000000 --- a/R/modelSelectionCrit.R +++ /dev/null @@ -1,2 +0,0 @@ -## Programme qui sélectionne un modèle -## proposer à l'utilisation différents critères (BIC, AIC, slope heuristic) \ No newline at end of file diff --git a/clean.sh b/clean.sh index 2d5e2d1..abf77b8 100755 --- a/clean.sh +++ b/clean.sh @@ -1,5 +1,6 @@ #!/bin/sh rm -f src/*.so -rm -f adapters/*.o -rm -f sources/*.o +rm -f src/adapters/*.o +rm -f src/sources/*.o +cd src/test && make cclean && cd ../.. diff --git a/data/TODO b/data/TODO index a3bb58d..7e3c7ec 100644 --- a/data/TODO +++ b/data/TODO @@ -1,4 +1,4 @@ Trouver un jeu de données (+) intéressant (que les autres) ? Ajouter toy datasets pour les tests (ou piocher dans MASS ?) -ED : j'ai simulé un truc basique via 'generateIOdefault(10,5,6,2)' +ED : j'ai simulé un truc basique via 'generateXYdefault(10,5,6,2)' diff --git a/man/basic_Init_Parameters.Rd b/man/basic_Init_Parameters.Rd deleted file mode 100644 index 981b984..0000000 --- a/man/basic_Init_Parameters.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/basicInitParameters.R -\name{basic_Init_Parameters} -\alias{basic_Init_Parameters} -\title{Initialize the parameters in a basic way (zero for the conditional mean, -uniform for weights, identity for covariance matrices, and uniformly distributed forthe clustering)} -\usage{ -basic_Init_Parameters(n, p, m, k) -} -\arguments{ -\item{n}{sample size} - -\item{p}{number of covariates} - -\item{m}{size of the response} - -\item{k}{number of clusters} -} -\value{ -list with phiInit, rhoInit,piInit,gamInit -} -\description{ -Initialize the parameters in a basic way (zero for the conditional mean, -uniform for weights, identity for covariance matrices, and uniformly distributed forthe clustering) -} - diff --git a/man/discardSimilarModels.Rd b/man/discardSimilarModels.Rd deleted file mode 100644 index 7cef581..0000000 --- a/man/discardSimilarModels.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/discardSimilarModels.R -\name{discardSimilarModels} -\alias{discardSimilarModels} -\title{Discard models which have the same relevant variables} -\usage{ -discardSimilarModels(B1, B2, glambda, rho, pi) -} -\arguments{ -\item{B1}{array of relevant coefficients (of size p*m*length(gridlambda))} - -\item{B2}{array of irrelevant coefficients (of size p*m*length(gridlambda))} - -\item{glambda}{grid of regularization parameters (vector)} - -\item{rho}{covariance matrix (of size m*m*K*size(gridLambda))} - -\item{pi}{weight parameters (of size K*size(gridLambda))} -} -\value{ -a list with update B1, B2, glambda, rho and pi, and ind the vector of indices -of selected models. -} -\description{ -Discard models which have the same relevant variables -} - diff --git a/man/discardSimilarModels2.Rd b/man/discardSimilarModels2.Rd deleted file mode 100644 index 05c0e61..0000000 --- a/man/discardSimilarModels2.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/discardSimilarModels2.R -\name{discardSimilarModels2} -\alias{discardSimilarModels2} -\title{Similar to discardSimilarModels, for Lasso-rank procedure (focus on columns)} -\usage{ -discardSimilarModels2(B1, rho, pi) -} -\arguments{ -\item{B1}{array of relevant coefficients (of size p*m*length(gridlambda))} - -\item{rho}{covariance matrix} - -\item{pi}{weight parameters} -} -\value{ -a list with B1, in, rho, pi -} -\description{ -Similar to discardSimilarModels, for Lasso-rank procedure (focus on columns) -} - diff --git a/man/generateIO.Rd b/man/generateIO.Rd deleted file mode 100644 index bf8fb20..0000000 --- a/man/generateIO.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generateIO.R -\name{generateIO} -\alias{generateIO} -\title{Generate a sample of (X,Y) of size n} -\usage{ -generateIO(covX, covY, pi, beta, n) -} -\arguments{ -\item{covX}{covariance for covariates (of size p*p*K)} - -\item{covY}{covariance for the response vector (of size m*m*K)} - -\item{pi}{proportion for each cluster} - -\item{beta}{regression matrix} - -\item{n}{sample size} -} -\value{ -list with X and Y -} -\description{ -Generate a sample of (X,Y) of size n -} - diff --git a/man/generateIOdefault.Rd b/man/generateIOdefault.Rd deleted file mode 100644 index 332968e..0000000 --- a/man/generateIOdefault.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generateIOdefault.R -\name{generateIOdefault} -\alias{generateIOdefault} -\title{Generate a sample of (X,Y) of size n with default values} -\usage{ -generateIOdefault(n, p, m, k) -} -\arguments{ -\item{n}{sample size} - -\item{p}{number of covariates} - -\item{m}{size of the response} - -\item{k}{number of clusters} -} -\value{ -list with X and Y -} -\description{ -Generate a sample of (X,Y) of size n with default values -} - diff --git a/man/gridLambda.Rd b/man/gridLambda.Rd deleted file mode 100644 index 551c3d7..0000000 --- a/man/gridLambda.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gridLambda.R -\name{gridLambda} -\alias{gridLambda} -\title{Construct the data-driven grid for the regularization parameters used for the Lasso estimator} -\usage{ -gridLambda(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau) -} -\arguments{ -\item{phiInit}{value for phi} - -\item{piInit}{value for pi} - -\item{gamInit}{value for gamma} - -\item{mini}{minimum number of iterations in EM algorithm} - -\item{maxi}{maximum number of iterations in EM algorithm} - -\item{tau}{threshold to stop EM algorithm} - -\item{rhoInt}{value for rho} -} -\value{ -the grid of regularization parameters -} -\description{ -Construct the data-driven grid for the regularization parameters used for the Lasso estimator -} - diff --git a/man/indicesSelection.Rd b/man/indicesSelection.Rd deleted file mode 100644 index a727cdf..0000000 --- a/man/indicesSelection.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/indicesSelection.R -\name{indicesSelection} -\alias{indicesSelection} -\title{Construct the set of relevant indices -> ED: je crois que cette fonction n'est pas utile} -\usage{ -indicesSelection(phi, thresh = 1e-06) -} -\arguments{ -\item{phi}{regression matrix, of size p*m} - -\item{thresh}{threshold to say a cofficient is equal to zero} -} -\value{ -a list with A, a matrix with relevant indices (size = p*m) and B, a - matrix with irrelevant indices (size = p*m) -} -\description{ -Construct the set of relevant indices -> ED: je crois que cette fonction n'est pas utile -} - diff --git a/man/initSmallEM.Rd b/man/initSmallEM.Rd deleted file mode 100644 index 1b5db3f..0000000 --- a/man/initSmallEM.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/initSmallEM.R -\name{initSmallEM} -\alias{initSmallEM} -\title{initialization of the EM algorithm} -\usage{ -initSmallEM(k, X, Y, tau) -} -\arguments{ -\item{k}{number of components} - -\item{X}{matrix of covariates (of size n*p)} - -\item{Y}{matrix of responses (of size n*m)} - -\item{tau}{threshold to stop EM algorithm} -} -\value{ -a list with phiInit, rhoInit, piInit, gamInit -} -\description{ -initialization of the EM algorithm -} - diff --git a/man/modelSelection.Rd b/man/modelSelection.Rd deleted file mode 100644 index 650bf70..0000000 --- a/man/modelSelection.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelSelection.R -\name{modelSelection} -\alias{modelSelection} -\title{Among a collection of models, this function constructs a subcollection of models with -models having strictly different dimensions, keeping the model which minimizes -the likelihood if there were several with the same dimension} -\usage{ -modelSelection(LLF) -} -\arguments{ -\item{LLF}{a matrix, the first column corresponds to likelihoods for several models -the second column corresponds to the dimensions of the corresponding models.} -} -\value{ -a list with indices, a vector of indices selected models, - and D1, a vector of corresponding dimensions -} -\description{ -Among a collection of models, this function constructs a subcollection of models with -models having strictly different dimensions, keeping the model which minimizes -the likelihood if there were several with the same dimension -} - diff --git a/man/selectVariables.Rd b/man/selectVariables.Rd deleted file mode 100644 index 09a52f2..0000000 --- a/man/selectVariables.Rd +++ /dev/null @@ -1,49 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/selectVariables.R -\name{selectVariables} -\alias{selectVariables} -\title{selectVaribles -It is a function which construct, for a given lambda, the sets of -relevant variables and irrelevant variables.} -\usage{ -selectVariables(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, glambda, - X, Y, thres, tau) -} -\arguments{ -\item{phiInit}{an initial estimator for phi (size: p*m*k)} - -\item{rhoInit}{an initial estimator for rho (size: m*m*k)} - -\item{piInit}{an initial estimator for pi (size : k)} - -\item{gamInit}{an initial estimator for gamma} - -\item{mini}{minimum number of iterations in EM algorithm} - -\item{maxi}{maximum number of iterations in EM algorithm} - -\item{gamma}{power in the penalty} - -\item{glambda}{grid of regularization parameters} - -\item{X}{matrix of regressors} - -\item{Y}{matrix of responses} - -\item{thres}{threshold to consider a coefficient to be equal to 0} - -\item{tau}{threshold to say that EM algorithm has converged} -} -\value{ -TODO -} -\description{ -selectVaribles -It is a function which construct, for a given lambda, the sets of -relevant variables and irrelevant variables. -} -\examples{ -TODO - -} - diff --git a/man/vec_bin.Rd b/man/vec_bin.Rd deleted file mode 100644 index 1689488..0000000 --- a/man/vec_bin.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/vec_bin.R -\name{vec_bin} -\alias{vec_bin} -\title{A function needed in initSmallEM} -\usage{ -vec_bin(X, r) -} -\arguments{ -\item{X}{vector with integer values} - -\item{r}{integer} -} -\value{ -a list with Z (a binary vector of size the size of X) and indices where Z is equal to 1 -} -\description{ -A function needed in initSmallEM -} - diff --git a/src/sources/EMGLLF.c b/src/sources/EMGLLF.c index 87fd199..7e7a3d1 100644 --- a/src/sources/EMGLLF.c +++ b/src/sources/EMGLLF.c @@ -36,7 +36,6 @@ void EMGLLF_core( //S is already allocated, and doesn't need to be 'zeroed' //Other local variables - //NOTE: variables order is always [maxi],n,p,m,k Real* gam = (Real*)malloc(n*k*sizeof(Real)); copyArray(gamInit, gam, n*k); Real* b = (Real*)malloc(k*sizeof(Real)); @@ -54,6 +53,7 @@ void EMGLLF_core( Real* Gam = (Real*)malloc(n*k*sizeof(Real)); Real* X2 = (Real*)malloc(n*p*k*sizeof(Real)); Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real)); + Real* sqNorm2 = (Real*)malloc(k*sizeof(Real)); gsl_matrix* matrix = gsl_matrix_alloc(m, m); gsl_permutation* permutation = gsl_permutation_alloc(m); Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); @@ -61,8 +61,8 @@ void EMGLLF_core( Real dist = 0.; Real dist2 = 0.; int ite = 0; - Real EPS = 1e-15; - Real* dotProducts = (Real*)malloc(k*sizeof(Real)); + const Real EPS = 1e-15; + const Real gaussConstM = pow(2.*M_PI,m/2.); while (ite < mini || (ite < maxi && (dist >= tau || dist2 >= sqrt(tau)))) { @@ -75,19 +75,19 @@ void EMGLLF_core( { for (int mm=0; mm= 0) pi2AllPositive = 1; for (int r=0; r - Real dotProduct = 0.0; + Real dotProduct = 0.; for (int u=0; u1) sum(phi[1:(j-1),mm,r] * Gram2[j,1:(j-1),r]) else 0) + + // (if(j n*lambda*pow(pi[r],gamma)) - phi[ai(j,mm,r,p,m,k)] = (n*lambda*pow(pi[r],gamma) - S[ai(j,mm,r,p,m,k)]) + else if (S[ai(j,mm,r,p,m,k)] > n*lambda*pow_pir_gamma) + { + phi[ai(j,mm,r,p,m,k)] = (n*lambda*pow_pir_gamma - S[ai(j,mm,r,p,m,k)]) / Gram2[ai(j,j,r,p,p,k)]; + } else - phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pow(pi[r],gamma) + S[ai(j,mm,r,p,m,k)]) + { + phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pow_pir_gamma + S[ai(j,mm,r,p,m,k)]) / Gram2[ai(j,j,r,p,p,k)]; + } } } } @@ -265,18 +274,11 @@ void EMGLLF_core( Real sumLogLLF2 = 0.0; for (int i=0; i - dotProducts[r] = 0.0; + //compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2 + sqNorm2[r] = 0.0; for (int u=0; u EPS - ? Gam[mi(i,r,n,k)] / sumGamI - : 0.0; + //gam[i,] = Gam[i,] / sumGamI + gam[mi(i,r,n,k)] = sumGamI > EPS ? Gam[mi(i,r,n,k)] / sumGamI : 0.; } } - - //sum(pen(ite,:)) + + //sumPen = sum(pi^gamma * b) Real sumPen = 0.0; for (int r=0; r dist2) dist2 = Dist3; - + ite++; } - + //free memory free(b); free(gam); @@ -409,5 +409,5 @@ void EMGLLF_core( free(pi2); free(X2); free(Y2); - free(dotProducts); + free(sqNorm2); } diff --git a/src/sources/constructionModelesLassoRank.c b/src/sources/constructionModelesLassoRank.c index 2be1982..59be2f7 100644 --- a/src/sources/constructionModelesLassoRank.c +++ b/src/sources/constructionModelesLassoRank.c @@ -7,8 +7,8 @@ // TODO: comment on constructionModelesLassoRank purpose void constructionModelesLassoRank_core( // IN parameters - const Real* Pi,// parametre initial des proportions - const Real* Rho, // parametre initial de variance renormalisé + const Real* pi,// parametre initial des proportions + const Real* rho, // parametre initial de variance renormalisé int mini, // nombre minimal d'itérations dans l'algorithme EM int maxi, // nombre maximal d'itérations dans l'algorithme EM const Real* X,// régresseurs @@ -31,18 +31,22 @@ void constructionModelesLassoRank_core( int deltaRank = rangmax-rangmin+1; int Size = (int)pow(deltaRank,k); int* Rank = (int*)malloc(Size*k*sizeof(int)); -for (int r=0; r= tau || dist2 >= sqrt(tau)))){ + while(ite <= mini || (ite<= maxi && (dist>= tau || dist2 >= sqrt(tau)))) + { Phi = phi Rho = rho - PI = Pi + Pi = pi + #calcul associé à Y et X - for(r in 1:k){ - for(mm in 1:m){ - Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] ##bon calcul ? idem pour X2 ??... - } - for(i in 1:n){ - X2[i,,r] = X[i,] *sqrt(gam[i,r]) - } - for(mm in 1:m){ + for(r in 1:k) + { + for (mm in 1:m) + Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] + for (i in 1:n) + X2[i,,r] = sqrt(gam[i,r]) * X[i,] + for (mm in 1:m) ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) - } - for(j in 1:p){ - for(s in 1:p){ + for (j in 1:p) + { + for (s in 1:p) Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r]) - } } } @@ -58,116 +58,106 @@ EMGLLF = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau) ########## #pour pi - for(r in 1:k){ - b[r] = sum(sum(abs(phi[,,r]))) - } + for (r in 1:k) + b[r] = sum(abs(phi[,,r])) gam2 = colSums(gam) - a = sum(gam%*%(log(Pi))) + a = sum(gam %*% log(pi)) #tant que les props sont negatives kk = 0 pi2AllPositive = FALSE - while(pi2AllPositive == FALSE){ - Pi2 = Pi + 0.1^kk * ((1/n)*gam2 - Pi) - pi2AllPositive = TRUE - for(r in 1:k){ - if(Pi2[r] < 0){ - pi2AllPositive = false; - break - } - } + while (!pi2AllPositive) + { + pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi) + pi2AllPositive = all(pi2 >= 0) kk = kk+1 } - #t[m]la plus grande valeur dans la grille O.1^k tel que ce soit - #décroissante ou constante - while((-1/n*a+lambda*((Pi^gamma)%*%t(b)))<(-1/n*gam2%*%t(log(Pi2))+lambda*(Pi2^gamma)%*%t(b)) && kk<1000){ - Pi2 = Pi+0.1^kk*(1/n*gam2-Pi) - kk = kk+1 + #t[m] la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante + while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) ) + { + pi2 = pi + 0.1^kk * (1/n*gam2 - pi) + kk = kk + 1 } - t = 0.1^(kk) - Pi = (Pi+t*(Pi2-Pi)) / sum(Pi+t*(Pi2-Pi)) + t = 0.1^kk + pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) #Pour phi et rho - for(r in 1:k){ - for(mm in 1:m){ - for(i in 1:n){ - ps1[i,mm,r] = Y2[i,mm,r] * (X2[i,,r]%*%(phi[,mm,r])) - nY21[i,mm,r] = (Y2[i,mm,r])^2 + for (r in 1:k) + { + for (mm in 1:m) + { + for (i in 1:n) + { + ps1[i,mm,r] = Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r]) + nY21[i,mm,r] = Y2[i,mm,r]^2 } ps[mm,r] = sum(ps1[,mm,r]) nY2[mm,r] = sum(nY21[,mm,r]) - rho[mm,mm,r] = ((ps[mm,r]+sqrt(ps[mm,r]^2+4*nY2[mm,r]*(gam2[r])))/(2*nY2[mm,r])) + rho[mm,mm,r] = (ps[mm,r]+sqrt(ps[mm,r]^2+4*nY2[mm,r]*gam2[r])) / (2*nY2[mm,r]) } } - for(r in 1:k){ - p1 = p-1 - for(j in 1:p1){ - for(mm in 1:m){ - j1 = j-1 - j2 = j+1 - v1 = c(1:j1) - v2 = c(j2:p) - S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + phi[v1,mm,r]%*%(Gram2[j,v1,r]) + phi[v2,mm,r]%*%(Gram2[j,v2,r]) #erreur indice - if(abs(S[j,mm,r]) <= n*lambda*(Pi[r]^gamma)){ + for (r in 1:k) + { + for (j in 1:p) + { + for (mm in 1:m) + { + S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + + (if(j>1) sum(phi[1:(j-1),mm,r] * Gram2[j,1:(j-1),r]) else 0) + + (if(j n*lambda*(Pi[r]^gamma)){ - phi[j,mm,r] = (n*lambda*(Pi[r]^gamma)-S[j,mm,r])/Gram2[j,j,r] - }else{ - phi[j,mm,r] = -(n*lambda*(Pi[r]^gamma)+S[j,mm,r])/Gram2[j,j,r] - } - } + else if(S[j,mm,r] > n*lambda*(pi[r]^gamma)) + phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r] + else + phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r] } } } - + ########## #Etape E # ########## sumLogLLF2 = 0 - for(i in 1:n){ - #precompute dot products to numerically adjust their values - dotProducts = rep(0,k) - for(r in 1:k){ - dotProducts[r] = tcrossprod(Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r]) - } - shift = 0.5*min(dotProducts) - + for (i in 1:n) + { + #precompute sq norms to numerically adjust their values + sqNorm2 = rep(0,k) + for (r in 1:k) + sqNorm2[r] = sum( (Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2 ) + shift = 0.5*min(sqNorm2) + #compute Gam(:,:) using shift determined above sumLLF1 = 0.0; - for(r in 1:k){ - Gam[i,r] = Pi[r]*det(rho[,,r])*exp(-0.5*dotProducts[r] + shift) - sumLLF1 = sumLLF1 + Gam[i,r]/(2*pi)^(m/2) + for (r in 1:k) + { + #FIXME: numerical problems, because 0 < det(Rho[,,r] < EPS; what to do ?! + # consequence: error in while() at line 77 + Gam[i,r] = pi[r] * exp(-0.5*sqNorm2[r] + shift) * det(rho[,,r]) + sumLLF1 = sumLLF1 + Gam[i,r] / (2*base::pi)^(m/2) } sumLogLLF2 = sumLogLLF2 + log(sumLLF1) sumGamI = sum(Gam[i,]) if(sumGamI > EPS) gam[i,] = Gam[i,] / sumGamI else - gam[i,] = rep(0,k) - } - - - sumPen = 0 - for(r in 1:k){ - sumPen = sumPen + Pi[r]^gamma^b[r] + gam[i,] = rep(0,k) } - LLF[ite] = -(1/n)*sumLogLLF2 + lambda*sumPen - - if(ite == 1) - dist = LLF[ite] - else - dist = (LLF[ite]-LLF[ite-1])/(1+abs(LLF[ite])) - - Dist1=max(max(max((abs(phi-Phi))/(1+abs(phi))))) - Dist2=max(max(max((abs(rho-Rho))/(1+abs(rho))))) - Dist3=max(max((abs(Pi-PI))/(1+abs(PI)))) - dist2=max(c(Dist1,Dist2,Dist3)) - - ite=ite+1 + + sumPen = sum(pi^gamma * b) + LLF[ite] = -sumLogLLF2/n + lambda*sumPen + + dist = ifelse( ite == 1, LLF[ite], (LLF[ite]-LLF[ite-1]) / (1+abs(LLF[ite])) ) + + Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) ) + Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) + Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) ) + dist2 = max(Dist1,Dist2,Dist3) + + ite = ite+1 } - - Pi = t(Pi) - return(list("phi"=phi, "rho"=rho, "pi"=Pi, "LLF"=LLF, "S"=S)) + + return(list("phi"=phi, "rho"=rho, "pi"=pi, "LLF"=LLF, "S"=S)) } diff --git a/src/test/generate_test_data/helpers/EMGLLF.m b/src/test/generate_test_data/helpers/EMGLLF.m deleted file mode 100644 index 618ffba..0000000 --- a/src/test/generate_test_data/helpers/EMGLLF.m +++ /dev/null @@ -1,174 +0,0 @@ -function[phi,rho,pi,LLF,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau) - - %Get matrices dimensions - PI = 4.0 * atan(1.0); - n = size(X, 1); - [p,m,k] = size(phiInit); - - %Initialize outputs - phi = phiInit; - rho = rhoInit; - pi = piInit; - LLF = zeros(maxi,1); - S = zeros(p,m,k); - - %Other local variables - %NOTE: variables order is always n,p,m,k - gam = gamInit; - Gram2 = zeros(p,p,k); - ps2 = zeros(p,m,k); - b = zeros(k,1); - pen = zeros(maxi,k); - X2 = zeros(n,p,k); - Y2 = zeros(n,m,k); - dist = 0; - dist2 = 0; - ite = 1; - pi2 = zeros(k,1); - ps = zeros(m,k); - nY2 = zeros(m,k); - ps1 = zeros(n,m,k); - nY21 = zeros(n,m,k); - Gam = zeros(n,k); - EPS = 1e-15; - - while ite<=mini || (ite<=maxi && (dist>=tau || dist2>=sqrt(tau))) - - Phi = phi; - Rho = rho; - Pi = pi; - - %Calculs associés à Y et X - for r=1:k - for mm=1:m - Y2(:,mm,r) = sqrt(gam(:,r)) .* Y(:,mm); - end - for i=1:n - X2(i,:,r) = X(i,:) .* sqrt(gam(i,r)); - end - for mm=1:m - ps2(:,mm,r) = transpose(X2(:,:,r)) * Y2(:,mm,r); - end - for j=1:p - for s=1:p - Gram2(j,s,r) = dot(X2(:,j,r), X2(:,s,r)); - end - end - end - - %%%%%%%%%% - %Etape M % - %%%%%%%%%% - - %Pour pi - for r=1:k - b(r) = sum(sum(abs(phi(:,:,r)))); - end - gam2 = sum(gam,1); - a = sum(gam*transpose(log(pi))); - - %tant que les proportions sont negatives - kk = 0; - pi2AllPositive = false; - while ~pi2AllPositive - pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi); - pi2AllPositive = true; - for r=1:k - if pi2(r) < 0 - pi2AllPositive = false; - break; - end - end - kk = kk+1; - end - - %t(m) la plus grande valeur dans la grille O.1^k tel que ce soit - %décroissante ou constante - while (-1/n*a+lambda*((pi.^gamma)*b))<(-1/n*gam2*transpose(log(pi2))+lambda.*(pi2.^gamma)*b) && kk<1000 - pi2 = pi+0.1^kk*(1/n*gam2-pi); - kk = kk+1; - end - t = 0.1^(kk); - pi = (pi+t*(pi2-pi)) / sum(pi+t*(pi2-pi)); - - %Pour phi et rho - for r=1:k - for mm=1:m - for i=1:n - ps1(i,mm,r) = Y2(i,mm,r) * dot(X2(i,:,r), phi(:,mm,r)); - nY21(i,mm,r) = (Y2(i,mm,r))^2; - end - ps(mm,r) = sum(ps1(:,mm,r)); - nY2(mm,r) = sum(nY21(:,mm,r)); - rho(mm,mm,r) = ((ps(mm,r)+sqrt(ps(mm,r)^2+4*nY2(mm,r)*(gam2(r))))/(2*nY2(mm,r))); - end - end - for r=1:k - for j=1:p - for mm=1:m - S(j,mm,r) = -rho(mm,mm,r)*ps2(j,mm,r) + dot(phi(1:j-1,mm,r),Gram2(j,1:j-1,r)')... - + dot(phi(j+1:p,mm,r),Gram2(j,j+1:p,r)'); - if abs(S(j,mm,r)) <= n*lambda*(pi(r)^gamma) - phi(j,mm,r)=0; - else - if S(j,mm,r)> n*lambda*(pi(r)^gamma) - phi(j,mm,r)=(n*lambda*(pi(r)^gamma)-S(j,mm,r))/Gram2(j,j,r); - else - phi(j,mm,r)=-(n*lambda*(pi(r)^gamma)+S(j,mm,r))/Gram2(j,j,r); - end - end - end - end - end - - %%%%%%%%%% - %Etape E % - %%%%%%%%%% - - sumLogLLF2 = 0.0; - for i=1:n - %precompute dot products to numerically adjust their values - dotProducts = zeros(k,1); - for r=1:k - dotProducts(r)= (Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)); - end - shift = 0.5*min(dotProducts); - - %compute Gam(:,:) using shift determined above - sumLLF1 = 0.0; - for r=1:k - Gam(i,r) = pi(r)*det(rho(:,:,r))*exp(-0.5*dotProducts(r) + shift); - sumLLF1 = sumLLF1 + Gam(i,r)/(2*PI)^(m/2); - end - sumLogLLF2 = sumLogLLF2 + log(sumLLF1); - sumGamI = sum(Gam(i,:)); - if sumGamI > EPS - gam(i,:) = Gam(i,:) / sumGamI; - else - gam(i,:) = zeros(k,1); - end - end - - sumPen = 0.0; - for r=1:k - sumPen = sumPen + pi(r).^gamma .* b(r); - end - LLF(ite) = -(1/n)*sumLogLLF2 + lambda*sumPen; - - if ite == 1 - dist = LLF(ite); - else - dist = (LLF(ite)-LLF(ite-1))/(1+abs(LLF(ite))); - end - - Dist1=max(max(max((abs(phi-Phi))./(1+abs(phi))))); - Dist2=max(max(max((abs(rho-Rho))./(1+abs(rho))))); - Dist3=max(max((abs(pi-Pi))./(1+abs(Pi)))); - dist2=max([Dist1,Dist2,Dist3]); - - ite=ite+1; - end - - pi = transpose(pi); - -end diff --git a/src/test/generate_test_data/helpers/EMGrank.R b/src/test/generate_test_data/helpers/EMGrank.R index d419813..346916b 100644 --- a/src/test/generate_test_data/helpers/EMGrank.R +++ b/src/test/generate_test_data/helpers/EMGrank.R @@ -7,7 +7,8 @@ matricize <- function(X) } require(MASS) -EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ +EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank) +{ #matrix dimensions n = dim(X)[1] p = dim(X)[2] @@ -17,18 +18,17 @@ EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #init outputs phi = array(0, dim=c(p,m,k)) Z = rep(1, n) -# Pi = piInit LLF = 0 #local variables Phi = array(0, dim=c(p,m,k)) - deltaPhi = c(0) - sumDeltaPhi = 0 + deltaPhi = c() + sumDeltaPhi = 0. deltaPhiBufferSize = 20 #main loop ite = 1 - while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau)) + while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau)) { #M step: Mise à jour de Beta (et donc phi) for(r in 1:k) @@ -40,46 +40,45 @@ EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*% crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) ) S = s$d - U = s$u - V = s$v #Set m-rank(r) singular values to zero, and recompose #best rank(r) approximation of the initial product if(rank[r] < length(S)) S[(rank[r]+1):length(S)] = 0 - phi[,,r] = U %*% diag(S) %*% t(V) %*% Rho[,,r] + phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r] } - + #Etape E et calcul de LLF sumLogLLF2 = 0 - for(i in 1:n){ + for(i in seq_len(n)) + { sumLLF1 = 0 maxLogGamIR = -Inf - for(r in 1:k){ + for (r in seq_len(k)) + { dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r]) logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct #Z[i] = index of max (gam[i,]) - if(logGamIR > maxLogGamIR){ + if(logGamIR > maxLogGamIR) + { Z[i] = r maxLogGamIR = logGamIR } - sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) + sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) } sumLogLLF2 = sumLogLLF2 + log(sumLLF1) } LLF = -1/n * sumLogLLF2 - + #update distance parameter to check algorithm convergence (delta(phi, Phi)) - deltaPhi = c(deltaPhi, max(max(max((abs(phi-Phi))/(1+abs(phi))))) ) - if(length(deltaPhi) > deltaPhiBufferSize){ - l_1 = c(2:length(deltaPhi)) - deltaPhi = deltaPhi[l_1] - } + deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain? + if (length(deltaPhi) > deltaPhiBufferSize) + deltaPhi = deltaPhi[2:length(deltaPhi)] sumDeltaPhi = sum(abs(deltaPhi)) - + #update other local variables Phi = phi ite = ite+1 } - return(list(phi=phi, LLF=LLF)) + return(list("phi"=phi, "LLF"=LLF)) } diff --git a/src/test/generate_test_data/helpers/EMGrank.m b/src/test/generate_test_data/helpers/EMGrank.m deleted file mode 100644 index 76074b7..0000000 --- a/src/test/generate_test_data/helpers/EMGrank.m +++ /dev/null @@ -1,69 +0,0 @@ -function[phi,LLF] = EMGrank(Pi,Rho,mini,maxi,X,Y,tau,rank) - - % get matrix sizes - [~,m,k] = size(Rho); - [n,p] = size(X); - - % allocate output matrices - phi = zeros(p,m,k); - Z = ones(n,1,'int64'); - LLF = 0.0; - - % local variables - Phi = zeros(p,m,k); - deltaPhi = 0.0; - deltaPhi = []; - sumDeltaPhi = 0.0; - deltaPhiBufferSize = 20; - - %main loop (at least mini iterations) - ite = int64(1); - while ite<=mini || (ite<=maxi && sumDeltaPhi>tau) - - %M step: Mise à jour de Beta (et donc phi) - for r=1:k - if (sum(Z==r) == 0) - continue; - end - %U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr - [U,S,V] = svd(pinv(transpose(X(Z==r,:))*X(Z==r,:))*transpose(X(Z==r,:))*Y(Z==r,:)); - %Set m-rank(r) singular values to zero, and recompose - %best rank(r) approximation of the initial product - S(rank(r)+1:end,:) = 0; - phi(:,:,r) = U * S * transpose(V) * Rho(:,:,r); - end - - %Etape E et calcul de LLF - sumLogLLF2 = 0.0; - for i=1:n - sumLLF1 = 0.0; - maxLogGamIR = -Inf; - for r=1:k - dotProduct = (Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)); - logGamIR = log(Pi(r)) + log(det(Rho(:,:,r))) - 0.5*dotProduct; - %Z(i) = index of max (gam(i,:)) - if logGamIR > maxLogGamIR - Z(i) = r; - maxLogGamIR = logGamIR; - end - sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2); - end - sumLogLLF2 = sumLogLLF2 + log(sumLLF1); - end - - LLF = -1/n * sumLogLLF2; - - % update distance parameter to check algorithm convergence (delta(phi, Phi)) - deltaPhi = [ deltaPhi, max(max(max((abs(phi-Phi))./(1+abs(phi))))) ]; - if length(deltaPhi) > deltaPhiBufferSize - deltaPhi = deltaPhi(2:length(deltaPhi)); - end - sumDeltaPhi = sum(abs(deltaPhi)); - - % update other local variables - Phi = phi; - ite = ite+1; - - end - -end diff --git a/src/test/generate_test_data/helpers/basicInitParameters.m b/src/test/generate_test_data/helpers/basicInitParameters.m deleted file mode 100644 index 50410f5..0000000 --- a/src/test/generate_test_data/helpers/basicInitParameters.m +++ /dev/null @@ -1,19 +0,0 @@ -function[phiInit,rhoInit,piInit,gamInit] = basicInitParameters(n,p,m,k) - - phiInit = zeros(p,m,k); - - piInit = (1.0/k) * ones(1,k); - - rhoInit = zeros(m,m,k); - for r=1:k - rhoInit(:,:,r) = eye(m,m); - end - - gamInit = 0.1 * ones(n,k); - R = random('unid',k,n,1); - for i=1:n - gamInit(i,R(i)) = 0.9; - end - gamInit = gamInit / (sum(gamInit(1,:))); - -end diff --git a/src/test/generate_test_data/helpers/checkOutput.R b/src/test/generate_test_data/helpers/checkOutput.R deleted file mode 100644 index ae2fbdf..0000000 --- a/src/test/generate_test_data/helpers/checkOutput.R +++ /dev/null @@ -1,10 +0,0 @@ -checkOutput = function(varName, array, refArray, tol) -{ - print(paste("Checking ",varName,sep="")) - maxError = max(abs(array - refArray)) - if(maxError >= tol){ - print(paste("Inaccuracy: max(abs(error)) = ",maxError," >= ",tol,sep="")) - } else{ - print("OK") - } -} diff --git a/src/test/generate_test_data/helpers/constructionModelesLassoRank.R b/src/test/generate_test_data/helpers/constructionModelesLassoRank.R index ad4f725..15305d9 100644 --- a/src/test/generate_test_data/helpers/constructionModelesLassoRank.R +++ b/src/test/generate_test_data/helpers/constructionModelesLassoRank.R @@ -1,34 +1,49 @@ -constructionModelesLassoRank = function(Pi,Rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax){ +constructionModelesLassoRank = function(pi,rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax) +{ #get matrix sizes n = dim(X)[1] p = dim(X)[2] m = dim(rho)[2] k = dim(rho)[3] L = dim(A1)[2] - + + # On cherche les rangs possiblement intéressants deltaRank = rangmax - rangmin + 1 Size = deltaRank^k - Rank = matrix(0, Size, k) -# for(r in 1:k) { -# Rank[,r] = rangmin + <--- #FIXME: -# } - + Rank = matrix(0, nrow=Size, ncol=k) + for(r in 1:k) + { + # On veut le tableau de toutes les combinaisons de rangs possibles + # Dans la première colonne : on répète (rangmax-rangmin)^(k-1) chaque chiffre : + # ça remplit la colonne + # Dans la deuxieme : on répète (rangmax-rangmin)^(k-2) chaque chiffre, + # et on fait ça (rangmax-rangmin)^2 fois + # ... + # Dans la dernière, on répète chaque chiffre une fois, + # et on fait ça (rangmin-rangmax)^(k-1) fois. + Rank[,r] = rangmin + rep(0:(deltaRank-1), deltaRank^(r-1), each=deltaRank^(k-r)) + } + + # output parameters phi = array(0, dim=c(p,m,k,L*Size)) llh = matrix(0, L*Size, 2) #log-likelihood - for(lambdaIndex in 1:L){ - #on ne garde que les colonnes actives - #active sera l'ensemble des variables informatives - active = A1[, lambdaIndex] - active[active==0] = c() - if(length(active)>0){ - for(j in 1:Size){ - EMG_rank = EMGrank(Pi[,lambdaIndex], Rho[,,,lambdaIndex], mini, maxi, X[, active], Y, tau, Rank[j,]) - phiLambda = EMG_rank$phi - LLF = EMG_rank$LLF - llh[(lambdaIndex-1)*Size+j,] = c(LLF, sum(Rank[j,]^(length(active)- Rank[j,]+m))) - phi[active,,,(lambdaIndex-1)*Size+j] = phiLambda + for(lambdaIndex in 1:L) + { + # on ne garde que les colonnes actives + # 'active' sera l'ensemble des variables informatives + active = A1[,lambdaIndex] + active = active[-(active==0)] + if (length(active) > 0) + { + for (j in 1:Size) + { + res = EMGrank(Pi[,lambdaIndex], Rho[,,,lambdaIndex], mini, maxi, + X[,active], Y, tau, Rank[j,]) + llh[(lambdaIndex-1)*Size+j,] = + c( res$LLF, sum(Rank[j,] * (length(active)- Rank[j,] + m)) ) + phi[active,,,(lambdaIndex-1)*Size+j] = res$phi } } } - return(list(phi=phi, llh = llh)) + return (list(phi=phi, llh = llh)) } diff --git a/src/test/generate_test_data/helpers/constructionModelesLassoRank.m b/src/test/generate_test_data/helpers/constructionModelesLassoRank.m deleted file mode 100644 index 6279416..0000000 --- a/src/test/generate_test_data/helpers/constructionModelesLassoRank.m +++ /dev/null @@ -1,40 +0,0 @@ -function[phi,llh] = constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax) - - PI = 4.0 * atan(1.0); - - %get matrix sizes - [n,p] = size(X); - [~,m,k,~] = size(Rho); - L = size(A1, 2); %A1 est p x m+1 x L ou p x L ?! - - %On cherche les rangs possiblement intéressants - deltaRank = rangmax - rangmin + 1; - Size = deltaRank^k; - Rank = zeros(Size,k,'int64'); - for r=1:k - %On veut le tableau de toutes les combinaisons de rangs possibles - %Dans la première colonne : on répète (rangmax-rangmin)^(k-1) chaque chiffre : ca remplit la colonne - %Dans la deuxieme : on répète (rangmax-rangmin)^(k-2) chaque chiffre, et on fait ca (rangmax-rangmin)^2 fois - %... - %Dans la dernière, on répète chaque chiffre une fois, et on fait ca (rangmin-rangmax)^(k-1) fois. - Rank(:,r) = rangmin + reshape(repmat(0:(deltaRank-1), deltaRank^(k-r), deltaRank^(r-1)), Size, 1); - end - - %output parameters - phi = zeros(p,m,k,L*Size); - llh = zeros(L*Size,2); - for lambdaIndex=1:L - %On ne garde que les colonnes actives - %active sera l'ensemble des variables informatives - active = A1(:,lambdaIndex); - active(active==0) = []; - if length(active) > 0 - for j=1:Size - [phiLambda,LLF] = EMGrank(Pi(:,lambdaIndex),Rho(:,:,:,lambdaIndex),mini,maxi,X(:,active),Y,tau,Rank(j,:)); - llh((lambdaIndex-1)*Size+j,:) = [LLF, sum(Rank(j,:) .* (length(active)-Rank(j,:)+m))]; - phi(active,:,:,(lambdaIndex-1)*Size+j) = phiLambda; - end - end - end - -end diff --git a/src/test/generate_test_data/helpers/covariance.R b/src/test/generate_test_data/helpers/covariance.R deleted file mode 100644 index 09a9ec5..0000000 --- a/src/test/generate_test_data/helpers/covariance.R +++ /dev/null @@ -1,8 +0,0 @@ -covariance = function(p,a) -{ - A = matrix(a, p,p) - for (i in 1:p){ - A[i,] = A[i,]^abs(i-(1:p)) - } - return (A) -} diff --git a/src/test/generate_test_data/helpers/generateIO.m b/src/test/generate_test_data/helpers/generateIO.m deleted file mode 100644 index c677fd2..0000000 --- a/src/test/generate_test_data/helpers/generateIO.m +++ /dev/null @@ -1,37 +0,0 @@ -%X is generated following a gaussian mixture \sum pi_r N(meanX_k, covX_r) -%Y is generated then, with Y_i ~ \sum pi_r N(Beta_r.X_i, covY_r) -function[X,Y,Z] = generateIO(meanX, covX, covY, pi, beta, n) - - [p, ~, k] = size(covX); - [m, ~, ~] = size(covY); - if exist('octave_config_info') - %Octave statistics package doesn't have gmdistribution() - X = zeros(n, p); - Z = zeros(n); - cs = cumsum(pi); - for i=1:n - %TODO: vectorize ? http://stackoverflow.com/questions/2977497/weighted-random-numbers-in-matlab - tmpRand01 = rand(); - [~,Z(i)] = min(cs - tmpRand01 >= 0); - X(i,:) = mvnrnd(meanX(Z(i),:), covX(:,:,Z(i)), 1); - end - else - gmDistX = gmdistribution(meanX, covX, pi); - [X, Z] = random(gmDistX, n); - end - - Y = zeros(n, m); - BX = zeros(n,m,k); - for i=1:n - for r=1:k - %compute beta_r . X_i - BXir = zeros(1, m); - for mm=1:m - BXir(mm) = dot(X(i,:), beta(:,mm,r)); - end - %add pi(r) * N(beta_r . X_i, covY) to Y_i - Y(i,:) = Y(i,:) + pi(r) * mvnrnd(BXir, covY(:,:,r), 1); - end - end - -end diff --git a/src/test/generate_test_data/helpers/generateIOdefault.m b/src/test/generate_test_data/helpers/generateIOdefault.m deleted file mode 100644 index f4c3c1f..0000000 --- a/src/test/generate_test_data/helpers/generateIOdefault.m +++ /dev/null @@ -1,23 +0,0 @@ -%call generateIO with default parameters (random means, covariances = identity, equirepartition) -function[X,Y,Z] = generateIOdefault(n, p, m, k) - - rangeX = 100; - meanX = rangeX * (1 - 2*rand(k, p)); - covX = zeros(p,p,k); - covY = zeros(m,m,k); - for r=1:k - covX(:,:,r) = eye(p); - covY(:,:,r) = eye(m); - end - pi = (1/k) * ones(1,k); - - %initialize beta to a random number of non-zero random value - beta = zeros(p,m,k); - for j=1:p - nonZeroCount = ceil(m*rand(1)); - beta(j,1:nonZeroCount,:) = rand(nonZeroCount, k); - end - - [X,Y,Z] = generateIO(meanX, covX, covY, pi, beta, n); - -end diff --git a/src/test/test.constructionModelesLassoRank.c b/src/test/test.constructionModelesLassoRank.c index b3ed34a..f60ffea 100644 --- a/src/test/test.constructionModelesLassoRank.c +++ b/src/test/test.constructionModelesLassoRank.c @@ -1,6 +1,7 @@ #include "constructionModelesLassoRank.h" #include "test_utils.h" #include +#include int main(int argc, char** argv) { @@ -14,8 +15,8 @@ int main(int argc, char** argv) //////////// // INPUTS // - Real* Pi = readArray_real("Pi"); - Real* Rho = readArray_real("Rho"); + Real* pi = readArray_real("pi"); + Real* rho = readArray_real("rho"); int mini = read_int("mini"); int maxi = read_int("maxi"); Real* X = readArray_real("X"); @@ -36,13 +37,13 @@ int main(int argc, char** argv) ///////////////////////////////////////// // Call to constructionModelesLassoMLE // constructionModelesLassoRank_core( - Pi,Rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax, + pi,rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax, phi,llh, n,p,m,k,L); ///////////////////////////////////////// - free(Rho); - free(Pi); + free(rho); + free(pi); free(X); free(Y); free(A1);