From: emilie Date: Tue, 6 Dec 2016 12:11:45 +0000 (+0100) Subject: utilisation de k-means au lieu de hierarchique dans initSmallEM - PB de dimensions... X-Git-Url: https://git.auder.net/?a=commitdiff_plain;h=f2a9120810d7e1e423c7b5c2c4320f4e27221f50;p=valse.git utilisation de k-means au lieu de hierarchique dans initSmallEM - PB de dimensions dans generateI0 - cosmétique --- diff --git a/DESCRIPTION b/DESCRIPTION index 8902f1a..07caf92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,3 +11,4 @@ Depends: LazyData: yes URL: http://git.auder.net/?p=valse.git License: MIT +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..c625f7e --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,11 @@ +# Generated by roxygen2: do not edit by hand + +export(basic_Init_Parameters) +export(discardSimilarModels) +export(discardSimilarModels2) +export(generateIO) +export(generateIOdefault) +export(gridLambda) +export(indicesSelection) +export(initSmallEM) +export(modelSelection) diff --git a/R/discardSimilarModels2.R b/R/discardSimilarModels2.R index d620bff..b59c1ba 100644 --- a/R/discardSimilarModels2.R +++ b/R/discardSimilarModels2.R @@ -4,10 +4,9 @@ #' @param rho covariance matrix #' @param pi weight parameters #' -#' @return +#' @return a list with B1, in, rho, pi #' @export #' -#' @examples discardSimilarModels2 = function(B1,rho,pi) { ind = c() dim_B1 = dim(B1) @@ -16,6 +15,6 @@ discardSimilarModels2 = function(B1,rho,pi) glambda = rep(0,sizeLambda) suppressmodel = discardSimilarModels(B1,B2,glambda,rho,pi) - return (list(B1 = suppressmodel$B1, ind = suppressmodel$B2, + return (list(B1 = suppressmodel$B1, ind = suppressmodel$ind, rho = suppressmodel$rho, pi = suppressmodel$pi)) } diff --git a/R/generateIO.R b/R/generateIO.R index 83d8cc9..0e776d0 100644 --- a/R/generateIO.R +++ b/R/generateIO.R @@ -1,20 +1,19 @@ #' Generate a sample of (X,Y) of size n -#' @param covX covariance for covariates -#' @param 
covY covariance for the response vector +#' @param covX covariance for covariates (of size p*p*K) +#' @param covY covariance for the response vector (of size m*m*K) #' @param pi proportion for each cluster #' @param beta regression matrix #' @param n sample size +#' #' @return list with X and Y #' @export #----------------------------------------------------------------------- generateIO = function(covX, covY, pi, beta, n) { - size_covX = dim(covX) - p = size_covX[1] - k = size_covX[3] + p = dim(covX)[1] - size_covY = dim(covY) - m = size_covY[1] + m = dim(covY)[1] + k = dim(covY)[3] Y = matrix(0,n,m) BX = array(0, dim=c(n,m,k)) diff --git a/R/indicesSelection.R b/R/indicesSelection.R index 0445406..1adece6 100644 --- a/R/indicesSelection.R +++ b/R/indicesSelection.R @@ -1,4 +1,4 @@ -#' Construct the set of relevant indices +#' Construct the set of relevant indices -> ED: je crois que cette fonction n'est pas utile #' #' @param phi regression matrix, of size p*m #' @param thresh threshold to say a cofficient is equal to zero diff --git a/R/initSmallEM.R b/R/initSmallEM.R index 1fa2d9b..8cfb7e8 100644 --- a/R/initSmallEM.R +++ b/R/initSmallEM.R @@ -22,12 +22,11 @@ initSmallEM = function(k,X,Y,tau) LLFinit1 = list() require(MASS) #Moore-Penrose generalized inverse of matrix + require(mclust) # K-means with selection of K for(repet in 1:20) { - clusters = hclust(dist(y)) #default distance : euclidean - #cutree retourne les indices (à 
quel cluster indiv_i appartient) d'un clustering hierarchique - clusterCut = cutree(clusters,k) - Zinit1[,repet] = clusterCut + clusters = Mclust(matrix(c(X,Y),nrow=n),k) #default distance : euclidean + Zinit1[,repet] = clusters$classification for(r in 1:k) { diff --git a/R/main.R b/R/main.R index 2eec878..9817b00 100644 --- a/R/main.R +++ b/R/main.R @@ -39,7 +39,7 @@ SelMix = setRefClass( # initialisation for the allocations probabilities in each component tauInit, # values for the regularization parameter grid - gridLambda = []; + gridLambda = c(), # je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit # une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ... A1, @@ -52,7 +52,7 @@ SelMix = setRefClass( Pi, #immutable - seuil = 1e-15; + seuil = 1e-15 ), methods = list( diff --git a/R/modelSelection.R b/R/modelSelection.R index 8020daa..5a79bb6 100644 --- a/R/modelSelection.R +++ b/R/modelSelection.R @@ -9,7 +9,6 @@ #' and D1, a vector of corresponding dimensions #' @export #' -#' @examples modelSelection = function(LLF) { D = LLF[,2] diff --git a/R/modelSelectionCrit.R b/R/modelSelectionCrit.R new file mode 100644 index 0000000..81f373d --- /dev/null +++ b/R/modelSelectionCrit.R @@ -0,0 +1,2 @@ +## Programme qui sélectionne un modèle +## proposer à l'utilisation différents critères (BIC, AIC, slope heuristic) \ No newline at end of file diff --git a/man/basic_Init_Parameters.Rd b/man/basic_Init_Parameters.Rd new file mode 100644 index 0000000..3be2e2d --- /dev/null +++ b/man/basic_Init_Parameters.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/basicInitParameters.R +\name{basic_Init_Parameters} +\alias{basic_Init_Parameters} +\title{Initialize the parameters in a basic way (zero for the conditional mean, + uniform for weights, identity for covariance matrices, and uniformly distributed forthe clustering)} +\usage{ +basic_Init_Parameters(n, p, 
m, k) +} +\arguments{ +\item{n}{sample size} + +\item{p}{number of covariates} + +\item{m}{size of the response} + +\item{k}{number of clusters} +} +\value{ +list with phiInit, rhoInit,piInit,gamInit +} +\description{ +Initialize the parameters in a basic way (zero for the conditional mean, + uniform for weights, identity for covariance matrices, and uniformly distributed forthe clustering) +} + diff --git a/man/discardSimilarModels.Rd b/man/discardSimilarModels.Rd new file mode 100644 index 0000000..0a73b7e --- /dev/null +++ b/man/discardSimilarModels.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discardSimilarModels.R +\name{discardSimilarModels} +\alias{discardSimilarModels} +\title{Discard models which have the same relevant variables} +\usage{ +discardSimilarModels(B1, B2, glambda, rho, pi) +} +\arguments{ +\item{B1}{array of relevant coefficients (of size p*m*length(gridlambda))} + +\item{B2}{array of irrelevant coefficients (of size p*m*length(gridlambda))} + +\item{glambda}{grid of regularization parameters (vector)} + +\item{rho}{covariance matrix (of size m*m*K*size(gridLambda))} + +\item{pi}{weight parameters (of size K*size(gridLambda))} +} +\value{ +a list with update B1, B2, glambda, rho and pi, and ind the vector of indices + of selected models. 
+} +\description{ +Discard models which have the same relevant variables +} + diff --git a/man/discardSimilarModels2.Rd b/man/discardSimilarModels2.Rd new file mode 100644 index 0000000..05c0e61 --- /dev/null +++ b/man/discardSimilarModels2.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discardSimilarModels2.R +\name{discardSimilarModels2} +\alias{discardSimilarModels2} +\title{Similar to discardSimilarModels, for Lasso-rank procedure (focus on columns)} +\usage{ +discardSimilarModels2(B1, rho, pi) +} +\arguments{ +\item{B1}{array of relevant coefficients (of size p*m*length(gridlambda))} + +\item{rho}{covariance matrix} + +\item{pi}{weight parameters} +} +\value{ +a list with B1, in, rho, pi +} +\description{ +Similar to discardSimilarModels, for Lasso-rank procedure (focus on columns) +} + diff --git a/man/generateIO.Rd b/man/generateIO.Rd new file mode 100644 index 0000000..b420156 --- /dev/null +++ b/man/generateIO.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateIO.R +\name{generateIO} +\alias{generateIO} +\title{Generate a sample of (X,Y) of size n} +\usage{ +generateIO(covX, covY, pi, beta, n) +} +\arguments{ +\item{covX}{covariance for covariates} + +\item{covY}{covariance for the response vector} + +\item{pi}{proportion for each cluster} + +\item{beta}{regression matrix} + +\item{n}{sample size} +} +\value{ +list with X and Y +} +\description{ +Generate a sample of (X,Y) of size n +} + diff --git a/man/generateIOdefault.Rd b/man/generateIOdefault.Rd new file mode 100644 index 0000000..332968e --- /dev/null +++ b/man/generateIOdefault.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateIOdefault.R +\name{generateIOdefault} +\alias{generateIOdefault} +\title{Generate a sample of (X,Y) of size n with default values} +\usage{ +generateIOdefault(n, p, m, k) +} +\arguments{ +\item{n}{sample size} 
+ +\item{p}{number of covariates} + +\item{m}{size of the response} + +\item{k}{number of clusters} +} +\value{ +list with X and Y +} +\description{ +Generate a sample of (X,Y) of size n with default values +} + diff --git a/man/gridLambda.Rd b/man/gridLambda.Rd new file mode 100644 index 0000000..551c3d7 --- /dev/null +++ b/man/gridLambda.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gridLambda.R +\name{gridLambda} +\alias{gridLambda} +\title{Construct the data-driven grid for the regularization parameters used for the Lasso estimator} +\usage{ +gridLambda(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau) +} +\arguments{ +\item{phiInit}{value for phi} + +\item{piInit}{value for pi} + +\item{gamInit}{value for gamma} + +\item{mini}{minimum number of iterations in EM algorithm} + +\item{maxi}{maximum number of iterations in EM algorithm} + +\item{tau}{threshold to stop EM algorithm} + +\item{rhoInt}{value for rho} +} +\value{ +the grid of regularization parameters +} +\description{ +Construct the data-driven grid for the regularization parameters used for the Lasso estimator +} + diff --git a/man/indicesSelection.Rd b/man/indicesSelection.Rd new file mode 100644 index 0000000..21d8e2d --- /dev/null +++ b/man/indicesSelection.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/indicesSelection.R +\name{indicesSelection} +\alias{indicesSelection} +\title{Construct the set of relevant indices} +\usage{ +indicesSelection(phi, thresh = 1e-06) +} +\arguments{ +\item{phi}{regression matrix, of size p*m} + +\item{thresh}{threshold to say a cofficient is equal to zero} +} +\value{ +a list with A, a matrix with relevant indices (size = p*m) and B, a + matrix with irrelevant indices (size = p*m) +} +\description{ +Construct the set of relevant indices +} + diff --git a/man/initSmallEM.Rd b/man/initSmallEM.Rd new file mode 100644 index 0000000..1b5db3f --- 
/dev/null +++ b/man/initSmallEM.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/initSmallEM.R +\name{initSmallEM} +\alias{initSmallEM} +\title{initialization of the EM algorithm} +\usage{ +initSmallEM(k, X, Y, tau) +} +\arguments{ +\item{k}{number of components} + +\item{X}{matrix of covariates (of size n*p)} + +\item{Y}{matrix of responses (of size n*m)} + +\item{tau}{threshold to stop EM algorithm} +} +\value{ +a list with phiInit, rhoInit, piInit, gamInit +} +\description{ +initialization of the EM algorithm +} + diff --git a/man/modelSelection.Rd b/man/modelSelection.Rd new file mode 100644 index 0000000..c65b678 --- /dev/null +++ b/man/modelSelection.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/modelSelection.R +\name{modelSelection} +\alias{modelSelection} +\title{Among a collection of models, this function constructs a subcollection of models with +models having strictly different dimensions, keeping the model which minimizes +the likelihood if there were several with the same dimension} +\usage{ +modelSelection(LLF) +} +\arguments{ +\item{LLF}{a matrix, the first column corresponds to likelihoods for several models +the second column corresponds to the dimensions of the corresponding models.} +} +\value{ +a list with indices, a vector of indices selected models, + and D1, a vector of corresponding dimensions +} +\description{ +Among a collection of models, this function constructs a subcollection of models with +models having strictly different dimensions, keeping the model which minimizes +the likelihood if there were several with the same dimension +} + diff --git a/man/vec_bin.Rd b/man/vec_bin.Rd new file mode 100644 index 0000000..1689488 --- /dev/null +++ b/man/vec_bin.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vec_bin.R +\name{vec_bin} +\alias{vec_bin} +\title{A function needed in 
initSmallEM} +\usage{ +vec_bin(X, r) +} +\arguments{ +\item{X}{vector with integer values} + +\item{r}{integer} +} +\value{ +a list with Z (a binary vector of size the size of X) and indices where Z is equal to 1 +} +\description{ +A function needed in initSmallEM +} +