From: emilie Date: Mon, 6 Mar 2017 12:27:35 +0000 (+0100) Subject: upadate valse.R X-Git-Url: https://git.auder.net/variants/%24%7Bvname%7D/%7B%7B%20asset%28%27mixstore/js/pieces/cq.svg?a=commitdiff_plain;h=c7dab9ff8b95a7630c7dafdcf40d60c659290ef2;p=valse.git upadate valse.R --- diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..d3bb83f --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,17 @@ +# Generated by roxygen2: do not edit by hand + +export(basicInitParameters) +export(discardSimilarModels_EMGLLF) +export(discardSimilarModels_EMGrank) +export(generateXY) +export(generateXYdefault) +export(gridLambda) +export(initSmallEM) +export(modelSelection) +export(selectVariables) +importFrom(methods,new) +importFrom(stats,cutree) +importFrom(stats,dist) +importFrom(stats,hclust) +importFrom(stats,runif) +useDynLib(valse) diff --git a/R/valse.R b/R/valse.R new file mode 100644 index 0000000..e5205a5 --- /dev/null +++ b/R/valse.R @@ -0,0 +1,143 @@
+#' Main function
+#'
+#' For each number of clusters k in kmin:kmax: initialize the parameters, build
+#' the grid of regularization parameters, select the relevant variables, then
+#' estimate the models with the requested procedure.
+#'
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param procedure among 'LassoMLE' or 'LassoRank'
+#' @param selecMod method to select a model among 'SlopeHeuristic', 'BIC', 'AIC'
+#' @param gamma integer for the power in the penalty, by default = 1
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param kmin integer, minimum number of clusters, by default = 2
+#' @param kmax integer, maximum number of clusters, by default = 10
+#' @param rang.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rang.max integer, maximum rank in the low rank procedure, by default = 10
+#' @return a list with estimators of parameters
+#' @export
+#-----------------------------------------------------------------------
+valse = function(X,Y,procedure,selecMod,gamma = 1,mini = 10,
+                 maxi = 100,eps = 1e-4,kmin = 2,kmax = 10,
+                 rang.min = 1,rang.max = 10) {
+  ##################################
+  #core workflow: compute all models
+  ##################################
+
+  #p and m are read from the data (X is n*p, Y is n*m): phiInit cannot be used
+  #here because it is only created inside the loop below.
+  p = ncol(X)
+  m = ncol(Y)
+
+  print("main loop: over all k and all lambda")
+  for (k in kmin:kmax)
+  {
+    print(k)
+
+    print("Parameters initialization")
+    #smallEM initializes parameters by k-means and regression model in each component,
+    #doing this 20 times, and keeping the values maximizing the likelihood after 10
+    #iterations of the EM algorithm.
+    init = initSmallEM(k, X, Y)
+    #NOTE(review): published with <<- as before; presumably read by the
+    #zero-argument helpers runProcedure1()/runProcedure2() -- confirm before
+    #turning these into plain local variables.
+    phiInit <<- init$phiInit
+    rhoInit <<- init$rhoInit
+    piInit <<- init$piInit
+    gamInit <<- init$gamInit
+
+    #The grid gets its own local name: assigning it to 'gridLambda' with <<-
+    #targeted the binding of the gridLambda() function itself.
+    #gamInit (returned by initSmallEM) replaces the never-defined tauInit.
+    glambda = gridLambda(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, eps)
+
+    print("Compute relevant parameters")
+    #select variables according to each regularization parameter
+    #from the grid: A1 corresponding to selected variables, and
+    #A2 corresponding to unselected variables.
+    #NOTE(review): 'thresh' is not defined in this function, and selectiontotale
+    #is not among the exported functions (selectVariables has a matching
+    #argument list) -- confirm both.
+    params = selectiontotale(phiInit,rhoInit,piInit,gamInit,
+                             mini,maxi,gamma,glambda,
+                             X,Y,thresh,eps)
+    A1 <<- params$A1
+    A2 <<- params$A2
+    Rho <<- params$Rho
+    Pi <<- params$Pi
+
+    if (procedure == 'LassoMLE') {
+      print('run the procedure Lasso-MLE')
+      #compute parameter estimations, with the Maximum Likelihood
+      #Estimator, restricted on selected variables.
+      model = constructionModelesLassoMLE(
+        phiInit, rhoInit,piInit,gamInit,mini,maxi,
+        gamma,glambda,X,Y,thresh,eps,A1,A2)
+      ################################################
+      ### TODO: review what follows
+      #NOTE(review): runProcedure1() and the initial value of Phi are not
+      #defined in this function; 'model' above is not used yet.
+      r1 = runProcedure1()
+      Phi2 = Phi
+      Rho2 = Rho
+      Pi2 = Pi
+
+      if (is.null(dim(Phi2)))
+        #test was: size(Phi2) == 0
+      {
+        Phi[, , 1:k] <<- r1$phi
+        Rho[, , 1:k] <<- r1$rho
+        Pi[1:k,] <<- r1$pi
+      } else
+      {
+        Phi <<-
+          array(0., dim = c(p, m, kmax, dim(Phi2)[4] + dim(r1$phi)[4]))
+        Phi[, , 1:(dim(Phi2)[3]), 1:(dim(Phi2)[4])] <<- Phi2
+        Phi[, , 1:k, dim(Phi2)[4] + 1] <<- r1$phi
+        Rho <<-
+          array(0., dim = c(m, m, kmax, dim(Rho2)[4] + dim(r1$rho)[4]))
+        Rho[, , 1:(dim(Rho2)[3]), 1:(dim(Rho2)[4])] <<- Rho2
+        Rho[, , 1:k, dim(Rho2)[4] + 1] <<- r1$rho
+        Pi <<- array(0., dim = c(kmax, dim(Pi2)[2] + dim(r1$pi)[2]))
+        Pi[1:nrow(Pi2), 1:ncol(Pi2)] <<- Pi2
+        Pi[1:k, ncol(Pi2) + 1] <<- r1$pi
+      }
+    } else {
+      print('run the procedure Lasso-Rank')
+      #compute parameter estimations, with the Low Rank
+      #Estimator, restricted on selected variables.
+      #the rank bounds are the rang.min / rang.max arguments of valse
+      model = constructionModelesLassoRank(Pi, Rho, mini, maxi, X, Y, eps,
+                                           A1, rang.min, rang.max)
+
+      ################################################
+      ### TODO: review what follows
+      #NOTE(review): runProcedure2() and the initial value of Phi are not
+      #defined in this function; 'model' above is not used yet.
+      phi = runProcedure2()$phi
+      Phi2 = Phi
+      if (dim(Phi2)[1] == 0)
+      {
+        Phi[, , 1:k,] <<- phi
+      } else
+      {
+        Phi <<- array(0, dim = c(p, m, kmax, dim(Phi2)[4] + dim(phi)[4]))
+        Phi[, , 1:(dim(Phi2)[3]), 1:(dim(Phi2)[4])] <<- Phi2
+        Phi[, , 1:k,-(1:(dim(Phi2)[4]))] <<- phi
+      }
+    }
+  }
+  print('Model selection')
+  #NOTE(review): the three branches are still empty, so nothing is selected
+  #and the function does not yet return the documented list.
+  if (selecMod == 'SlopeHeuristic') {
+
+  } else if (selecMod == 'BIC') {
+
+  } else if (selecMod == 'AIC') {
+
+  }
+}
diff --git a/man/basicInitParameters.Rd b/man/basicInitParameters.Rd new file mode 100644 index 0000000..6ed8547 --- /dev/null +++ b/man/basicInitParameters.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateSampleInputs.R +\name{basicInitParameters} +\alias{basicInitParameters} +\title{Initialize the parameters in a basic way (zero for the conditional mean, uniform for weights, +identity for covariance matrices, and uniformly distributed for the clustering)} +\usage{ +basicInitParameters(n, p, m, k) +} +\arguments{ +\item{n}{sample size} + +\item{p}{number of covariates} + +\item{m}{size of the response} + +\item{k}{number of clusters} +} +\value{ +list with phiInit, rhoInit,piInit,gamInit +} +\description{ +Initialize the parameters in a basic way (zero for the conditional mean, uniform for weights, +identity for covariance matrices, and uniformly distributed for the clustering) +} + diff --git a/man/discardSimilarModels_EMGLLF.Rd b/man/discardSimilarModels_EMGLLF.Rd new file mode 100644 index 0000000..20ac957 --- /dev/null +++ b/man/discardSimilarModels_EMGLLF.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discardSimilarModels.R +\name{discardSimilarModels_EMGLLF} +\alias{discardSimilarModels_EMGLLF} +\title{Discard models which have the same relevant variables 
- for EMGLLF} +\usage{ +discardSimilarModels_EMGLLF(B1, B2, glambda, rho, pi) +} +\arguments{ +\item{B1}{array of relevant coefficients (of size p*m*length(gridlambda))} + +\item{B2}{array of irrelevant coefficients (of size p*m*length(gridlambda))} + +\item{glambda}{grid of regularization parameters (vector)} + +\item{rho}{covariance matrix (of size m*m*K*size(gridLambda))} + +\item{pi}{weight parameters (of size K*size(gridLambda))} +} +\value{ +a list with updated B1, B2, glambda, rho and pi, and ind, the vector of indices +of selected models. +} +\description{ +Discard models which have the same relevant variables - for EMGLLF +} + diff --git a/man/discardSimilarModels_EMGrank.Rd b/man/discardSimilarModels_EMGrank.Rd new file mode 100644 index 0000000..932c4ff --- /dev/null +++ b/man/discardSimilarModels_EMGrank.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/discardSimilarModels.R +\name{discardSimilarModels_EMGrank} +\alias{discardSimilarModels_EMGrank} +\title{Discard models which have the same relevant variables + - for Lasso-rank procedure (focus on columns)} +\usage{ +discardSimilarModels_EMGrank(B1, rho, pi) +} +\arguments{ +\item{B1}{array of relevant coefficients (of size p*m*length(gridlambda))} + +\item{rho}{covariance matrix} + +\item{pi}{weight parameters} +} +\value{ +a list with B1, in, rho, pi +} +\description{ +Discard models which have the same relevant variables + - for Lasso-rank procedure (focus on columns) +} + diff --git a/man/generateXY.Rd b/man/generateXY.Rd new file mode 100644 index 0000000..ef76d82 --- /dev/null +++ b/man/generateXY.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateSampleInputs.R +\name{generateXY} +\alias{generateXY} +\title{Generate a sample of (X,Y) of size n} +\usage{ +generateXY(meanX, covX, covY, pi, beta, n) +} +\arguments{ +\item{meanX}{matrix of group means for covariates (of size p*K)} + 
+\item{covX}{covariance for covariates (of size p*p*K)} + +\item{covY}{covariance for the response vector (of size m*m*K)} + +\item{pi}{proportion for each cluster} + +\item{beta}{regression matrix} + +\item{n}{sample size} +} +\value{ +list with X and Y +} +\description{ +Generate a sample of (X,Y) of size n +} + diff --git a/man/generateXYdefault.Rd b/man/generateXYdefault.Rd new file mode 100644 index 0000000..3f80c08 --- /dev/null +++ b/man/generateXYdefault.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateSampleInputs.R +\name{generateXYdefault} +\alias{generateXYdefault} +\title{Generate a sample of (X,Y) of size n with default values} +\usage{ +generateXYdefault(n, p, m, k) +} +\arguments{ +\item{n}{sample size} + +\item{p}{number of covariates} + +\item{m}{size of the response} + +\item{k}{number of clusters} +} +\value{ +list with X and Y +} +\description{ +Generate a sample of (X,Y) of size n with default values +} + diff --git a/man/gridLambda.Rd b/man/gridLambda.Rd new file mode 100644 index 0000000..cc203a7 --- /dev/null +++ b/man/gridLambda.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gridLambda.R +\name{gridLambda} +\alias{gridLambda} +\title{Construct the data-driven grid for the regularization parameters used for the Lasso estimator} +\usage{ +gridLambda(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau) +} +\arguments{ +\item{phiInit}{value for phi} + +\item{rhoInit}{value for rho} + +\item{piInit}{value for pi} + +\item{gamInit}{value for gamma} + +\item{X}{matrix of covariates (of size n*p)} + +\item{Y}{matrix of responses (of size n*m)} + +\item{gamma}{power of weights in the penalty} + +\item{mini}{minimum number of iterations in EM algorithm} + +\item{maxi}{maximum number of iterations in EM algorithm} + +\item{tau}{threshold to stop EM algorithm} +} +\value{ +the grid of regularization parameters +} +\description{ 
+Construct the data-driven grid for the regularization parameters used for the Lasso estimator +} + diff --git a/man/initSmallEM.Rd b/man/initSmallEM.Rd new file mode 100644 index 0000000..86be0cb --- /dev/null +++ b/man/initSmallEM.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/initSmallEM.R +\name{initSmallEM} +\alias{initSmallEM} +\title{initialization of the EM algorithm} +\usage{ +initSmallEM(k, X, Y) +} +\arguments{ +\item{k}{number of components} + +\item{X}{matrix of covariates (of size n*p)} + +\item{Y}{matrix of responses (of size n*m)} +} +\value{ +a list with phiInit, rhoInit, piInit, gamInit +} +\description{ +initialization of the EM algorithm +} + diff --git a/man/modelSelection.Rd b/man/modelSelection.Rd new file mode 100644 index 0000000..30aeaa5 --- /dev/null +++ b/man/modelSelection.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/modelSelection.R +\name{modelSelection} +\alias{modelSelection} +\title{Among a collection of models, this function constructs a subcollection of models with +models having strictly different dimensions, keeping the model which minimizes +the likelihood if there were several with the same dimension} +\usage{ +modelSelection(LLF) +} +\arguments{ +\item{LLF}{a matrix, the first column corresponds to likelihoods for several models +the second column corresponds to the dimensions of the corresponding models.} +} +\value{ +a list with indices, a vector of indices selected models, + and D1, a vector of corresponding dimensions +} +\description{ +Among a collection of models, this function constructs a subcollection of models with +models having strictly different dimensions, keeping the model which minimizes +the likelihood if there were several with the same dimension +} + diff --git a/man/selectVariables.Rd b/man/selectVariables.Rd new file mode 100644 index 0000000..09a52f2 --- /dev/null +++ b/man/selectVariables.Rd @@ 
-0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/selectVariables.R +\name{selectVariables} +\alias{selectVariables} +\title{selectVariables +It is a function which constructs, for a given lambda, the sets of +relevant variables and irrelevant variables.} +\usage{ +selectVariables(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, glambda, + X, Y, thres, tau) +} +\arguments{ +\item{phiInit}{an initial estimator for phi (size: p*m*k)} + +\item{rhoInit}{an initial estimator for rho (size: m*m*k)} + +\item{piInit}{an initial estimator for pi (size : k)} + +\item{gamInit}{an initial estimator for gamma} + +\item{mini}{minimum number of iterations in EM algorithm} + +\item{maxi}{maximum number of iterations in EM algorithm} + +\item{gamma}{power in the penalty} + +\item{glambda}{grid of regularization parameters} + +\item{X}{matrix of regressors} + +\item{Y}{matrix of responses} + +\item{thres}{threshold to consider a coefficient to be equal to 0} + +\item{tau}{threshold to say that EM algorithm has converged} +} +\value{ +TODO +} +\description{ +selectVariables +It is a function which constructs, for a given lambda, the sets of +relevant variables and irrelevant variables. +} +\examples{ +TODO + +} + diff --git a/valse.Rproj b/valse.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/valse.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source