From: Benjamin Auder <benjamin.auder@somewhere>
Date: Mon, 10 Feb 2020 12:56:35 +0000 (+0100)
Subject: Merge branch 'master' of github.com:yagu0/valse
X-Git-Url: https://git.auder.net/?p=valse.git;a=commitdiff_plain;h=04845e3300b5450629bf1a2c3344d2f9419e91a6;hp=f32535f2bc8d50470aa87204bbd7971805dbc9ef

Merge branch 'master' of github.com:yagu0/valse
---

diff --git a/.gitignore b/.gitignore
index a84520a..d8cc23d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,4 @@ Rprof.out
 *.o
 *.so
 *.exe
+.Rproj.user
diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION
index 65ed448..b620b65 100644
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -27,7 +27,7 @@ Suggests:
     testthat
 URL: http://git.auder.net/?p=valse.git
 License: MIT + file LICENSE
-RoxygenNote: 6.0.1
+RoxygenNote: 7.0.2
 Collate:
     'plot_valse.R'
     'main.R'
diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R
index c30b023..93012fb 100644
--- a/pkg/R/EMGLLF.R
+++ b/pkg/R/EMGLLF.R
@@ -1,6 +1,9 @@
 #' EMGLLF
 #'
-#' Description de EMGLLF
+#' Run a generalized EM algorithm developped for mixture of Gaussian regression 
+#' models with variable selection by an extension of the Lasso estimator (regularization parameter lambda).
+#' Reparametrization is done to ensure invariance by homothetic transformation.
+#' It returns a collection of models, varying the number of clusters and the sparsity in the regression mean.
 #'
 #' @param phiInit an initialization for phi
 #' @param rhoInit an initialization for rho
@@ -14,13 +17,13 @@
 #' @param Y matrix of responses (of size n*m)
 #' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
 #'
-#' @return A list ... phi,rho,pi,LLF,S,affec:
-#'   phi : parametre de moyenne renormalisÃ©, calculÃ© par l'EM
-#'   rho : parametre de variance renormalisÃ©, calculÃ© par l'EM
-#'   pi : parametre des proportions renormalisÃ©, calculÃ© par l'EM
-#'   LLF : log vraisemblance associÃ©e Ã  cet Ã©chantillon, pour les valeurs estimÃ©es des paramÃ¨tres
-#'   S : ...
-#'   affec : ...
+#' @return A list (corresponding to the model collection) defined by (phi,rho,pi,LLF,S,affec):
+#'   phi : regression mean for each cluster
+#'   rho : variance (homothetic) for each cluster
+#'   pi : proportion for each cluster
+#'   LLF : log likelihood with respect to the training set
+#'   S : selected variables indexes
+#'   affec : cluster affectation for each observation (of the training set)
 #'
 #' @export
 EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
@@ -36,8 +39,8 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
   # Function in C
   n <- nrow(X)  #nombre d'echantillons
   p <- ncol(X)  #nombre de covariables
-  m <- ncol(Y)  #taille de Y (multivariÃ©)
-  k <- length(piInit)  #nombre de composantes dans le mÃ©lange
+  m <- ncol(Y)  #taille de Y (multivarie)
+  k <- length(piInit)  #nombre de composantes dans le melange
   .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
     X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k),
     llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k,
diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R
index 4054e25..2dc6c37 100644
--- a/pkg/R/EMGrank.R
+++ b/pkg/R/EMGrank.R
@@ -1,19 +1,22 @@
 #' EMGrank
 #'
-#' Description de EMGrank
+#' Run an generalized EM algorithm developped for mixture of Gaussian regression 
+#' models with variable selection by an extension of the low rank estimator.
+#' Reparametrization is done to ensure invariance by homothetic transformation.
+#' It returns a collection of models, varying the number of clusters and the rank of the regression mean.
 #'
-#' @param Pi Parametre de proportion
-#' @param Rho Parametre initial de variance renormalisÃ©
-#' @param mini Nombre minimal d'itÃ©rations dans l'algorithme EM
-#' @param maxi Nombre maximal d'itÃ©rations dans l'algorithme EM
-#' @param X RÃ©gresseurs
-#' @param Y RÃ©ponse
-#' @param eps Seuil pour accepter la convergence
-#' @param rank Vecteur des rangs possibles
+#' @param Pi An initialization for pi
+#' @param Rho An initialization for rho, the variance parameter
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param rank vector of possible ranks
 #'
-#' @return A list ...
-#'   phi : parametre de moyenne renormalisÃ©, calculÃ© par l'EM
-#'   LLF : log vraisemblance associÃ© Ã  cet Ã©chantillon, pour les valeurs estimÃ©es des paramÃ¨tres
+#' @return A list (corresponding to the model collection) defined by (phi,LLF):
+#'   phi : regression mean for each cluster
+#'   LLF : log likelihood with respect to the training set
 #'
 #' @export
 EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE)
@@ -27,8 +30,8 @@ EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE)
   # Function in C
   n <- nrow(X)  #nombre d'echantillons
   p <- ncol(X)  #nombre de covariables
-  m <- ncol(Y)  #taille de Y (multivariÃ©)
-  k <- length(Pi)  #nombre de composantes dans le mÃ©lange
+  m <- ncol(Y)  #taille de Y (multivarie)
+  k <- length(Pi)  #nombre de composantes dans le melange
   .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), phi = double(p * m * k), 
     LLF = double(1), n, p, m, k, PACKAGE = "valse")
 }
diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R
index 9df8168..6e18409 100644
--- a/pkg/R/constructionModelesLassoRank.R
+++ b/pkg/R/constructionModelesLassoRank.R
@@ -33,10 +33,10 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
   for (r in 1:k)
   {
     # On veut le tableau de toutes les combinaisons de rangs possibles, et des
-    # lambdas Dans la premiÃ¨re colonne : on rÃ©pÃ¨te (rank.max-rank.min)^(k-1) chaque
-    # chiffre : Ã§a remplit la colonne Dans la deuxieme : on rÃ©pÃ¨te
-    # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait Ã§a (rank.max-rank.min)^2
-    # fois ...  Dans la derniÃ¨re, on rÃ©pÃ¨te chaque chiffre une fois, et on fait Ã§a
+    # lambdas Dans la premiere colonne : on repete (rank.max-rank.min)^(k-1) chaque
+    # chiffre : ca remplit la colonne Dans la deuxieme : on repete
+    # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait ca (rank.max-rank.min)^2
+    # fois ...  Dans la derniere, on repete chaque chiffre une fois, et on fait ca
     # (rank.min-rank.max)^(k-1) fois.
     RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1),
       each = deltaRank^(k - r)), each = L)
diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R
index 7e9cce5..937ea73 100644
--- a/pkg/R/initSmallEM.R
+++ b/pkg/R/initSmallEM.R
@@ -34,7 +34,7 @@ initSmallEM <- function(k, X, Y, fast)
     for (r in 1:k)
     {
       Z <- Zinit1[, repet]
-      Z_indice <- seq_len(n)[Z == r]  #renvoit les indices oÃ¹ Z==r
+      Z_indice <- seq_len(n)[Z == r]  #renvoit les indices ou Z==r
       if (length(Z_indice) == 1) {
         betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*%
           crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R
index eb6c590..a4bc0f4 100644
--- a/pkg/R/selectVariables.R
+++ b/pkg/R/selectVariables.R
@@ -1,6 +1,6 @@
 #' selectVariables
 #'
-#' It is a function which construct, for a given lambda, the sets of relevant variables.
+#' It is a function which constructs, for a given lambda, the sets for each cluster of relevant variables.
 #'
 #' @param phiInit an initial estimator for phi (size: p*m*k)
 #' @param rhoInit an initial estimator for rho (size: m*m*k)
@@ -69,9 +69,7 @@ selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma
     parallel::stopCluster(cl)
 
   print(out)
-  # Suppress models which are computed twice En fait, ca ca fait la comparaison de
-  # tous les parametres On veut juste supprimer ceux qui ont les memes variables
-  # sÃ©lectionnÃ©es
+  # Suppress models which are computed twice 
   # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ]
   selec <- lapply(out, function(model) model$selected)
   ind_dup <- duplicated(selec)