From: emilie <emilie@devijver.org>
Date: Fri, 21 Apr 2017 13:58:12 +0000 (+0200)
Subject: essai fusion
X-Git-Url: https://git.auder.net/assets/css/current/%7B%7B%20path%28%27mixstore_static_about%27%29%20%7D%7D?a=commitdiff_plain;h=228ee602a972fcac6177db0d539bf9d0c5fa477f;p=valse.git

essai fusion
---

diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION
new file mode 100644
index 0000000..3b33e25
--- /dev/null
+++ b/pkg/DESCRIPTION
@@ -0,0 +1,42 @@
+Package: valse
+Title: Variable Selection With Mixture Of Models
+Date: 2016-12-01
+Version: 0.1-0
+Description: Two methods are implemented to cluster data with finite mixture
+    regression models. Those procedures deal with high-dimensional covariates and
+    responses through a variable selection procedure based on the Lasso estimator.
+    A low-rank constraint could be added, computed for the Lasso-Rank procedure.
+    A collection of models is constructed, varying the level of sparsity and the
+    number of clusters, and a model is selected using a model selection criterion
+    (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model-
+    based clustering for high-dimensional data. Application to functional data' by
+    Emilie Devijver, published in Advances in Data Analysis and Clustering (2016).
+Author: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
+    Emilie Devijver <Emilie.Devijver@kuleuven.be> [aut],
+    Benjamin Goehry <Benjamin.Goehry@math.u-psud.fr> [aut]
+Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
+Depends:
+    R (>= 3.0.0)
+Imports:
+    MASS,
+    parallel
+Suggests:
+    capushe,
+    roxygen2,
+    testhat
+URL: http://git.auder.net/?p=valse.git
+License: MIT + file LICENSE
+RoxygenNote: 5.0.1
+Collate:
+    'plot_valse.R'
+    'main.R'
+    'selectVariables.R'
+    'constructionModelesLassoRank.R'
+    'constructionModelesLassoMLE.R'
+    'computeGridLambda.R'
+    'initSmallEM.R'
+    'EMGrank.R'
+    'EMGLLF.R'
+    'generateXY.R'
+    'A_NAMESPACE.R'
+    'util.R'
diff --git a/pkg/LICENSE b/pkg/LICENSE
new file mode 100644
index 0000000..a212458
--- /dev/null
+++ b/pkg/LICENSE
@@ -0,0 +1,23 @@
+Copyright (c)
+  2014-2017, Benjamin Auder
+	2014-2017, Emilie Devijver
+	2016-2017, Benjamin Goehry
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R
new file mode 100644
index 0000000..8e1783e
--- /dev/null
+++ b/pkg/R/A_NAMESPACE.R
@@ -0,0 +1,16 @@
+#' @include generateXY.R
+#' @include EMGLLF.R
+#' @include EMGrank.R
+#' @include initSmallEM.R
+#' @include computeGridLambda.R
+#' @include constructionModelesLassoMLE.R
+#' @include constructionModelesLassoRank.R
+#' @include selectVariables.R
+#' @include main.R
+#' @include plot_valse.R
+#'
+#' @useDynLib valse
+#'
+#' @importFrom parallel makeCluster parLapply stopCluster clusterExport
+#' @importFrom MASS ginv
+NULL
diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R
new file mode 100644
index 0000000..bf4476b
--- /dev/null
+++ b/pkg/R/EMGLLF.R
@@ -0,0 +1,194 @@
+#' EMGLLF 
+#'
+#' Description de EMGLLF
+#'
+#' @param phiInit an initialization for phi
+#' @param rhoInit an initialization for rho
+#' @param piInit an initialization for pi
+#' @param gamInit initialization for the a posteriori probabilities
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param gamma integer for the power in the penaly, by default = 1
+#' @param lambda regularization parameter in the Lasso estimation
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#'
+#' @return A list ... phi,rho,pi,LLF,S,affec:
+#'   phi : parametre de moyenne renormalisÃ©, calculÃ© par l'EM
+#'   rho : parametre de variance renormalisÃ©, calculÃ© par l'EM
+#'   pi : parametre des proportions renormalisÃ©, calculÃ© par l'EM
+#'   LLF : log vraisemblance associÃ©e Ã  cet Ã©chantillon, pour les valeurs estimÃ©es des paramÃ¨tres
+#'   S : ... affec : ...
+#'
+#' @export
+EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+  X, Y, eps, fast)
+{
+  if (!fast)
+  {
+    # Function in R
+    return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+      X, Y, eps))
+  }
+
+  # Function in C
+  n <- nrow(X)  #nombre d'echantillons
+  p <- ncol(X)  #nombre de covariables
+  m <- ncol(Y)  #taille de Y (multivariÃ©)
+  k <- length(piInit)  #nombre de composantes dans le mÃ©lange
+  .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+    X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k), 
+    LLF = double(maxi), S = double(p * m * k), affec = integer(n), n, p, m, k, 
+    PACKAGE = "valse")
+}
+
+# R version - slow but easy to read
+.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+  X, Y, eps)
+{
+  # Matrix dimensions
+  n <- nrow(X)
+  p <- ncol(X)
+  m <- ncol(Y)
+  k <- length(piInit)
+
+  # Adjustments required when p==1 or m==1 (var.sel. or output dim 1)
+  if (p==1 || m==1)
+    phiInit <- array(phiInit, dim=c(p,m,k))
+  if (m==1)
+    rhoInit <- array(rhoInit, dim=c(m,m,k))
+
+  # Outputs
+  phi <- phiInit
+  rho <- rhoInit
+  pi <- piInit
+  llh <- -Inf
+  S <- array(0, dim = c(p, m, k))
+
+  # Algorithm variables
+  gam <- gamInit
+  Gram2 <- array(0, dim = c(p, p, k))
+  ps2 <- array(0, dim = c(p, m, k))
+  X2 <- array(0, dim = c(n, p, k))
+  Y2 <- array(0, dim = c(n, m, k))
+  EPS <- 1e-15
+
+  for (ite in 1:maxi)
+  {
+    # Remember last pi,rho,phi values for exit condition in the end of loop
+    Phi <- phi
+    Rho <- rho
+    Pi <- pi
+
+    # Computations associated to X and Y
+    for (r in 1:k)
+    {
+      for (mm in 1:m)
+        Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
+      for (i in 1:n)
+        X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
+      for (mm in 1:m)
+        ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
+      for (j in 1:p)
+      {
+        for (s in 1:p)
+          Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
+      }
+    }
+
+    ## M step
+
+    # For pi
+    b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
+    gam2 <- colSums(gam)
+    a <- sum(gam %*% log(pi))
+
+    # While the proportions are nonpositive
+    kk <- 0
+    pi2AllPositive <- FALSE
+    while (!pi2AllPositive)
+    {
+      pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi)
+      pi2AllPositive <- all(pi2 >= 0)
+      kk <- kk + 1
+    }
+
+    # t(m) is the largest value in the grid O.1^k such that it is nonincreasing
+    while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+      # na.rm=TRUE to handle 0*log(0)
+      -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b))
+    {
+      pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
+      kk <- kk + 1
+    }
+    t <- 0.1^kk
+    pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
+
+    # For phi and rho
+    for (r in 1:k)
+    {
+      for (mm in 1:m)
+      {
+        ps <- 0
+        for (i in 1:n)
+          ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
+        nY2 <- sum(Y2[, mm, r]^2)
+        rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
+      }
+    }
+
+    for (r in 1:k)
+    {
+      for (j in 1:p)
+      {
+        for (mm in 1:m)
+        {
+          S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] +
+            sum(phi[-j, mm, r] * Gram2[j, -j, r])
+          if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) {
+            phi[j, mm, r] <- 0
+          } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) {
+            phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
+          } else {
+            phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
+          }
+        }
+      }
+    }
+
+    ## E step
+
+    # Precompute det(rho[,,r]) for r in 1...k
+    detRho <- sapply(1:k, function(r) gdet(rho[, , r]))
+    sumLogLLH <- 0
+    for (i in 1:n)
+    {
+      # Update gam[,]; use log to avoid numerical problems
+      logGam <- sapply(1:k, function(r) {
+        log(pi[r]) + log(detRho[r]) - 0.5 *
+          sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)
+      })
+
+      logGam <- logGam - max(logGam) #adjust without changing proportions
+      gam[i, ] <- exp(logGam)
+      norm_fact <- sum(gam[i, ])
+      gam[i, ] <- gam[i, ] / norm_fact
+      sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2))
+    }
+
+    sumPen <- sum(pi^gamma * b)
+    last_llh <- llh
+    llh <- -sumLogLLH/n #+ lambda * sumPen
+    dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh)))
+    Dist1 <- max((abs(phi - Phi))/(1 + abs(phi)))
+    Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
+    Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
+    dist2 <- max(Dist1, Dist2, Dist3)
+
+    if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
+      break
+  }
+
+  list(phi = phi, rho = rho, pi = pi, llh = llh, S = S)
+}
diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R
new file mode 100644
index 0000000..b85a0fa
--- /dev/null
+++ b/pkg/R/EMGrank.R
@@ -0,0 +1,120 @@
+#' EMGrank
+#'
+#' Description de EMGrank
+#'
+#' @param Pi Parametre de proportion
+#' @param Rho Parametre initial de variance renormalisÃ©
+#' @param mini Nombre minimal d'itÃ©rations dans l'algorithme EM
+#' @param maxi Nombre maximal d'itÃ©rations dans l'algorithme EM
+#' @param X RÃ©gresseurs
+#' @param Y RÃ©ponse
+#' @param tau Seuil pour accepter la convergence
+#' @param rank Vecteur des rangs possibles
+#'
+#' @return A list ...
+#'   phi : parametre de moyenne renormalisÃ©, calculÃ© par l'EM
+#'   LLF : log vraisemblance associÃ© Ã  cet Ã©chantillon, pour les valeurs estimÃ©es des paramÃ¨tres
+#'
+#' @export
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE)
+{
+  if (!fast)
+  {
+    # Function in R
+    return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+  }
+
+  # Function in C
+  n <- nrow(X)  #nombre d'echantillons
+  p <- ncol(X)  #nombre de covariables
+  m <- ncol(Y)  #taille de Y (multivariÃ©)
+  k <- length(Pi)  #nombre de composantes dans le mÃ©lange
+  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, tau, rank, phi = double(p * m * k), 
+    LLF = double(1), n, p, m, k, PACKAGE = "valse")
+}
+
+# helper to always have matrices as arg (TODO: put this elsewhere? improve?)  -->
+# Yes, we should use by-columns storage everywhere... [later!]
+matricize <- function(X)
+{
+  if (!is.matrix(X)) 
+    return(t(as.matrix(X)))
+  return(X)
+}
+
+# R version - slow but easy to read
+.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+{
+  # matrix dimensions
+  n <- nrow(X)
+  p <- ncol(X)
+  m <- ncol(Y)
+  k <- length(Pi)
+
+  # init outputs
+  phi <- array(0, dim = c(p, m, k))
+  Z <- rep(1, n)
+  LLF <- 0
+
+  # local variables
+  Phi <- array(0, dim = c(p, m, k))
+  deltaPhi <- c()
+  sumDeltaPhi <- 0
+  deltaPhiBufferSize <- 20
+  
+  # main loop
+  ite <- 1
+  while (ite <= mini || (ite <= maxi && sumDeltaPhi > tau))
+  {
+    # M step: update for Beta ( and then phi)
+    for (r in 1:k)
+    {
+      Z_indice <- seq_len(n)[Z == r] #indices where Z == r
+      if (length(Z_indice) == 0) 
+        next
+      # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+      s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% 
+                 crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
+      S <- s$d
+      # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation
+      # of the initial product
+      if (rank[r] < length(S)) 
+        S[(rank[r] + 1):length(S)] <- 0
+      phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
+    }
+
+    # Step E and computation of the loglikelihood
+    sumLogLLF2 <- 0
+    for (i in seq_len(n))
+    {
+      sumLLF1 <- 0
+      maxLogGamIR <- -Inf
+      for (r in seq_len(k))
+      {
+        dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
+        logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct
+        # Z[i] = index of max (gam[i,])
+        if (logGamIR > maxLogGamIR)
+        {
+          Z[i] <- r
+          maxLogGamIR <- logGamIR
+        }
+        sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2)
+      }
+      sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
+    }
+
+    LLF <- -1/n * sumLogLLF2
+
+    # update distance parameter to check algorithm convergence (delta(phi, Phi))
+    deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
+    if (length(deltaPhi) > deltaPhiBufferSize) 
+      deltaPhi <- deltaPhi[2:length(deltaPhi)]
+    sumDeltaPhi <- sum(abs(deltaPhi))
+
+    # update other local variables
+    Phi <- phi
+    ite <- ite + 1
+  }
+  return(list(phi = phi, LLF = LLF))
+}
diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R
new file mode 100644
index 0000000..8449d10
--- /dev/null
+++ b/pkg/R/computeGridLambda.R
@@ -0,0 +1,36 @@
+#' computeGridLambda 
+#'
+#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
+#'
+#' @param phiInit value for phi
+#' @param rhoInit  for rho
+#' @param piInit  for pi
+#' @param gamInit value for gamma
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param gamma power of weights in the penalty
+#' @param mini minimum number of iterations in EM algorithm
+#' @param maxi maximum number of iterations in EM algorithm
+#' @param tau threshold to stop EM algorithm
+#'
+#' @return the grid of regularization parameters
+#'
+#' @export
+computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, 
+  maxi, tau, fast)
+{
+  n <- nrow(X)
+  p <- ncol(X)
+  m <- ncol(Y)
+  k <- length(piInit)
+
+  list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, 
+    X, Y, tau, fast)
+  grid <- array(0, dim = c(p, m, k))
+  for (j in 1:p)
+  {
+    for (mm in 1:m)
+      grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma)
+  }
+  sort(unique(grid))
+}
diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R
new file mode 100644
index 0000000..9743f0c
--- /dev/null
+++ b/pkg/R/constructionModelesLassoMLE.R
@@ -0,0 +1,111 @@
+#' constructionModelesLassoMLE 
+#'
+#' Construct a collection of models with the Lasso-MLE procedure.
+#' 
+#' @param phiInit an initialization for phi, get by initSmallEM.R
+#' @param rhoInit an initialization for rho, get by initSmallEM.R
+#' @param piInit an initialization for pi, get by initSmallEM.R
+#' @param gamInit an initialization for gam, get by initSmallEM.R
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param gamma integer for the power in the penaly, by default = 1
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param S output of selectVariables.R
+#' @param ncores Number of cores, by default = 3
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#' 
+#' @return a list with several models, defined by phi, rho, pi, llh
+#'
+#' @export
+constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, 
+  maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose)
+{
+  if (ncores > 1)
+  {
+    cl <- parallel::makeCluster(ncores, outfile = "")
+    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit", 
+      "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", 
+      "ncores", "fast", "verbose"))
+  }
+
+  # Individual model computation
+  computeAtLambda <- function(lambda)
+  {
+    if (ncores > 1) 
+      require("valse")  #nodes start with an empty environment
+
+    if (verbose) 
+      print(paste("Computations for lambda=", lambda))
+
+    n <- dim(X)[1]
+    p <- dim(phiInit)[1]
+    m <- dim(phiInit)[2]
+    k <- dim(phiInit)[3]
+    sel.lambda <- S[[lambda]]$selected
+    # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
+    col.sel <- which(sapply(sel.lambda, length) > 0)  #if list of selected vars
+    if (length(col.sel) == 0) 
+      return(NULL)
+
+    # lambda == 0 because we compute the EMV: no penalization here
+    res <- EMGLLF(array(phiInit[col.sel, , ],dim=c(length(col.sel),m,k)), rhoInit,
+      piInit, gamInit, mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast)
+
+    # Eval dimension from the result + selected
+    phiLambda2 <- res$phi
+    rhoLambda <- res$rho
+    piLambda <- res$pi
+    phiLambda <- array(0, dim = c(p, m, k))
+    for (j in seq_along(col.sel))
+      phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
+    dimension <- length(unlist(sel.lambda))
+
+    ## Computation of the loglikelihood
+    # Precompute det(rhoLambda[,,r]) for r in 1...k
+    detRho <- sapply(1:k, function(r) det(rhoLambda[, , r]))
+    sumLogLLH <- 0
+    for (i in 1:n)
+    {
+      # Update gam[,]; use log to avoid numerical problems
+      logGam <- sapply(1:k, function(r) {
+        log(piLambda[r]) + log(detRho[r]) - 0.5 *
+          sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2)
+      })
+      
+      logGam <- logGam - max(logGam) #adjust without changing proportions
+      gam <- exp(logGam)
+      print(gam)
+      norm_fact <- sum(gam)
+      sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2))
+    }
+    llhLambda <- c(sumLogLLH/n, (dimension + m + 1) * k - 1)
+    # densite <- vector("double", n)
+    # for (r in 1:k)
+    # {
+    #   if (length(col.sel) == 1)
+    #   {
+    #     delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r])))
+    #   } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r]))
+    #   densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * 
+    #     exp(-rowSums(delta^2)/2)
+    # }
+    # llhLambda <- c(mean(log(densite)), (dimension + m + 1) * k - 1)
+    list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda)
+  }
+
+  # For each lambda, computation of the parameters
+  out <-
+    if (ncores > 1) {
+      parLapply(cl, 1:length(S), computeAtLambda)
+    } else {
+      lapply(1:length(S), computeAtLambda)
+    }
+
+  if (ncores > 1) 
+    parallel::stopCluster(cl)
+
+  out
+}
diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R
new file mode 100644
index 0000000..dc88f67
--- /dev/null
+++ b/pkg/R/constructionModelesLassoRank.R
@@ -0,0 +1,95 @@
+#' constructionModelesLassoRank
+#'
+#' Construct a collection of models with the Lasso-Rank procedure.
+#'
+#' @param S output of selectVariables.R
+#' @param k number of components
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
+#' @param ncores Number of cores, by default = 3
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#'
+#' @return a list with several models, defined by phi, rho, pi, llh
+#'
+#' @export
+constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, 
+  ncores, fast, verbose)
+{
+  n <- nrow(X)
+  p <- ncol(X)
+  m <- ncol(Y)
+  L <- length(S)
+
+  # Possible interesting ranks
+  deltaRank <- rank.max - rank.min + 1
+  Size <- deltaRank^k
+  RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1)
+  for (r in 1:k)
+  {
+    # On veut le tableau de toutes les combinaisons de rangs possibles, et des
+    # lambdas Dans la premiÃ¨re colonne : on rÃ©pÃ¨te (rank.max-rank.min)^(k-1) chaque
+    # chiffre : Ã§a remplit la colonne Dans la deuxieme : on rÃ©pÃ¨te
+    # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait Ã§a (rank.max-rank.min)^2
+    # fois ...  Dans la derniÃ¨re, on rÃ©pÃ¨te chaque chiffre une fois, et on fait Ã§a
+    # (rank.min-rank.max)^(k-1) fois.
+    RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1), 
+      each = deltaRank^(k - r)), each = L)
+  }
+  RankLambda[, k + 1] <- rep(1:L, times = Size)
+
+  if (ncores > 1)
+  {
+    cl <- parallel::makeCluster(ncores, outfile = "")
+    parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size", 
+      "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", 
+      "verbose"))
+  }
+
+  computeAtLambda <- function(index)
+  {
+    lambdaIndex <- RankLambda[index, k + 1]
+    rankIndex <- RankLambda[index, 1:k]
+    if (ncores > 1) 
+      require("valse")  #workers start with an empty environment
+
+    # 'relevant' will be the set of relevant columns
+    selected <- S[[lambdaIndex]]$selected
+    relevant <- c()
+    for (j in 1:p)
+    {
+      if (length(selected[[j]]) > 0)
+        relevant <- c(relevant, j)
+    }
+    if (max(rankIndex) < length(relevant))
+    {
+      phi <- array(0, dim = c(p, m, k))
+      if (length(relevant) > 0)
+      {
+        res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, 
+          X[, relevant], Y, eps, rankIndex, fast)
+        llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
+        phi[relevant, , ] <- res$phi
+      }
+      list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho)
+    }
+  }
+
+  # For each lambda in the grid we compute the estimators
+  out <-
+    if (ncores > 1) {
+      parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
+    } else {
+      lapply(seq_len(length(S) * Size), computeAtLambda)
+    }
+
+  if (ncores > 1) 
+    parallel::stopCluster(cl)
+
+  out
+}
diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R
new file mode 100644
index 0000000..064b54b
--- /dev/null
+++ b/pkg/R/generateXY.R
@@ -0,0 +1,39 @@
+#' generateXY 
+#'
+#' Generate a sample of (X,Y) of size n
+#'
+#' @param n sample size
+#' @param Ï proportion for each cluster
+#' @param meanX matrix of group means for covariates (of size p)
+#' @param covX covariance for covariates (of size p*p)
+#' @param Î² regression matrix, of size p*m*k
+#' @param covY covariance for the response vector (of size m*m*K)
+#'
+#' @return list with X and Y
+#'
+#' @export
+generateXY <- function(n, Ï, meanX, Î², covX, covY)
+{
+  p <- dim(covX)[1]
+  m <- dim(covY)[1]
+  k <- dim(covY)[3]
+
+  X <- matrix(nrow = 0, ncol = p)
+  Y <- matrix(nrow = 0, ncol = m)
+
+  # random generation of the size of each population in X~Y (unordered)
+  sizePop <- rmultinom(1, n, Ï)
+  class <- c() #map i in 1:n --> index of class in 1:k
+
+  for (i in 1:k)
+  {
+    class <- c(class, rep(i, sizePop[i]))
+    newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
+    X <- rbind(X, newBlockX)
+    Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% 
+      Î²[, , i], covY[, , i]))))
+  }
+
+  shuffle <- sample(n)
+  list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
+}
diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R
new file mode 100644
index 0000000..44b4b06
--- /dev/null
+++ b/pkg/R/initSmallEM.R
@@ -0,0 +1,79 @@
+#' initialization of the EM algorithm 
+#'
+#' @param k number of components
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#'
+#' @return a list with phiInit, rhoInit, piInit, gamInit
+#' @export
+#' @importFrom methods new
+#' @importFrom stats cutree dist hclust runif
+initSmallEM <- function(k, X, Y, fast)
+{
+  n <- nrow(X)
+  p <- ncol(X)
+  m <- ncol(Y)
+  nIte <- 20
+  Zinit1 <- array(0, dim = c(n, nIte))
+  betaInit1 <- array(0, dim = c(p, m, k, nIte))
+  sigmaInit1 <- array(0, dim = c(m, m, k, nIte))
+  phiInit1 <- array(0, dim = c(p, m, k, nIte))
+  rhoInit1 <- array(0, dim = c(m, m, k, nIte))
+  Gam <- matrix(0, n, k)
+  piInit1 <- matrix(0, nIte, k)
+  gamInit1 <- array(0, dim = c(n, k, nIte))
+  LLFinit1 <- list()
+
+  # require(MASS) #Moore-Penrose generalized inverse of matrix
+  for (repet in 1:nIte)
+  {
+    distance_clus <- dist(cbind(X, Y))
+    tree_hier <- hclust(distance_clus)
+    Zinit1[, repet] <- cutree(tree_hier, k)
+
+    for (r in 1:k)
+    {
+      Z <- Zinit1[, repet]
+      Z_indice <- seq_len(n)[Z == r]  #renvoit les indices oÃ¹ Z==r
+      if (length(Z_indice) == 1) {
+        betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% 
+          crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
+      } else {
+        betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% 
+          crossprod(X[Z_indice, ], Y[Z_indice, ])
+      }
+      sigmaInit1[, , r, repet] <- diag(m)
+      phiInit1[, , r, repet] <- betaInit1[, , r, repet]  #/ sigmaInit1[,,r,repet]
+      rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
+      piInit1[repet, r] <- mean(Z == r)
+    }
+
+    for (i in 1:n)
+    {
+      for (r in 1:k)
+      {
+        dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
+          - X[i, ] %*% phiInit1[, , r, repet])
+        Gam[i, r] <- piInit1[repet, r] * 
+          gdet(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
+      }
+      sumGamI <- sum(Gam[i, ])
+      gamInit1[i, , repet] <- Gam[i, ]/sumGamI
+    }
+
+    miniInit <- 10
+    maxiInit <- 11
+
+    init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
+      gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
+      eps = 1e-04, fast)
+    LLFinit1[[repet]] <- init_EMG$llh
+  }
+  b <- which.min(LLFinit1)
+  phiInit <- phiInit1[, , , b]
+  rhoInit <- rhoInit1[, , , b]
+  piInit <- piInit1[b, ]
+  gamInit <- gamInit1[, , b]
+
+  return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
+}
diff --git a/pkg/R/main.R b/pkg/R/main.R
new file mode 100644
index 0000000..d710b7e
--- /dev/null
+++ b/pkg/R/main.R
@@ -0,0 +1,161 @@
+#' valse 
+#'
+#' Main function
+#'
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param procedure among 'LassoMLE' or 'LassoRank'
+#' @param selecMod method to select a model among 'DDSE', 'DJump', 'BIC' or 'AIC'
+#' @param gamma integer for the power in the penaly, by default = 1
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param kmin integer, minimum number of clusters, by default = 2
+#' @param kmax integer, maximum number of clusters, by default = 10
+#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
+#' @param ncores_outer Number of cores for the outer loop on k
+#' @param ncores_inner Number of cores for the inner loop on lambda
+#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
+#' @param compute_grid_lambda, TRUE to compute the grid, FALSE if known (in arguments)
+#' @param grid_lambda, a vector with regularization parameters if known, by default 0
+#' @param size_coll_mod (Maximum) size of a collection of models
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#'
+#' @return a list with estimators of parameters
+#'
+#' @examples
+#' #TODO: a few examples
+#' @export
+valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, 
+  maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, 
+  ncores_inner = 1, thresh = 1e-08, compute_grid_lambda = TRUE, grid_lambda = 0, size_coll_mod = 10, fast = TRUE, verbose = FALSE, 
+  plot = TRUE)
+{
+  n <- nrow(X)
+  p <- ncol(X)
+  m <- ncol(Y)
+
+  if (verbose) 
+    print("main loop: over all k and all lambda")
+
+  if (ncores_outer > 1) {
+    cl <- parallel::makeCluster(ncores_outer, outfile = "")
+    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", 
+      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", 
+      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", 
+      "size_coll_mod", "verbose", "p", "m"))
+  }
+
+  # Compute models with k components
+  computeModels <- function(k)
+  {
+    if (ncores_outer > 1) 
+      require("valse")  #nodes start with an empty environment
+
+    if (verbose) 
+      print(paste("Parameters initialization for k =", k))
+    # smallEM initializes parameters by k-means and regression model in each
+    # component, doing this 20 times, and keeping the values maximizing the
+    # likelihood after 10 iterations of the EM algorithm.
+    P <- initSmallEM(k, X, Y, fast)
+    if (compute_grid_lambda == TRUE)
+    {
+      grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, 
+                                       X, Y, gamma, mini, maxi, eps, fast)
+    }
+    if (length(grid_lambda) > size_coll_mod) 
+      grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
+
+    if (verbose) 
+      print("Compute relevant parameters")
+    # select variables according to each regularization parameter from the grid:
+    # S$selected corresponding to selected variables
+    S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, 
+      gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
+
+    if (procedure == "LassoMLE") {
+      if (verbose) 
+        print("run the procedure Lasso-MLE")
+      # compute parameter estimations, with the Maximum Likelihood Estimator,
+      # restricted on selected variables.
+      models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, 
+        P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
+    } else {
+      if (verbose) 
+        print("run the procedure Lasso-Rank")
+      # compute parameter estimations, with the Low Rank Estimator, restricted on
+      # selected variables.
+      models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min, 
+        rank.max, ncores_inner, fast, verbose)
+    }
+    # warning! Some models are NULL after running selectVariables
+    models <- models[sapply(models, function(cell) !is.null(cell))]
+    models
+  }
+
+  # List (index k) of lists (index lambda) of models
+  models_list <-
+    if (ncores_outer > 1) {
+      parLapply(cl, kmin:kmax, computeModels)
+    } else {
+      lapply(kmin:kmax, computeModels)
+    }
+  if (ncores_outer > 1) 
+    parallel::stopCluster(cl)
+
+  if (!requireNamespace("capushe", quietly = TRUE))
+  {
+    warning("'capushe' not available: returning all models")
+    return(models_list)
+  }
+
+  # Get summary 'tableauRecap' from models
+  tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
+  {
+    models <- models_list[[i]]
+    # For a collection of models (same k, several lambda):
+    LLH <- sapply(models, function(model) model$llh[1])
+    k <- length(models[[1]]$pi)
+    sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, 
+      , 1] != 0) + 1) - 1)
+    data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, 
+      complexity = sumPen, contrast = -LLH)
+  }))
+  tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
+  if (verbose == TRUE)
+  {
+    print(tableauRecap)
+  }
+  modSel <- capushe::capushe(tableauRecap, n)
+  indModSel <- if (selecMod == "DDSE") 
+    as.numeric(modSel@DDSE@model) else if (selecMod == "Djump") 
+    as.numeric(modSel@Djump@model) else if (selecMod == "BIC") 
+    modSel@BIC_capushe$model else if (selecMod == "AIC") 
+    modSel@AIC_capushe$model
+
+  mod <- as.character(tableauRecap[indModSel, 1])
+  listMod <- as.integer(unlist(strsplit(mod, "[.]")))
+  modelSel <- models_list[[listMod[1]]][[listMod[2]]]
+
+  ## Affectations
+  Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n)
+  for (i in 1:n)
+  {
+    for (r in 1:length(modelSel$pi))
+    {
+      sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2)
+      Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * gdet(modelSel$rho[, , r])
+    }
+  }
+  Gam <- Gam/rowSums(Gam)
+  modelSel$affec <- apply(Gam, 1, which.max)
+  modelSel$proba <- Gam
+  modelSel$tableau <- tableauRecap
+
+  if (plot)
+    print(plot_valse(X, Y, modelSel, n))
+
+  return(modelSel)
+}
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R
new file mode 100644
index 0000000..ec2302d
--- /dev/null
+++ b/pkg/R/plot_valse.R
@@ -0,0 +1,89 @@
+#' Plot 
+#'
+#' It is a function which plots relevant parameters
+#'
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param model the model constructed by valse procedure
+#' @param n sample size
+#' @return several plots
+#'
+#' @examples TODO
+#'
+#' @export
+#'
+plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
+{
+  require("gridExtra")
+  require("ggplot2")
+  require("reshape2")
+  require("cowplot")
+
+  K <- length(model$pi)
+  ## regression matrices
+  gReg <- list()
+  for (r in 1:K)
+  {
+    Melt <- melt(t((model$phi[, , r])))
+    gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + 
+      geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
+      midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
+  }
+  print(gReg)
+
+  ## Differences between two clusters
+  if (comp)
+  {
+    if (is.na(k1) || is.na(k))
+      print("k1 and k2 must be integers, representing the clusters you want to compare")
+    Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
+    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))
+      + geom_tile()
+      + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
+        space = "Lab")
+      + ggtitle(paste("Difference between regression matrices in cluster", 
+        k1, "and", k2))
+    print(gDiff)
+  }
+
+  ### Covariance matrices
+  matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
+  for (r in 1:K)
+    matCov[, r] <- diag(model$rho[, , r])
+  MeltCov <- melt(matCov)
+  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile()
+    + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
+      space = "Lab")
+    + ggtitle("Covariance matrices")
+  print(gCov)
+
+  ### Proportions
+  gam2 <- matrix(NA, ncol = K, nrow = n)
+  for (i in 1:n)
+    gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
+
+  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2))
+    + geom_boxplot()
+    + theme(legend.position = "none")
+    + background_grid(major = "xy", minor = "none")
+  print(bp)
+
+  ### Mean in each cluster
+  XY <- cbind(X, Y)
+  XY_class <- list()
+  meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2])
+  for (r in 1:K)
+  {
+    XY_class[[r]] <- XY[model$affec == r, ]
+    if (sum(model$affec == r) == 1) {
+      meanPerClass[, r] <- XY_class[[r]]
+    } else {
+      meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
+    }
+  }
+  data <- data.frame(mean = as.vector(meanPerClass),
+    cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
+  g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
+  print(g + geom_line(aes(linetype = cluster, color = cluster))
+    + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
+}
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R
new file mode 100644
index 0000000..39e54d2
--- /dev/null
+++ b/pkg/R/selectVariables.R
@@ -0,0 +1,81 @@
+#' selectVariables 
+#'
+#' It is a function which construct, for a given lambda, the sets of relevant variables.
+#'
+#' @param phiInit an initial estimator for phi (size: p*m*k)
+#' @param rhoInit an initial estimator for rho (size: m*m*k)
+#' @param piInit an initial estimator for pi (size : k)
+#' @param gamInit an initial estimator for gamma
+#' @param mini  minimum number of iterations in EM algorithm
+#' @param maxi  maximum number of iterations in EM algorithm
+#' @param gamma  power in the penalty
+#' @param glambda grid of regularization parameters
+#' @param X    matrix of regressors
+#' @param Y    matrix of responses
+#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
+#' @param eps   threshold to say that EM algorithm has converged
+#' @param ncores Number or cores for parallel execution (1 to disable)
+#'
+#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
+#'
+#' @examples TODO
+#'
+#' @export
+#'
+selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, 
+  glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast)
+{
+  if (ncores > 1) {
+    cl <- parallel::makeCluster(ncores, outfile = "")
+    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", 
+      "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment())
+  }
+
+  # Computation for a fixed lambda
+  computeCoefs <- function(lambda)
+  {
+    params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+      X, Y, eps, fast)
+
+    p <- ncol(X)
+    m <- ncol(Y)
+
+    # selectedVariables: list where element j contains vector of selected variables
+    # in [1,m]
+    selectedVariables <- lapply(1:p, function(j) {
+      # from boolean matrix mxk of selected variables obtain the corresponding boolean
+      # m-vector, and finally return the corresponding indices
+      if (m>1) {
+        seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
+      } else {
+        if (any(params$phi[j, , ] > thresh))
+          1
+        else
+          numeric(0)
+      }
+    })
+
+    list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
+  }
+
+  # For each lambda in the grid, we compute the coefficients
+  out <-
+    if (ncores > 1) {
+      parLapply(cl, glambda, computeCoefs)
+    } else {
+      lapply(glambda, computeCoefs)
+    }
+  if (ncores > 1) 
+    parallel::stopCluster(cl)
+  # Suppress models which are computed twice En fait, ca ca fait la comparaison de
+  # tous les parametres On veut juste supprimer ceux qui ont les memes variables
+  # sÃ©lectionnÃ©es
+  # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ]
+  selec <- lapply(out, function(model) model$selected)
+  ind_dup <- duplicated(selec)
+  ind_uniq <- which(!ind_dup)
+  out2 <- list()
+  for (l in 1:length(ind_uniq))
+    out2[[l]] <- out[[ind_uniq[l]]]
+  out2
+}
diff --git a/pkg/R/util.R b/pkg/R/util.R
new file mode 100644
index 0000000..f8b01cc
--- /dev/null
+++ b/pkg/R/util.R
@@ -0,0 +1,7 @@
+# ...
+gdet <- function(M)
+{
+	if (is.matrix(M))
+		return (det(M))
+	return (M[1]) #numeric, double
+}
diff --git a/pkg/data/data.RData b/pkg/data/data.RData
new file mode 100644
index 0000000..a9f09e1
Binary files /dev/null and b/pkg/data/data.RData differ
diff --git a/pkg/data/data2.RData b/pkg/data/data2.RData
new file mode 100644
index 0000000..80003e3
Binary files /dev/null and b/pkg/data/data2.RData differ
diff --git a/pkg/data/script_data.R b/pkg/data/script_data.R
new file mode 100644
index 0000000..1585337
--- /dev/null
+++ b/pkg/data/script_data.R
@@ -0,0 +1,15 @@
+m=11
+p=10
+
+covY = array(0,dim = c(m,m,2))
+covY[,,1] = diag(m)
+covY[,,2] = diag(m)
+
+Beta = array(0, dim = c(p, m, 2))
+Beta[1:4,1:4,1] = 3*diag(4)
+Beta[1:4,1:4,2] = -2*diag(4)
+
+Data = generateXY(100, c(0.5,0.5), rep(0,p), Beta, diag(p), covY)
+
+Res = valse(Data$X,Data$Y, fast=FALSE, plot=FALSE, verbose = TRUE, kmax=2, compute_grid_lambda = FALSE, 
+            grid_lambda = seq(0.2,2,length = 50), size_coll_mod = 50)
diff --git a/pkg/inst/testdata/TODO.csv b/pkg/inst/testdata/TODO.csv
new file mode 100644
index 0000000..d679966
--- /dev/null
+++ b/pkg/inst/testdata/TODO.csv
@@ -0,0 +1 @@
+ou alors data_test.RData, possible aussi
diff --git a/pkg/man/valse-package.Rd b/pkg/man/valse-package.Rd
new file mode 100644
index 0000000..534375b
--- /dev/null
+++ b/pkg/man/valse-package.Rd
@@ -0,0 +1,37 @@
+\name{valse-package}
+\alias{valse-package}
+\alias{valse}
+\docType{package}
+
+\title{
+	\packageTitle{valse}
+}
+
+\description{
+	\packageDescription{valse}
+}
+
+\details{
+	The package devtools should be useful in development stage, since we rely on testthat for
+	unit tests, and roxygen2 for documentation. knitr is used to generate the package vignette.
+	Concerning the other suggested packages:
+	\itemize{
+		\item{parallel (generally) permits to run the bootstrap method faster.}
+	}
+
+	The three main functions are ...
+}
+
+\author{
+	\packageAuthor{valse}
+
+	Maintainer: \packageMaintainer{valse}
+}
+
+%\references{
+%	TODO: Literature or other references for background information
+%}
+
+%\examples{
+%	TODO: simple examples of the most important functions
+%}
diff --git a/pkg/src/Makevars b/pkg/src/Makevars
new file mode 100644
index 0000000..50b7fb6
--- /dev/null
+++ b/pkg/src/Makevars
@@ -0,0 +1,11 @@
+#Debug flags
+PKG_CFLAGS=-g -I./sources
+
+#Prod flags:
+#PKG_CFLAGS=-O2 -I./sources
+
+PKG_LIBS=-lm -lgsl -lcblas
+
+SOURCES = $(wildcard adapters/*.c sources/*.c)
+
+OBJECTS = $(SOURCES:.c=.o)
diff --git a/pkg/src/adapters/a.EMGLLF.c b/pkg/src/adapters/a.EMGLLF.c
new file mode 100644
index 0000000..f24cd2a
--- /dev/null
+++ b/pkg/src/adapters/a.EMGLLF.c
@@ -0,0 +1,91 @@
+#include <R.h>
+#include <Rdefines.h>
+#include "EMGLLF.h"
+
+// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper)
+SEXP EMGLLF(
+	SEXP phiInit_,
+	SEXP rhoInit_,
+	SEXP piInit_,
+	SEXP gamInit_,
+	SEXP mini_,
+	SEXP maxi_,
+	SEXP gamma_,
+	SEXP lambda_,
+	SEXP X_,
+	SEXP Y_,
+	SEXP tau_
+) {
+	// Get matrices dimensions
+	int n = INTEGER(getAttrib(X_, R_DimSymbol))[0];
+	SEXP dim = getAttrib(phiInit_, R_DimSymbol);
+	int p = INTEGER(dim)[0];
+	int m = INTEGER(dim)[1];
+	int k = INTEGER(dim)[2];
+
+	////////////
+	// INPUTS //
+	////////////
+
+	// get scalar parameters
+	int mini = INTEGER_VALUE(mini_);
+	int maxi = INTEGER_VALUE(maxi_);
+	double gamma = NUMERIC_VALUE(gamma_);
+	double lambda = NUMERIC_VALUE(lambda_);
+	double tau = NUMERIC_VALUE(tau_);
+
+	// Get pointers from SEXP arrays ; WARNING: by columns !
+	double* phiInit = REAL(phiInit_);
+	double* rhoInit = REAL(rhoInit_);
+	double* piInit = REAL(piInit_);
+	double* gamInit = REAL(gamInit_);
+	double* X = REAL(X_);
+	double* Y = REAL(Y_);
+
+	/////////////
+	// OUTPUTS //
+	/////////////
+
+	SEXP phi, rho, pi, LLF, S, affec, dimPhiS, dimRho;
+	PROTECT(dimPhiS = allocVector(INTSXP, 3));
+	int* pDimPhiS = INTEGER(dimPhiS);
+	pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k;
+	PROTECT(dimRho = allocVector(INTSXP, 3));
+	int* pDimRho = INTEGER(dimRho);
+	pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k;
+	PROTECT(phi = allocArray(REALSXP, dimPhiS));
+	PROTECT(rho = allocArray(REALSXP, dimRho));
+	PROTECT(pi = allocVector(REALSXP, k));
+	PROTECT(LLF = allocVector(REALSXP, maxi-mini+1));
+	PROTECT(S = allocArray(REALSXP, dimPhiS));
+	PROTECT(affec = allocVector(INTSXP, n));
+	double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLLF=REAL(LLF), *pS=REAL(S);
+	int *pAffec=INTEGER(affec);
+
+	////////////////////
+	// Call to EMGLLF //
+	////////////////////
+
+	EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,
+		pPhi,pRho,pPi,pLLF,pS,pAffec,
+		n,p,m,k);
+
+	// Build list from OUT params and return it
+	SEXP listParams, listNames;
+	int nouts = 6;
+	PROTECT(listParams = allocVector(VECSXP, nouts));
+	char* lnames[6] = {"phi", "rho", "pi", "LLF", "S", "affec"}; //lists labels
+	PROTECT(listNames = allocVector(STRSXP,nouts));
+	for (int i=0; i<nouts; i++)
+		SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
+	setAttrib(listParams, R_NamesSymbol, listNames);
+	SET_VECTOR_ELT(listParams, 0, phi);
+	SET_VECTOR_ELT(listParams, 1, rho);
+	SET_VECTOR_ELT(listParams, 2, pi);
+	SET_VECTOR_ELT(listParams, 3, LLF);
+	SET_VECTOR_ELT(listParams, 4, S);
+	SET_VECTOR_ELT(listParams, 5, affec);
+
+	UNPROTECT(10);
+	return listParams;
+}
diff --git a/pkg/src/adapters/a.EMGrank.c b/pkg/src/adapters/a.EMGrank.c
new file mode 100644
index 0000000..b1dad9b
--- /dev/null
+++ b/pkg/src/adapters/a.EMGrank.c
@@ -0,0 +1,73 @@
+#include <R.h>
+#include <Rdefines.h>
+#include "EMGrank.h"
+
+// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper)
+SEXP EMGrank(
+	SEXP Pi_,
+	SEXP Rho_,
+	SEXP mini_,
+	SEXP maxi_,
+	SEXP X_,
+	SEXP Y_,
+	SEXP tau_,
+	SEXP rank_
+) {
+	// Get matrices dimensions
+	SEXP dimX = getAttrib(X_, R_DimSymbol);
+	int n = INTEGER(dimX)[0];
+	int p = INTEGER(dimX)[1];
+	SEXP dimRho = getAttrib(Rho_, R_DimSymbol);
+	int m = INTEGER(dimRho)[0];
+	int k = INTEGER(dimRho)[2];
+
+	////////////
+	// INPUTS //
+	////////////
+
+	// get scalar parameters
+	int mini = INTEGER_VALUE(mini_);
+	int maxi = INTEGER_VALUE(maxi_);
+	double tau = NUMERIC_VALUE(tau_);
+
+	// Get pointers from SEXP arrays ; WARNING: by columns !
+	double* Pi = REAL(Pi_);
+	double* Rho = REAL(Rho_);
+	double* X = REAL(X_);
+	double* Y = REAL(Y_);
+	int* rank = INTEGER(rank_);
+
+	/////////////
+	// OUTPUTS //
+	/////////////
+
+	SEXP phi, LLF, dimPhi;
+	PROTECT(dimPhi = allocVector(INTSXP, 3));
+	int* pDimPhi = INTEGER(dimPhi);
+	pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k;
+	PROTECT(phi = allocArray(REALSXP, dimPhi));
+	PROTECT(LLF = allocVector(REALSXP, 1));
+	double *pPhi=REAL(phi), *pLLF=REAL(LLF);
+
+	/////////////////////
+	// Call to EMGrank //
+	/////////////////////
+
+	EMGrank_core(Pi, Rho, mini, maxi, X, Y, tau, rank,
+		pPhi,pLLF,
+		n,p,m,k);
+
+	// Build list from OUT params and return it
+	SEXP listParams, listNames;
+	PROTECT(listParams = allocVector(VECSXP, 2));
+	char* lnames[2] = {"phi", "LLF"}; //lists labels
+	PROTECT(listNames = allocVector(STRSXP,2));
+	for (int i=0; i<2; i++)
+		SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
+	setAttrib(listParams, R_NamesSymbol, listNames);
+	SET_VECTOR_ELT(listParams, 0, phi);
+	SET_VECTOR_ELT(listParams, 1, LLF);
+
+	UNPROTECT(5);
+	return listParams;
+}
diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/sources/EMGLLF.c
new file mode 100644
index 0000000..d2f5a8e
--- /dev/null
+++ b/pkg/src/sources/EMGLLF.c
@@ -0,0 +1,412 @@
+#include "utils.h"
+#include <stdlib.h>
+#include <math.h>
+#include <gsl/gsl_linalg.h>
+
+// TODO: don't recompute indexes ai(...) and mi(...) when possible
+void EMGLLF_core(
+	// IN parameters
+	const Real* phiInit, // parametre initial de moyenne renormalisÃ©
+	const Real* rhoInit, // parametre initial de variance renormalisÃ©
+	const Real* piInit,	 // parametre initial des proportions
+	const Real* gamInit, // paramÃ¨tre initial des probabilitÃ©s a posteriori de chaque Ã©chantillon
+	int mini, // nombre minimal d'itÃ©rations dans l'algorithme EM
+	int maxi, // nombre maximal d'itÃ©rations dans l'algorithme EM
+	Real gamma, // puissance des proportions dans la pÃ©nalisation pour un Lasso adaptatif
+	Real lambda, // valeur du paramÃ¨tre de rÃ©gularisation du Lasso
+	const Real* X, // rÃ©gresseurs
+	const Real* Y, // rÃ©ponse
+	Real tau, // seuil pour accepter la convergence
+	// OUT parameters (all pointers, to be modified)
+	Real* phi, // parametre de moyenne renormalisÃ©, calculÃ© par l'EM
+	Real* rho, // parametre de variance renormalisÃ©, calculÃ© par l'EM
+	Real* pi, // parametre des proportions renormalisÃ©, calculÃ© par l'EM
+	Real* llh, // (derniere) log vraisemblance associÃ©e Ã  cet Ã©chantillon,
+	           // pour les valeurs estimÃ©es des paramÃ¨tres
+	Real* S,
+	int* affec,
+	// additional size parameters
+	int n, // nombre d'echantillons
+	int p, // nombre de covariables
+	int m, // taille de Y (multivariÃ©)
+	int k) // nombre de composantes dans le mÃ©lange
+{
+	//Initialize outputs
+	copyArray(phiInit, phi, p*m*k);
+	copyArray(rhoInit, rho, m*m*k);
+	copyArray(piInit, pi, k);
+	//S is already allocated, and doesn't need to be 'zeroed'
+
+	//Other local variables: same as in R
+	Real* gam = (Real*)malloc(n*k*sizeof(Real));
+	copyArray(gamInit, gam, n*k);
+	Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real));
+	Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real));
+	Real* b = (Real*)malloc(k*sizeof(Real));
+	Real* X2 = (Real*)malloc(n*p*k*sizeof(Real));
+	Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real));
+	*llh = -INFINITY;
+	Real* pi2 = (Real*)malloc(k*sizeof(Real));
+	const Real EPS = 1e-15;
+	// Additional (not at this place, in R file)
+	Real* gam2 = (Real*)malloc(k*sizeof(Real));
+	Real* sqNorm2 = (Real*)malloc(k*sizeof(Real));
+	Real* detRho = (Real*)malloc(k*sizeof(Real));
+	gsl_matrix* matrix = gsl_matrix_alloc(m, m);
+	gsl_permutation* permutation = gsl_permutation_alloc(m);
+	Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
+	Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
+	const Real gaussConstM = pow(2.*M_PI,m/2.);
+	Real* Phi = (Real*)malloc(p*m*k*sizeof(Real));
+	Real* Rho = (Real*)malloc(m*m*k*sizeof(Real));
+	Real* Pi = (Real*)malloc(k*sizeof(Real));
+
+	for (int ite=1; ite<=maxi; ite++)
+	{
+		copyArray(phi, Phi, p*m*k);
+		copyArray(rho, Rho, m*m*k);
+		copyArray(pi, Pi, k);
+
+		// Calculs associÃ©s a Y et X
+		for (int r=0; r<k; r++)
+		{
+			for (int mm=0; mm<m; mm++)
+			{
+				//Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+				for (int u=0; u<n; u++)
+					Y2[ai(u,mm,r,n,m,k)] = sqrt(gam[mi(u,r,n,k)]) * Y[mi(u,mm,n,m)];
+			}
+			for (int i=0; i<n; i++)
+			{
+				//X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+				for (int u=0; u<p; u++)
+					X2[ai(i,u,r,n,p,k)] = sqrt(gam[mi(i,r,n,k)]) * X[mi(i,u,n,p)];
+			}
+			for (int mm=0; mm<m; mm++)
+			{
+				//ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+				for (int u=0; u<p; u++)
+				{
+					Real dotProduct = 0.;
+					for (int v=0; v<n; v++)
+						dotProduct += X2[ai(v,u,r,n,p,k)] * Y2[ai(v,mm,r,n,m,k)];
+					ps2[ai(u,mm,r,p,m,k)] = dotProduct;
+				}
+			}
+			for (int j=0; j<p; j++)
+			{
+				for (int s=0; s<p; s++)
+				{
+					//Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+					Real dotProduct = 0.;
+					for (int u=0; u<n; u++)
+						dotProduct += X2[ai(u,j,r,n,p,k)] * X2[ai(u,s,r,n,p,k)];
+					Gram2[ai(j,s,r,p,p,k)] = dotProduct;
+				}
+			}
+		}
+
+		/////////////
+		// Etape M //
+		/////////////
+
+		// Pour pi
+		for (int r=0; r<k; r++)
+		{
+			//b[r] = sum(abs(phi[,,r]))
+			Real sumAbsPhi = 0.;
+			for (int u=0; u<p; u++)
+				for (int v=0; v<m; v++)
+					sumAbsPhi += fabs(phi[ai(u,v,r,p,m,k)]);
+			b[r] = sumAbsPhi;
+		}
+		//gam2 = colSums(gam)
+		for (int u=0; u<k; u++)
+		{
+			Real sumOnColumn = 0.;
+			for (int v=0; v<n; v++)
+				sumOnColumn += gam[mi(v,u,n,k)];
+			gam2[u] = sumOnColumn;
+		}
+		//a = sum(gam %*% log(pi))
+		Real a = 0.;
+		for (int u=0; u<n; u++)
+		{
+			Real dotProduct = 0.;
+			for (int v=0; v<k; v++)
+				dotProduct += gam[mi(u,v,n,k)] * log(pi[v]);
+			a += dotProduct;
+		}
+
+		//tant que les proportions sont negatives
+		int kk = 0,
+			pi2AllPositive = 0;
+		Real invN = 1./n;
+		while (!pi2AllPositive)
+		{
+			//pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+			Real pow_01_kk = pow(0.1,kk);
+			for (int r=0; r<k; r++)
+				pi2[r] = pi[r] + pow_01_kk * (invN*gam2[r] - pi[r]);
+			//pi2AllPositive = all(pi2 >= 0)
+			pi2AllPositive = 1;
+			for (int r=0; r<k; r++)
+			{
+				if (pi2[r] < 0)
+				{
+					pi2AllPositive = 0;
+					break;
+				}
+			}
+			kk++;
+		}
+
+		//sum(pi^gamma * b)
+		Real piPowGammaDotB = 0.;
+		for (int v=0; v<k; v++)
+			piPowGammaDotB += pow(pi[v],gamma) * b[v];
+		//sum(pi2^gamma * b)
+		Real pi2PowGammaDotB = 0.;
+		for (int v=0; v<k; v++)
+			pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
+		//sum(gam2 * log(pi2))
+		Real gam2DotLogPi2 = 0.;
+		for (int v=0; v<k; v++)
+			gam2DotLogPi2 += gam2[v] * log(pi2[v]);
+
+		//t(m) la plus grande valeur dans la grille O.1^k tel que ce soit dÃ©croissante ou constante
+		while (-invN*a + lambda*piPowGammaDotB < -invN*gam2DotLogPi2 + lambda*pi2PowGammaDotB
+			&& kk<1000)
+		{
+			Real pow_01_kk = pow(0.1,kk);
+			//pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+			for (int v=0; v<k; v++)
+				pi2[v] = pi[v] + pow_01_kk * (invN*gam2[v] - pi[v]);
+			//pi2 was updated, so we recompute pi2PowGammaDotB and gam2DotLogPi2
+			pi2PowGammaDotB = 0.;
+			for (int v=0; v<k; v++)
+				pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
+			gam2DotLogPi2 = 0.;
+			for (int v=0; v<k; v++)
+				gam2DotLogPi2 += gam2[v] * log(pi2[v]);
+			kk++;
+		}
+		Real t = pow(0.1,kk);
+		//sum(pi + t*(pi2-pi))
+		Real sumPiPlusTbyDiff = 0.;
+		for (int v=0; v<k; v++)
+			sumPiPlusTbyDiff += (pi[v] + t*(pi2[v] - pi[v]));
+		//pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+		for (int v=0; v<k; v++)
+			pi[v] = (pi[v] + t*(pi2[v] - pi[v])) / sumPiPlusTbyDiff;
+
+		//Pour phi et rho
+		for (int r=0; r<k; r++)
+		{
+			for (int mm=0; mm<m; mm++)
+			{
+				Real ps = 0.,
+					nY2 = 0.;
+				// Compute ps, and nY2 = sum(Y2[,mm,r]^2)
+				for (int i=0; i<n; i++)
+				{
+					//< X2[i,,r] , phi[,mm,r] >
+					Real dotProduct = 0.;
+					for (int u=0; u<p; u++)
+						dotProduct += X2[ai(i,u,r,n,p,k)] * phi[ai(u,mm,r,p,m,k)];
+					//ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+					ps += Y2[ai(i,mm,r,n,m,k)] * dotProduct;
+					nY2 += Y2[ai(i,mm,r,n,m,k)] * Y2[ai(i,mm,r,n,m,k)];
+				}
+				//rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+				rho[ai(mm,mm,r,m,m,k)] = (ps + sqrt(ps*ps + 4*nY2 * gam2[r])) / (2*nY2);
+			}
+		}
+
+		for (int r=0; r<k; r++)
+		{
+			for (int j=0; j<p; j++)
+			{
+				for (int mm=0; mm<m; mm++)
+				{
+					//sum(phi[-j,mm,r] * Gram2[j,-j,r])
+					Real phiDotGram2 = 0.;
+					for (int u=0; u<p; u++)
+					{
+						if (u != j)
+							phiDotGram2 += phi[ai(u,mm,r,p,m,k)] * Gram2[ai(j,u,r,p,p,k)];
+					}
+					//S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
+					S[ai(j,mm,r,p,m,k)] = -rho[ai(mm,mm,r,m,m,k)] * ps2[ai(j,mm,r,p,m,k)]
+						+ phiDotGram2;
+					Real pirPowGamma = pow(pi[r],gamma);
+					if (fabs(S[ai(j,mm,r,p,m,k)]) <= n*lambda*pirPowGamma)
+						phi[ai(j,mm,r,p,m,k)] = 0.;
+					else if (S[ai(j,mm,r,p,m,k)] > n*lambda*pirPowGamma)
+					{
+						phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)])
+							/ Gram2[ai(j,j,r,p,p,k)];
+					}
+					else
+					{
+						phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)])
+							/ Gram2[ai(j,j,r,p,p,k)];
+					}
+				}
+			}
+		}
+
+		/////////////
+		// Etape E //
+		/////////////
+
+		// Precompute det(rho[,,r]) for r in 1...k
+		int signum;
+		for (int r=0; r<k; r++)
+		{
+			for (int u=0; u<m; u++)
+			{
+				for (int v=0; v<m; v++)
+					matrix->data[u*m+v] = rho[ai(u,v,r,m,m,k)];
+			}
+			gsl_linalg_LU_decomp(matrix, permutation, &signum);
+			detRho[r] = gsl_linalg_LU_det(matrix, signum);
+		}
+
+		Real sumLogLLH = 0.;
+		for (int i=0; i<n; i++)
+		{
+			for (int r=0; r<k; r++)
+			{
+				//compute Y[i,]%*%rho[,,r]
+				for (int u=0; u<m; u++)
+				{
+					YiRhoR[u] = 0.;
+					for (int v=0; v<m; v++)
+						YiRhoR[u] += Y[mi(i,v,n,m)] * rho[ai(v,u,r,m,m,k)];
+				}
+
+				//compute X[i,]%*%phi[,,r]
+				for (int u=0; u<m; u++)
+				{
+					XiPhiR[u] = 0.;
+					for (int v=0; v<p; v++)
+						XiPhiR[u] += X[mi(i,v,n,p)] * phi[ai(v,u,r,p,m,k)];
+				}
+
+				//compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2
+				sqNorm2[r] = 0.;
+				for (int u=0; u<m; u++)
+					sqNorm2[r] += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
+			}
+
+			Real sumGamI = 0.;
+			for (int r=0; r<k; r++)
+			{
+				gam[mi(i,r,n,k)] = pi[r] * exp(-.5*sqNorm2[r]) * detRho[r];
+				sumGamI += gam[mi(i,r,n,k)];
+			}
+
+			sumLogLLH += log(sumGamI) - log(gaussConstM);
+			if (sumGamI > EPS) //else: gam[i,] is already ~=0
+			{
+				for (int r=0; r<k; r++)
+					gam[mi(i,r,n,k)] /= sumGamI;
+			}
+		}
+
+		//sumPen = sum(pi^gamma * b)
+		Real sumPen = 0.;
+		for (int r=0; r<k; r++)
+			sumPen += pow(pi[r],gamma) * b[r];
+		Real last_llh = *llh;
+		//llh = -sumLogLLH/n + lambda*sumPen
+		*llh = -invN * sumLogLLH + lambda * sumPen;
+		Real dist = ite==1 ? *llh : (*llh - last_llh) / (1. + fabs(*llh));
+
+		//Dist1 = max( abs(phi-Phi) / (1+abs(phi)) )
+		Real Dist1 = 0.;
+		for (int u=0; u<p; u++)
+		{
+			for (int v=0; v<m; v++)
+			{
+				for (int w=0; w<k; w++)
+				{
+					Real tmpDist = fabs(phi[ai(u,v,w,p,m,k)]-Phi[ai(u,v,w,p,m,k)])
+						/ (1.+fabs(phi[ai(u,v,w,p,m,k)]));
+					if (tmpDist > Dist1)
+						Dist1 = tmpDist;
+				}
+			}
+		}
+		//Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+		Real Dist2 = 0.;
+		for (int u=0; u<m; u++)
+		{
+			for (int v=0; v<m; v++)
+			{
+				for (int w=0; w<k; w++)
+				{
+					Real tmpDist = fabs(rho[ai(u,v,w,m,m,k)]-Rho[ai(u,v,w,m,m,k)])
+						/ (1.+fabs(rho[ai(u,v,w,m,m,k)]));
+					if (tmpDist > Dist2)
+						Dist2 = tmpDist;
+				}
+			}
+		}
+		//Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)))
+		Real Dist3 = 0.;
+		for (int u=0; u<n; u++)
+		{
+			for (int v=0; v<k; v++)
+			{
+				Real tmpDist = fabs(pi[v]-Pi[v]) / (1.+fabs(pi[v]));
+				if (tmpDist > Dist3)
+					Dist3 = tmpDist;
+			}
+		}
+		//dist2=max([max(Dist1),max(Dist2),max(Dist3)]);
+		Real dist2 = Dist1;
+		if (Dist2 > dist2)
+			dist2 = Dist2;
+		if (Dist3 > dist2)
+			dist2 = Dist3;
+
+		if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
+			break;
+	}
+
+	//affec = apply(gam, 1, which.max)
+	for (int i=0; i<n; i++)
+	{
+		Real rowMax = 0.;
+		affec[i] = 0;
+		for (int j=0; j<k; j++)
+		{
+			if (gam[mi(i,j,n,k)] > rowMax)
+			{
+				affec[i] = j+1; //R indices start at 1
+				rowMax = gam[mi(i,j,n,k)];
+			}
+		}
+	}
+
+	//free memory
+	free(b);
+	free(gam);
+	free(Phi);
+	free(Rho);
+	free(Pi);
+	free(Gram2);
+	free(ps2);
+	free(detRho);
+	gsl_matrix_free(matrix);
+	gsl_permutation_free(permutation);
+	free(XiPhiR);
+	free(YiRhoR);
+	free(gam2);
+	free(pi2);
+	free(X2);
+	free(Y2);
+	free(sqNorm2);
+}
diff --git a/pkg/src/sources/EMGLLF.h b/pkg/src/sources/EMGLLF.h
new file mode 100644
index 0000000..e15cb87
--- /dev/null
+++ b/pkg/src/sources/EMGLLF.h
@@ -0,0 +1,32 @@
+#ifndef valse_EMGLLF_H
+#define valse_EMGLLF_H
+
+#include "utils.h"
+
+void EMGLLF_core(
+	// IN parameters
+	const Real* phiInit,
+	const Real* rhoInit,
+	const Real* piInit,
+	const Real* gamInit,
+	int mini,
+	int maxi,
+	Real gamma,
+	Real lambda,
+	const Real* X,
+	const Real* Y,
+	Real tau,
+	// OUT parameters
+	Real* phi,
+	Real* rho,
+	Real* pi,
+	Real* LLF,
+	Real* S,
+	int* affec,
+	// additional size parameters
+	int n,
+	int p,
+	int m,
+	int k);
+
+#endif
diff --git a/pkg/src/sources/EMGrank.c b/pkg/src/sources/EMGrank.c
new file mode 100644
index 0000000..3a9bf94
--- /dev/null
+++ b/pkg/src/sources/EMGrank.c
@@ -0,0 +1,307 @@
+#include <stdlib.h>
+#include <gsl/gsl_linalg.h>
+#include "utils.h"
+
+// Compute pseudo-inverse of a square matrix
+static Real* pinv(const Real* matrix, int dim)
+{
+	gsl_matrix* U = gsl_matrix_alloc(dim,dim);
+	gsl_matrix* V = gsl_matrix_alloc(dim,dim);
+	gsl_vector* S = gsl_vector_alloc(dim);
+	gsl_vector* work = gsl_vector_alloc(dim);
+	Real EPS = 1e-10; //threshold for singular value "== 0"
+	
+	//copy matrix into U
+	copyArray(matrix, U->data, dim*dim);
+
+	//U,S,V = SVD of matrix
+	gsl_linalg_SV_decomp(U, V, S, work);
+	gsl_vector_free(work);
+
+	// Obtain pseudo-inverse by V*S^{-1}*t(U)
+	Real* inverse = (Real*)malloc(dim*dim*sizeof(Real));
+	for (int i=0; i<dim; i++)
+	{
+		for (int ii=0; ii<dim; ii++)
+		{
+			Real dotProduct = 0.0;
+			for (int j=0; j<dim; j++)
+				dotProduct += V->data[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j];
+			inverse[i*dim+ii] = dotProduct;
+		}
+	}
+	
+	gsl_matrix_free(U);
+	gsl_matrix_free(V);
+	gsl_vector_free(S);
+	return inverse;
+}
+
+// TODO: comment EMGrank purpose
+void EMGrank_core(
+	// IN parameters
+	const Real* Pi, // parametre de proportion
+	const Real* Rho, // parametre initial de variance renormalisÃ©
+	int mini, // nombre minimal d'itÃ©rations dans l'algorithme EM
+	int maxi, // nombre maximal d'itÃ©rations dans l'algorithme EM
+	const Real* X, // rÃ©gresseurs
+	const Real* Y, // rÃ©ponse
+	Real tau, // seuil pour accepter la convergence
+	const int* rank, // vecteur des rangs possibles
+	// OUT parameters
+	Real* phi, // parametre de moyenne renormalisÃ©, calculÃ© par l'EM
+	Real* LLF, // log vraisemblance associÃ© Ã  cet Ã©chantillon, pour les valeurs estimÃ©es des paramÃ¨tres
+	// additional size parameters
+	int n, // taille de l'echantillon
+	int p, // nombre de covariables
+	int m, // taille de Y (multivariÃ©)
+	int k) // nombre de composantes
+{
+	// Allocations, initializations
+	Real* Phi = (Real*)calloc(p*m*k,sizeof(Real));
+	Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real));
+	int signum;
+	Real invN = 1.0/n;
+	int deltaPhiBufferSize = 20;
+	Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real));
+	int ite = 0;
+	Real sumDeltaPhi = 0.0;
+	Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
+	Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
+	Real* Xr = (Real*)malloc(n*p*sizeof(Real));
+	Real* Yr = (Real*)malloc(n*m*sizeof(Real));
+	Real* tXrXr = (Real*)malloc(p*p*sizeof(Real));
+	Real* tXrYr = (Real*)malloc(p*m*sizeof(Real));
+	gsl_matrix* matrixM = gsl_matrix_alloc(p, m);
+	gsl_matrix* matrixE = gsl_matrix_alloc(m, m);
+	gsl_permutation* permutation = gsl_permutation_alloc(m);
+	gsl_matrix* V = gsl_matrix_alloc(m,m);
+	gsl_vector* S = gsl_vector_alloc(m);
+	gsl_vector* work = gsl_vector_alloc(m);
+
+	//Initialize class memberships (all elements in class 0; TODO: randomize ?)
+	int* Z = (int*)calloc(n, sizeof(int));
+
+	//Initialize phi to zero, because some M loops might exit before phi affectation
+	zeroArray(phi, p*m*k);
+
+	while (ite<mini || (ite<maxi && sumDeltaPhi>tau))
+	{
+		/////////////
+		// Etape M //
+		/////////////
+		
+		//M step: Mise Ã  jour de Beta (et donc phi)
+		for (int r=0; r<k; r++)
+		{
+			//Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
+			int cardClustR=0;
+			for (int i=0; i<n; i++)
+			{
+				if (Z[i] == r)
+				{
+					for (int j=0; j<p; j++)
+						Xr[mi(cardClustR,j,n,p)] = X[mi(i,j,n,p)];
+					for (int j=0; j<m; j++)
+						Yr[mi(cardClustR,j,n,m)] = Y[mi(i,j,n,m)];
+					cardClustR++;
+				}
+			}
+			if (cardClustR == 0)
+				continue;
+
+			//Compute tXrXr = t(Xr) * Xr
+			for (int j=0; j<p; j++)
+			{
+				for (int jj=0; jj<p; jj++)
+				{
+					Real dotProduct = 0.0;
+					for (int u=0; u<cardClustR; u++)
+						dotProduct += Xr[mi(u,j,n,p)] * Xr[mi(u,jj,n,p)];
+					tXrXr[mi(j,jj,p,p)] = dotProduct;
+				}
+			}
+
+			//Get pseudo inverse = (t(Xr)*Xr)^{-1}
+			Real* invTXrXr = pinv(tXrXr, p);
+			
+			// Compute tXrYr = t(Xr) * Yr
+			for (int j=0; j<p; j++)
+			{
+				for (int jj=0; jj<m; jj++)
+				{
+					Real dotProduct = 0.0;
+					for (int u=0; u<cardClustR; u++)
+						dotProduct += Xr[mi(u,j,n,p)] * Yr[mi(u,jj,n,m)];
+					tXrYr[mi(j,jj,p,m)] = dotProduct;
+				}
+			}
+
+			//Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr
+			for (int j=0; j<p; j++)
+			{
+				for (int jj=0; jj<m; jj++)
+				{
+					Real dotProduct = 0.0;
+					for (int u=0; u<p; u++)
+						dotProduct += invTXrXr[mi(j,u,p,p)] * tXrYr[mi(u,jj,p,m)];
+					matrixM->data[j*m+jj] = dotProduct;
+				}
+			}
+			free(invTXrXr);
+
+			//U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+			gsl_linalg_SV_decomp(matrixM, V, S, work);
+
+			//Set m-rank(r) singular values to zero, and recompose
+			//best rank(r) approximation of the initial product
+			for (int j=rank[r]; j<m; j++)
+				S->data[j] = 0.0;
+			
+			//[intermediate step] Compute hatBetaR = U * S * t(V)
+			double* U = matrixM->data; //GSL require double precision
+			for (int j=0; j<p; j++)
+			{
+				for (int jj=0; jj<m; jj++)
+				{
+					Real dotProduct = 0.0;
+					for (int u=0; u<m; u++)
+						dotProduct += U[j*m+u] * S->data[u] * V->data[jj*m+u];
+					hatBetaR[mi(j,jj,p,m)] = dotProduct;
+				}
+			}
+
+			//Compute phi(:,:,r) = hatBetaR * Rho(:,:,r)
+			for (int j=0; j<p; j++)
+			{
+				for (int jj=0; jj<m; jj++)
+				{
+					Real dotProduct=0.0;
+					for (int u=0; u<m; u++)
+						dotProduct += hatBetaR[mi(j,u,p,m)] * Rho[ai(u,jj,r,m,m,k)];
+					phi[ai(j,jj,r,p,m,k)] = dotProduct;
+				}
+		}
+		}
+
+		/////////////
+		// Etape E //
+		/////////////
+		
+		Real sumLogLLF2 = 0.0;
+		for (int i=0; i<n; i++)
+		{
+			Real sumLLF1 = 0.0;
+			Real maxLogGamIR = -INFINITY;
+			for (int r=0; r<k; r++)
+			{
+				//Compute
+				//Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)...
+				//*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) );
+				//split in several sub-steps
+				
+				//compute det(Rho(:,:,r)) [TODO: avoid re-computations]
+				for (int j=0; j<m; j++)
+				{
+					for (int jj=0; jj<m; jj++)
+						matrixE->data[j*m+jj] = Rho[ai(j,jj,r,m,m,k)];
+				}
+				gsl_linalg_LU_decomp(matrixE, permutation, &signum);
+				Real detRhoR = gsl_linalg_LU_det(matrixE, signum);
+
+				//compute Y(i,:)*Rho(:,:,r)
+				for (int j=0; j<m; j++)
+				{
+					YiRhoR[j] = 0.0;
+					for (int u=0; u<m; u++)
+						YiRhoR[j] += Y[mi(i,u,n,m)] * Rho[ai(u,j,r,m,m,k)];
+				}
+
+				//compute X(i,:)*phi(:,:,r)
+				for (int j=0; j<m; j++)
+				{
+					XiPhiR[j] = 0.0;
+					for (int u=0; u<p; u++)
+						XiPhiR[j] += X[mi(i,u,n,p)] * phi[ai(u,j,r,p,m,k)];
+				}
+
+				//compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) >
+				Real dotProduct = 0.0;
+				for (int u=0; u<m; u++)
+					dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
+				Real logGamIR = log(Pi[r]) + log(detRhoR) - 0.5*dotProduct;
+
+				//Z(i) = index of max (gam(i,:))
+				if (logGamIR > maxLogGamIR)
+				{
+					Z[i] = r;
+					maxLogGamIR = logGamIR;
+				}
+				sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0);
+			}
+
+			sumLogLLF2 += log(sumLLF1);
+		}
+
+		// Assign output variable LLF
+		*LLF = -invN * sumLogLLF2;
+
+		//newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi))));
+		Real newDeltaPhi = 0.0;
+		for (int j=0; j<p; j++)
+		{
+			for (int jj=0; jj<m; jj++)
+			{
+				for (int r=0; r<k; r++)
+				{
+					Real tmpDist = fabs(phi[ai(j,jj,r,p,m,k)]-Phi[ai(j,jj,r,p,m,k)])
+						/ (1.0+fabs(phi[ai(j,jj,r,p,m,k)]));
+					if (tmpDist > newDeltaPhi)
+						newDeltaPhi = tmpDist;
+				}
+			}
+		}
+
+		//update distance parameter to check algorithm convergence (delta(phi, Phi))
+		//TODO: deltaPhi should be a linked list for perf.
+		if (ite < deltaPhiBufferSize)
+			deltaPhi[ite] = newDeltaPhi;
+		else
+		{
+			sumDeltaPhi -= deltaPhi[0];
+			for (int u=0; u<deltaPhiBufferSize-1; u++)
+				deltaPhi[u] = deltaPhi[u+1];
+			deltaPhi[deltaPhiBufferSize-1] = newDeltaPhi;
+		}
+		sumDeltaPhi += newDeltaPhi;
+
+		// update other local variables
+		for (int j=0; j<m; j++)
+		{
+			for (int jj=0; jj<p; jj++)
+			{
+				for (int r=0; r<k; r++)
+					Phi[ai(jj,j,r,p,m,k)] = phi[ai(jj,j,r,p,m,k)];
+			}
+		}
+		ite++;
+	}
+
+	//free memory
+	free(hatBetaR);
+	free(deltaPhi);
+	free(Phi);
+	gsl_matrix_free(matrixE);
+	gsl_matrix_free(matrixM);
+	gsl_permutation_free(permutation);
+	gsl_vector_free(work);
+	gsl_matrix_free(V);
+	gsl_vector_free(S);
+	free(XiPhiR);
+	free(YiRhoR);
+	free(Xr);
+	free(Yr);
+	free(tXrXr);
+	free(tXrYr);
+	free(Z);
+}
diff --git a/pkg/src/sources/EMGrank.h b/pkg/src/sources/EMGrank.h
new file mode 100644
index 0000000..b7367d8
--- /dev/null
+++ b/pkg/src/sources/EMGrank.h
@@ -0,0 +1,25 @@
+#ifndef valse_EMGrank_H
+#define valse_EMGrank_H
+
+#include "utils.h"
+
+void EMGrank_core(
+	// IN parameters
+	const Real* Pi,
+	const Real* Rho,
+	int mini,
+	int maxi,
+	const Real* X,
+	const Real* Y,
+	Real tau,
+	const int* rank,
+	// OUT parameters
+	Real* phi,
+	Real* LLF,
+	// additional size parameters
+	int n,
+	int p,
+	int m,
+	int k);
+
+#endif
diff --git a/pkg/src/sources/utils.h b/pkg/src/sources/utils.h
new file mode 100644
index 0000000..be3a72f
--- /dev/null
+++ b/pkg/src/sources/utils.h
@@ -0,0 +1,48 @@
+#ifndef valse_utils_H
+#define valse_utils_H
+
+//#include <stdint.h>
+
+/********
+ * Types
+ *******/
+
+typedef double Real;
+//typedef uint32_t UInt;
+//typedef int32_t Int;
+
+/*******************************
+ * Matrix and arrays indexation
+ *******************************/
+
+// Matrix Index ; TODO? ncol unused
+#define mi(i,j,nrow,ncol)\
+	j*nrow + i
+
+// Array Index ; TODO? d3 unused
+#define ai(i,j,k,d1,d2,d3)\
+	k*d1*d2 + j*d1 + i
+
+// Array4 Index ; TODO? ...
+#define ai4(i,j,k,m,d1,d2,d3,d4)\
+	m*d1*d2*d3 + k*d1*d2 + j*d1 + i
+
+/*************************
+ * Array copy & "zeroing"
+ ************************/
+
+// Fill an array with zeros
+#define zeroArray(array, size)\
+{\
+	for (int u=0; u<size; u++)\
+		array[u] = 0;\
+}
+
+// Copy an 1D array
+#define copyArray(array, copy, size)\
+{\
+	for (int u=0; u<size; u++)\
+		copy[u] = array[u];\
+}
+
+#endif
diff --git a/pkg/tests/testthat.R b/pkg/tests/testthat.R
new file mode 100644
index 0000000..88e5631
--- /dev/null
+++ b/pkg/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(valse) #ou load_all()
+
+test_check("valse")
diff --git a/pkg/tests/testthat/helper-context1.R b/pkg/tests/testthat/helper-context1.R
new file mode 100644
index 0000000..b40f358
--- /dev/null
+++ b/pkg/tests/testthat/helper-context1.R
@@ -0,0 +1,5 @@
+# Potential helpers for context 1
+help <- function()
+{
+	#...
+}
diff --git a/pkg/tests/testthat/test-context1.R b/pkg/tests/testthat/test-context1.R
new file mode 100644
index 0000000..17c633f
--- /dev/null
+++ b/pkg/tests/testthat/test-context1.R
@@ -0,0 +1,11 @@
+context("Context1")
+
+test_that("function 1...",
+{
+	#expect_lte( ..., ...)
+})
+
+test_that("function 2...",
+{
+	#expect_equal(..., ...)
+})
diff --git a/pkg/vignettes/.gitignore b/pkg/vignettes/.gitignore
new file mode 100644
index 0000000..e6493d4
--- /dev/null
+++ b/pkg/vignettes/.gitignore
@@ -0,0 +1,14 @@
+#ignore jupyter generated file (ipynb, HTML)
+*.html
+*.ipynb
+
+#and various (pdf)LaTeX files, in case of
+*.tex
+*.pdf
+*.aux
+*.dvi
+*.log
+*.out
+*.toc
+*.synctex.gz
+/figure/