essai fusion

author emilie <emilie@devijver.org>

Fri, 21 Apr 2017 13:52:14 +0000 (15:52 +0200)

committer emilie <emilie@devijver.org>

Fri, 21 Apr 2017 13:52:14 +0000 (15:52 +0200)
author emilie <emilie@devijver.org>
Fri, 21 Apr 2017 13:52:14 +0000 (15:52 +0200)
committer emilie <emilie@devijver.org>
Fri, 21 Apr 2017 13:52:14 +0000 (15:52 +0200)
diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION

deleted file mode 100644 (file)

index 3b33e25..0000000
--- a/pkg/DESCRIPTION
+++ /dev/null
@@ -1,42 +0,0 @@
-Package: valse
-Title: Variable Selection With Mixture Of Models
-Date: 2016-12-01
-Version: 0.1-0
-Description: Two methods are implemented to cluster data with finite mixture
-    regression models. Those procedures deal with high-dimensional covariates and
-    responses through a variable selection procedure based on the Lasso estimator.
-    A low-rank constraint could be added, computed for the Lasso-Rank procedure.
-    A collection of models is constructed, varying the level of sparsity and the
-    number of clusters, and a model is selected using a model selection criterion
-    (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model-
-    based clustering for high-dimensional data. Application to functional data' by
-    Emilie Devijver, published in Advances in Data Analysis and Clustering (2016).
-Author: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
-    Emilie Devijver <Emilie.Devijver@kuleuven.be> [aut],
-    Benjamin Goehry <Benjamin.Goehry@math.u-psud.fr> [aut]
-Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
-Depends:
-    R (>= 3.0.0)
-Imports:
-    MASS,
-    parallel
-Suggests:
-    capushe,
-    roxygen2,
-    testhat
-URL: http://git.auder.net/?p=valse.git
-License: MIT + file LICENSE
-RoxygenNote: 5.0.1
-Collate:
-    'plot_valse.R'
-    'main.R'
-    'selectVariables.R'
-    'constructionModelesLassoRank.R'
-    'constructionModelesLassoMLE.R'
-    'computeGridLambda.R'
-    'initSmallEM.R'
-    'EMGrank.R'
-    'EMGLLF.R'
-    'generateXY.R'
-    'A_NAMESPACE.R'
-    'util.R'
diff --git a/pkg/LICENSE b/pkg/LICENSE

deleted file mode 100644 (file)

index a212458..0000000
--- a/pkg/LICENSE
+++ /dev/null
@@ -1,23 +0,0 @@
-Copyright (c)
-  2014-2017, Benjamin Auder
-       2014-2017, Emilie Devijver
-       2016-2017, Benjamin Goehry
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R

deleted file mode 100644 (file)

index 8e1783e..0000000
--- a/pkg/R/A_NAMESPACE.R
+++ /dev/null
@@ -1,16 +0,0 @@
-#' @include generateXY.R
-#' @include EMGLLF.R
-#' @include EMGrank.R
-#' @include initSmallEM.R
-#' @include computeGridLambda.R
-#' @include constructionModelesLassoMLE.R
-#' @include constructionModelesLassoRank.R
-#' @include selectVariables.R
-#' @include main.R
-#' @include plot_valse.R
-#'
-#' @useDynLib valse
-#'
-#' @importFrom parallel makeCluster parLapply stopCluster clusterExport
-#' @importFrom MASS ginv
-NULL
diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R

deleted file mode 100644 (file)

index 03f0a75..0000000
--- a/pkg/R/EMGLLF.R
+++ /dev/null
@@ -1,194 +0,0 @@
-#' EMGLLF 
-#'
-#' Description de EMGLLF
-#'
-#' @param phiInit an initialization for phi
-#' @param rhoInit an initialization for rho
-#' @param piInit an initialization for pi
-#' @param gamInit initialization for the a posteriori probabilities
-#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
-#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
-#' @param gamma integer for the power in the penaly, by default = 1
-#' @param lambda regularization parameter in the Lasso estimation
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
-#'
-#' @return A list ... phi,rho,pi,LLF,S,affec:
-#'   phi : parametre de moyenne renormalisé, calculé par l'EM
-#'   rho : parametre de variance renormalisé, calculé par l'EM
-#'   pi : parametre des proportions renormalisé, calculé par l'EM
-#'   LLF : log vraisemblance associée à cet échantillon, pour les valeurs estimées des paramètres
-#'   S : ... affec : ...
-#'
-#' @export
-EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
-  X, Y, eps, fast)
-{
-  if (!fast)
-  {
-    # Function in R
-    return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
-      X, Y, eps))
-  }
-
-  # Function in C
-  n <- nrow(X)  #nombre d'echantillons
-  p <- ncol(X)  #nombre de covariables
-  m <- ncol(Y)  #taille de Y (multivarié)
-  k <- length(piInit)  #nombre de composantes dans le mélange
-  .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
-    X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k), 
-    LLF = double(maxi), S = double(p * m * k), affec = integer(n), n, p, m, k, 
-    PACKAGE = "valse")
-}
-
-# R version - slow but easy to read
-.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
-  X, Y, eps)
-{
-  # Matrix dimensions
-  n <- nrow(X)
-  p <- ncol(X)
-  m <- ncol(Y)
-  k <- length(piInit)
-
-  # Adjustments required when p==1 or m==1 (var.sel. or output dim 1)
-  if (p==1 || m==1)
-    phiInit <- array(phiInit, dim=c(p,m,k))
-  if (m==1)
-    rhoInit <- array(rhoInit, dim=c(m,m,k))
-
-  # Outputs
-  phi <- phiInit
-  rho <- rhoInit
-  pi <- piInit
-  llh <- -Inf
-  S <- array(0, dim = c(p, m, k))
-
-  # Algorithm variables
-  gam <- gamInit
-  Gram2 <- array(0, dim = c(p, p, k))
-  ps2 <- array(0, dim = c(p, m, k))
-  X2 <- array(0, dim = c(n, p, k))
-  Y2 <- array(0, dim = c(n, m, k))
-  EPS <- 1e-15
-
-  for (ite in 1:maxi)
-  {
-    # Remember last pi,rho,phi values for exit condition in the end of loop
-    Phi <- phi
-    Rho <- rho
-    Pi <- pi
-
-    # Computations associated to X and Y
-    for (r in 1:k)
-    {
-      for (mm in 1:m)
-        Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
-      for (i in 1:n)
-        X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
-      for (mm in 1:m)
-        ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
-      for (j in 1:p)
-      {
-        for (s in 1:p)
-          Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
-      }
-    }
-
-    ## M step
-
-    # For pi
-    b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
-    gam2 <- colSums(gam)
-    a <- sum(gam %*% log(pi))
-
-    # While the proportions are nonpositive
-    kk <- 0
-    pi2AllPositive <- FALSE
-    while (!pi2AllPositive)
-    {
-      pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi)
-      pi2AllPositive <- all(pi2 >= 0)
-      kk <- kk + 1
-    }
-
-    # t(m) is the largest value in the grid O.1^k such that it is nonincreasing
-    while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
-      # na.rm=TRUE to handle 0*log(0)
-      -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b))
-    {
-      pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
-      kk <- kk + 1
-    }
-    t <- 0.1^kk
-    pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
-
-    # For phi and rho
-    for (r in 1:k)
-    {
-      for (mm in 1:m)
-      {
-        ps <- 0
-        for (i in 1:n)
-          ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
-        nY2 <- sum(Y2[, mm, r]^2)
-        rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
-      }
-    }
-
-    for (r in 1:k)
-    {
-      for (j in 1:p)
-      {
-        for (mm in 1:m)
-        {
-          S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] +
-            sum(phi[-j, mm, r] * Gram2[j, -j, r])
-          if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) {
-            phi[j, mm, r] <- 0
-          } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) {
-            phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
-          } else {
-            phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
-          }
-        }
-      }
-    }
-
-    ## E step
-
-    # Precompute det(rho[,,r]) for r in 1...k
-    detRho <- sapply(1:k, function(r) gdet(rho[, , r]))
-    sumLogLLH <- 0
-    for (i in 1:n)
-    {
-      # Update gam[,]; use log to avoid numerical problems
-      logGam <- sapply(1:k, function(r) {
-        log(pi[r]) + log(detRho[r]) - 0.5 *
-          sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)
-      })
-
-      logGam <- logGam - max(logGam) #adjust without changing proportions
-      gam[i, ] <- exp(logGam)
-      norm_fact <- sum(gam[i, ])
-      gam[i, ] <- gam[i, ] / norm_fact
-      sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2))
-    }
-
-    sumPen <- sum(pi^gamma * b)
-    last_llh <- llh
-    llh <- -sumLogLLH/n + lambda * sumPen
-    dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh)))
-    Dist1 <- max((abs(phi - Phi))/(1 + abs(phi)))
-    Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
-    Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
-    dist2 <- max(Dist1, Dist2, Dist3)
-
-    if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
-      break
-  }
-
-  list(phi = phi, rho = rho, pi = pi, llh = llh, S = S)
-}
diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R

deleted file mode 100644 (file)

index b85a0fa..0000000
--- a/pkg/R/EMGrank.R
+++ /dev/null
@@ -1,120 +0,0 @@
-#' EMGrank
-#'
-#' Description de EMGrank
-#'
-#' @param Pi Parametre de proportion
-#' @param Rho Parametre initial de variance renormalisé
-#' @param mini Nombre minimal d'itérations dans l'algorithme EM
-#' @param maxi Nombre maximal d'itérations dans l'algorithme EM
-#' @param X Régresseurs
-#' @param Y Réponse
-#' @param tau Seuil pour accepter la convergence
-#' @param rank Vecteur des rangs possibles
-#'
-#' @return A list ...
-#'   phi : parametre de moyenne renormalisé, calculé par l'EM
-#'   LLF : log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres
-#'
-#' @export
-EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE)
-{
-  if (!fast)
-  {
-    # Function in R
-    return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
-  }
-
-  # Function in C
-  n <- nrow(X)  #nombre d'echantillons
-  p <- ncol(X)  #nombre de covariables
-  m <- ncol(Y)  #taille de Y (multivarié)
-  k <- length(Pi)  #nombre de composantes dans le mélange
-  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, tau, rank, phi = double(p * m * k), 
-    LLF = double(1), n, p, m, k, PACKAGE = "valse")
-}
-
-# helper to always have matrices as arg (TODO: put this elsewhere? improve?)  -->
-# Yes, we should use by-columns storage everywhere... [later!]
-matricize <- function(X)
-{
-  if (!is.matrix(X)) 
-    return(t(as.matrix(X)))
-  return(X)
-}
-
-# R version - slow but easy to read
-.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
-{
-  # matrix dimensions
-  n <- nrow(X)
-  p <- ncol(X)
-  m <- ncol(Y)
-  k <- length(Pi)
-
-  # init outputs
-  phi <- array(0, dim = c(p, m, k))
-  Z <- rep(1, n)
-  LLF <- 0
-
-  # local variables
-  Phi <- array(0, dim = c(p, m, k))
-  deltaPhi <- c()
-  sumDeltaPhi <- 0
-  deltaPhiBufferSize <- 20
-  
-  # main loop
-  ite <- 1
-  while (ite <= mini || (ite <= maxi && sumDeltaPhi > tau))
-  {
-    # M step: update for Beta ( and then phi)
-    for (r in 1:k)
-    {
-      Z_indice <- seq_len(n)[Z == r] #indices where Z == r
-      if (length(Z_indice) == 0) 
-        next
-      # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-      s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% 
-                 crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
-      S <- s$d
-      # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation
-      # of the initial product
-      if (rank[r] < length(S)) 
-        S[(rank[r] + 1):length(S)] <- 0
-      phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
-    }
-
-    # Step E and computation of the loglikelihood
-    sumLogLLF2 <- 0
-    for (i in seq_len(n))
-    {
-      sumLLF1 <- 0
-      maxLogGamIR <- -Inf
-      for (r in seq_len(k))
-      {
-        dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
-        logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct
-        # Z[i] = index of max (gam[i,])
-        if (logGamIR > maxLogGamIR)
-        {
-          Z[i] <- r
-          maxLogGamIR <- logGamIR
-        }
-        sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2)
-      }
-      sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
-    }
-
-    LLF <- -1/n * sumLogLLF2
-
-    # update distance parameter to check algorithm convergence (delta(phi, Phi))
-    deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
-    if (length(deltaPhi) > deltaPhiBufferSize) 
-      deltaPhi <- deltaPhi[2:length(deltaPhi)]
-    sumDeltaPhi <- sum(abs(deltaPhi))
-
-    # update other local variables
-    Phi <- phi
-    ite <- ite + 1
-  }
-  return(list(phi = phi, LLF = LLF))
-}
diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R

deleted file mode 100644 (file)

index cf762ec..0000000
--- a/pkg/R/computeGridLambda.R
+++ /dev/null
@@ -1,36 +0,0 @@
-#' computeGridLambda 
-#'
-#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
-#'
-#' @param phiInit value for phi
-#' @param rhoInit\tvalue for rho
-#' @param piInit\tvalue for pi
-#' @param gamInit value for gamma
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param gamma power of weights in the penalty
-#' @param mini minimum number of iterations in EM algorithm
-#' @param maxi maximum number of iterations in EM algorithm
-#' @param tau threshold to stop EM algorithm
-#'
-#' @return the grid of regularization parameters
-#'
-#' @export
-computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, 
-  maxi, tau, fast)
-{
-  n <- nrow(X)
-  p <- ncol(X)
-  m <- ncol(Y)
-  k <- length(piInit)
-
-  list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, 
-    X, Y, tau, fast)
-  grid <- array(0, dim = c(p, m, k))
-  for (i in 1:p)
-  {
-    for (j in 1:m)
-      grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma)
-  }
-  sort(unique(grid))
-}
diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R

deleted file mode 100644 (file)

index 1275ca3..0000000
--- a/pkg/R/constructionModelesLassoMLE.R
+++ /dev/null
@@ -1,93 +0,0 @@
-#' constructionModelesLassoMLE 
-#'
-#' Construct a collection of models with the Lasso-MLE procedure.
-#' 
-#' @param phiInit an initialization for phi, get by initSmallEM.R
-#' @param rhoInit an initialization for rho, get by initSmallEM.R
-#' @param piInit an initialization for pi, get by initSmallEM.R
-#' @param gamInit an initialization for gam, get by initSmallEM.R
-#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
-#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
-#' @param gamma integer for the power in the penaly, by default = 1
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
-#' @param S output of selectVariables.R
-#' @param ncores Number of cores, by default = 3
-#' @param fast TRUE to use compiled C code, FALSE for R code only
-#' @param verbose TRUE to show some execution traces
-#' 
-#' @return a list with several models, defined by phi, rho, pi, llh
-#'
-#' @export
-constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, 
-  maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose)
-{
-  if (ncores > 1)
-  {
-    cl <- parallel::makeCluster(ncores, outfile = "")
-    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit", 
-      "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", 
-      "ncores", "fast", "verbose"))
-  }
-
-  # Individual model computation
-  computeAtLambda <- function(lambda)
-  {
-    if (ncores > 1) 
-      require("valse")  #nodes start with an empty environment
-
-    if (verbose) 
-      print(paste("Computations for lambda=", lambda))
-
-    n <- nrow(X)
-    p <- ncol(X)
-    m <- ncol(Y)
-    k <- length(piInit)
-    sel.lambda <- S[[lambda]]$selected
-    # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
-    col.sel <- which(sapply(sel.lambda, length) > 0)  #if list of selected vars
-    if (length(col.sel) == 0) 
-      return(NULL)
-
-    # lambda == 0 because we compute the EMV: no penalization here
-    res <- EMGLLF(array(phiInit,dim=c(p,m,k))[col.sel, , ], rhoInit, piInit, gamInit,
-      mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast)
-
-    # Eval dimension from the result + selected
-    phiLambda2 <- res$phi
-    rhoLambda <- res$rho
-    piLambda <- res$pi
-    phiLambda <- array(0, dim = c(p, m, k))
-    for (j in seq_along(col.sel))
-      phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
-    dimension <- length(unlist(sel.lambda))
-
-    # Computation of the loglikelihood
-    densite <- vector("double", n)
-    for (r in 1:k)
-    {
-      if (length(col.sel) == 1)
-      {
-        delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r])))
-      } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r]))
-      densite <- densite + piLambda[r] * gdet(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * 
-        exp(-diag(tcrossprod(delta))/2)
-    }
-    llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1)
-    list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda)
-  }
-
-  # For each lambda, computation of the parameters
-  out <-
-    if (ncores > 1) {
-      parLapply(cl, 1:length(S), computeAtLambda)
-    } else {
-      lapply(1:length(S), computeAtLambda)
-    }
-
-  if (ncores > 1) 
-    parallel::stopCluster(cl)
-
-  out
-}
diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R

deleted file mode 100644 (file)

index dc88f67..0000000
--- a/pkg/R/constructionModelesLassoRank.R
+++ /dev/null
@@ -1,95 +0,0 @@
-#' constructionModelesLassoRank
-#'
-#' Construct a collection of models with the Lasso-Rank procedure.
-#'
-#' @param S output of selectVariables.R
-#' @param k number of components
-#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
-#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
-#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
-#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
-#' @param ncores Number of cores, by default = 3
-#' @param fast TRUE to use compiled C code, FALSE for R code only
-#' @param verbose TRUE to show some execution traces
-#'
-#' @return a list with several models, defined by phi, rho, pi, llh
-#'
-#' @export
-constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, 
-  ncores, fast, verbose)
-{
-  n <- nrow(X)
-  p <- ncol(X)
-  m <- ncol(Y)
-  L <- length(S)
-
-  # Possible interesting ranks
-  deltaRank <- rank.max - rank.min + 1
-  Size <- deltaRank^k
-  RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1)
-  for (r in 1:k)
-  {
-    # On veut le tableau de toutes les combinaisons de rangs possibles, et des
-    # lambdas Dans la première colonne : on répète (rank.max-rank.min)^(k-1) chaque
-    # chiffre : ça remplit la colonne Dans la deuxieme : on répète
-    # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait ça (rank.max-rank.min)^2
-    # fois ...  Dans la dernière, on répète chaque chiffre une fois, et on fait ça
-    # (rank.min-rank.max)^(k-1) fois.
-    RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1), 
-      each = deltaRank^(k - r)), each = L)
-  }
-  RankLambda[, k + 1] <- rep(1:L, times = Size)
-
-  if (ncores > 1)
-  {
-    cl <- parallel::makeCluster(ncores, outfile = "")
-    parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size", 
-      "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", 
-      "verbose"))
-  }
-
-  computeAtLambda <- function(index)
-  {
-    lambdaIndex <- RankLambda[index, k + 1]
-    rankIndex <- RankLambda[index, 1:k]
-    if (ncores > 1) 
-      require("valse")  #workers start with an empty environment
-
-    # 'relevant' will be the set of relevant columns
-    selected <- S[[lambdaIndex]]$selected
-    relevant <- c()
-    for (j in 1:p)
-    {
-      if (length(selected[[j]]) > 0)
-        relevant <- c(relevant, j)
-    }
-    if (max(rankIndex) < length(relevant))
-    {
-      phi <- array(0, dim = c(p, m, k))
-      if (length(relevant) > 0)
-      {
-        res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, 
-          X[, relevant], Y, eps, rankIndex, fast)
-        llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
-        phi[relevant, , ] <- res$phi
-      }
-      list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho)
-    }
-  }
-
-  # For each lambda in the grid we compute the estimators
-  out <-
-    if (ncores > 1) {
-      parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
-    } else {
-      lapply(seq_len(length(S) * Size), computeAtLambda)
-    }
-
-  if (ncores > 1) 
-    parallel::stopCluster(cl)
-
-  out
-}
diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R

deleted file mode 100644 (file)

index 064b54b..0000000
--- a/pkg/R/generateXY.R
+++ /dev/null
@@ -1,39 +0,0 @@
-#' generateXY 
-#'
-#' Generate a sample of (X,Y) of size n
-#'
-#' @param n sample size
-#' @param π proportion for each cluster
-#' @param meanX matrix of group means for covariates (of size p)
-#' @param covX covariance for covariates (of size p*p)
-#' @param β regression matrix, of size p*m*k
-#' @param covY covariance for the response vector (of size m*m*K)
-#'
-#' @return list with X and Y
-#'
-#' @export
-generateXY <- function(n, π, meanX, β, covX, covY)
-{
-  p <- dim(covX)[1]
-  m <- dim(covY)[1]
-  k <- dim(covY)[3]
-
-  X <- matrix(nrow = 0, ncol = p)
-  Y <- matrix(nrow = 0, ncol = m)
-
-  # random generation of the size of each population in X~Y (unordered)
-  sizePop <- rmultinom(1, n, π)
-  class <- c() #map i in 1:n --> index of class in 1:k
-
-  for (i in 1:k)
-  {
-    class <- c(class, rep(i, sizePop[i]))
-    newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
-    X <- rbind(X, newBlockX)
-    Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% 
-      β[, , i], covY[, , i]))))
-  }
-
-  shuffle <- sample(n)
-  list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
-}
diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R

deleted file mode 100644 (file)

index 44b4b06..0000000
--- a/pkg/R/initSmallEM.R
+++ /dev/null
@@ -1,79 +0,0 @@
-#' initialization of the EM algorithm 
-#'
-#' @param k number of components
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#'
-#' @return a list with phiInit, rhoInit, piInit, gamInit
-#' @export
-#' @importFrom methods new
-#' @importFrom stats cutree dist hclust runif
-initSmallEM <- function(k, X, Y, fast)
-{
-  n <- nrow(X)
-  p <- ncol(X)
-  m <- ncol(Y)
-  nIte <- 20
-  Zinit1 <- array(0, dim = c(n, nIte))
-  betaInit1 <- array(0, dim = c(p, m, k, nIte))
-  sigmaInit1 <- array(0, dim = c(m, m, k, nIte))
-  phiInit1 <- array(0, dim = c(p, m, k, nIte))
-  rhoInit1 <- array(0, dim = c(m, m, k, nIte))
-  Gam <- matrix(0, n, k)
-  piInit1 <- matrix(0, nIte, k)
-  gamInit1 <- array(0, dim = c(n, k, nIte))
-  LLFinit1 <- list()
-
-  # require(MASS) #Moore-Penrose generalized inverse of matrix
-  for (repet in 1:nIte)
-  {
-    distance_clus <- dist(cbind(X, Y))
-    tree_hier <- hclust(distance_clus)
-    Zinit1[, repet] <- cutree(tree_hier, k)
-
-    for (r in 1:k)
-    {
-      Z <- Zinit1[, repet]
-      Z_indice <- seq_len(n)[Z == r]  #renvoit les indices où Z==r
-      if (length(Z_indice) == 1) {
-        betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% 
-          crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
-      } else {
-        betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% 
-          crossprod(X[Z_indice, ], Y[Z_indice, ])
-      }
-      sigmaInit1[, , r, repet] <- diag(m)
-      phiInit1[, , r, repet] <- betaInit1[, , r, repet]  #/ sigmaInit1[,,r,repet]
-      rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
-      piInit1[repet, r] <- mean(Z == r)
-    }
-
-    for (i in 1:n)
-    {
-      for (r in 1:k)
-      {
-        dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
-          - X[i, ] %*% phiInit1[, , r, repet])
-        Gam[i, r] <- piInit1[repet, r] * 
-          gdet(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
-      }
-      sumGamI <- sum(Gam[i, ])
-      gamInit1[i, , repet] <- Gam[i, ]/sumGamI
-    }
-
-    miniInit <- 10
-    maxiInit <- 11
-
-    init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
-      gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
-      eps = 1e-04, fast)
-    LLFinit1[[repet]] <- init_EMG$llh
-  }
-  b <- which.min(LLFinit1)
-  phiInit <- phiInit1[, , , b]
-  rhoInit <- rhoInit1[, , , b]
-  piInit <- piInit1[b, ]
-  gamInit <- gamInit1[, , b]
-
-  return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
-}
diff --git a/pkg/R/main.R b/pkg/R/main.R

deleted file mode 100644 (file)

index e741d65..0000000
--- a/pkg/R/main.R
+++ /dev/null
@@ -1,152 +0,0 @@
-#' valse 
-#'
-#' Main function
-#'
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param procedure among 'LassoMLE' or 'LassoRank'
-#' @param selecMod method to select a model among 'DDSE', 'DJump', 'BIC' or 'AIC'
-#' @param gamma integer for the power in the penaly, by default = 1
-#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
-#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
-#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
-#' @param kmin integer, minimum number of clusters, by default = 2
-#' @param kmax integer, maximum number of clusters, by default = 10
-#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
-#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
-#' @param ncores_outer Number of cores for the outer loop on k
-#' @param ncores_inner Number of cores for the inner loop on lambda
-#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
-#' @param size_coll_mod (Maximum) size of a collection of models
-#' @param fast TRUE to use compiled C code, FALSE for R code only
-#' @param verbose TRUE to show some execution traces
-#'
-#' @return a list with estimators of parameters
-#'
-#' @examples
-#' #TODO: a few examples
-#' @export
-valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, 
-  maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, 
-  ncores_inner = 1, thresh = 1e-08, size_coll_mod = 10, fast = TRUE, verbose = FALSE, 
-  plot = TRUE)
-{
-  n <- nrow(X)
-  p <- ncol(X)
-  m <- ncol(Y)
-
-  if (verbose) 
-    print("main loop: over all k and all lambda")
-
-  if (ncores_outer > 1) {
-    cl <- parallel::makeCluster(ncores_outer, outfile = "")
-    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", 
-      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", 
-      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", 
-      "size_coll_mod", "verbose", "p", "m"))
-  }
-
-  # Compute models with k components
-  computeModels <- function(k)
-  {
-    if (ncores_outer > 1) 
-      require("valse")  #nodes start with an empty environment
-
-    if (verbose) 
-      print(paste("Parameters initialization for k =", k))
-    # smallEM initializes parameters by k-means and regression model in each
-    # component, doing this 20 times, and keeping the values maximizing the
-    # likelihood after 10 iterations of the EM algorithm.
-    P <- initSmallEM(k, X, Y, fast)
-    grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, 
-      X, Y, gamma, mini, maxi, eps, fast)
-    if (length(grid_lambda) > size_coll_mod) 
-      grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
-
-    if (verbose) 
-      print("Compute relevant parameters")
-    # select variables according to each regularization parameter from the grid:
-    # S$selected corresponding to selected variables
-    S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, 
-      gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
-
-    if (procedure == "LassoMLE") {
-      if (verbose) 
-        print("run the procedure Lasso-MLE")
-      # compute parameter estimations, with the Maximum Likelihood Estimator,
-      # restricted on selected variables.
-      models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, 
-        P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
-    } else {
-      if (verbose) 
-        print("run the procedure Lasso-Rank")
-      # compute parameter estimations, with the Low Rank Estimator, restricted on
-      # selected variables.
-      models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min, 
-        rank.max, ncores_inner, fast, verbose)
-    }
-    # warning! Some models are NULL after running selectVariables
-    models <- models[sapply(models, function(cell) !is.null(cell))]
-    models
-  }
-
-  # List (index k) of lists (index lambda) of models
-  models_list <-
-    if (ncores_outer > 1) {
-      parLapply(cl, kmin:kmax, computeModels)
-    } else {
-      lapply(kmin:kmax, computeModels)
-    }
-  if (ncores_outer > 1) 
-    parallel::stopCluster(cl)
-
-  if (!requireNamespace("capushe", quietly = TRUE))
-  {
-    warning("'capushe' not available: returning all models")
-    return(models_list)
-  }
-
-  # Get summary 'tableauRecap' from models
-  tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
-  {
-    models <- models_list[[i]]
-    # For a collection of models (same k, several lambda):
-    LLH <- sapply(models, function(model) model$llh[1])
-    k <- length(models[[1]]$pi)
-    sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, 
-      , 1] != 0) + 1) - 1)
-    data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, 
-      complexity = sumPen, contrast = -LLH)
-  }))
-  tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
-
-  modSel <- capushe::capushe(tableauRecap, n)
-  indModSel <- if (selecMod == "DDSE") 
-    as.numeric(modSel@DDSE@model) else if (selecMod == "Djump") 
-    as.numeric(modSel@Djump@model) else if (selecMod == "BIC") 
-    modSel@BIC_capushe$model else if (selecMod == "AIC") 
-    modSel@AIC_capushe$model
-
-  mod <- as.character(tableauRecap[indModSel, 1])
-  listMod <- as.integer(unlist(strsplit(mod, "[.]")))
-  modelSel <- models_list[[listMod[1]]][[listMod[2]]]
-
-  ## Affectations
-  Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n)
-  for (i in 1:n)
-  {
-    for (r in 1:length(modelSel$pi))
-    {
-      sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2)
-      Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * gdet(modelSel$rho[, , r])
-    }
-  }
-  Gam <- Gam/rowSums(Gam)
-  modelSel$affec <- apply(Gam, 1, which.max)
-  modelSel$proba <- Gam
-
-  if (plot)
-    print(plot_valse(X, Y, modelSel, n))
-
-  return(modelSel)
-}
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R

deleted file mode 100644 (file)

index ec2302d..0000000
--- a/pkg/R/plot_valse.R
+++ /dev/null
@@ -1,89 +0,0 @@
-#' Plot 
-#'
-#' It is a function which plots relevant parameters
-#'
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param model the model constructed by valse procedure
-#' @param n sample size
-#' @return several plots
-#'
-#' @examples TODO
-#'
-#' @export
-#'
-plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
-{
-  require("gridExtra")
-  require("ggplot2")
-  require("reshape2")
-  require("cowplot")
-
-  K <- length(model$pi)
-  ## regression matrices
-  gReg <- list()
-  for (r in 1:K)
-  {
-    Melt <- melt(t((model$phi[, , r])))
-    gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + 
-      geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
-      midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
-  }
-  print(gReg)
-
-  ## Differences between two clusters
-  if (comp)
-  {
-    if (is.na(k1) || is.na(k))
-      print("k1 and k2 must be integers, representing the clusters you want to compare")
-    Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
-    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))
-      + geom_tile()
-      + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
-        space = "Lab")
-      + ggtitle(paste("Difference between regression matrices in cluster", 
-        k1, "and", k2))
-    print(gDiff)
-  }
-
-  ### Covariance matrices
-  matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
-  for (r in 1:K)
-    matCov[, r] <- diag(model$rho[, , r])
-  MeltCov <- melt(matCov)
-  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile()
-    + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
-      space = "Lab")
-    + ggtitle("Covariance matrices")
-  print(gCov)
-
-  ### Proportions
-  gam2 <- matrix(NA, ncol = K, nrow = n)
-  for (i in 1:n)
-    gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
-
-  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2))
-    + geom_boxplot()
-    + theme(legend.position = "none")
-    + background_grid(major = "xy", minor = "none")
-  print(bp)
-
-  ### Mean in each cluster
-  XY <- cbind(X, Y)
-  XY_class <- list()
-  meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2])
-  for (r in 1:K)
-  {
-    XY_class[[r]] <- XY[model$affec == r, ]
-    if (sum(model$affec == r) == 1) {
-      meanPerClass[, r] <- XY_class[[r]]
-    } else {
-      meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
-    }
-  }
-  data <- data.frame(mean = as.vector(meanPerClass),
-    cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
-  g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
-  print(g + geom_line(aes(linetype = cluster, color = cluster))
-    + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
-}
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R

deleted file mode 100644 (file)

index d863a4b..0000000
--- a/pkg/R/selectVariables.R
+++ /dev/null
@@ -1,81 +0,0 @@
-#' selectVariables 
-#'
-#' It is a function which construct, for a given lambda, the sets of relevant variables.
-#'
-#' @param phiInit an initial estimator for phi (size: p*m*k)
-#' @param rhoInit an initial estimator for rho (size: m*m*k)
-#' @param piInit\tan initial estimator for pi (size : k)
-#' @param gamInit an initial estimator for gamma
-#' @param mini\t\tminimum number of iterations in EM algorithm
-#' @param maxi\t\tmaximum number of iterations in EM algorithm
-#' @param gamma\t power in the penalty
-#' @param glambda grid of regularization parameters
-#' @param X\t\t\t matrix of regressors
-#' @param Y\t\t\t matrix of responses
-#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
-#' @param eps\t\t threshold to say that EM algorithm has converged
-#' @param ncores Number or cores for parallel execution (1 to disable)
-#'
-#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
-#'
-#' @examples TODO
-#'
-#' @export
-#'
-selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, 
-  glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast)
-{
-  if (ncores > 1) {
-    cl <- parallel::makeCluster(ncores, outfile = "")
-    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", 
-      "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment())
-  }
-
-  # Computation for a fixed lambda
-  computeCoefs <- function(lambda)
-  {
-    params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
-      X, Y, eps, fast)
-
-    p <- ncol(X)
-    m <- ncol(Y)
-
-    # selectedVariables: list where element j contains vector of selected variables
-    # in [1,m]
-    selectedVariables <- lapply(1:p, function(j) {
-      # from boolean matrix mxk of selected variables obtain the corresponding boolean
-      # m-vector, and finally return the corresponding indices
-      if (m>1) {
-        seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
-      } else {
-        if (any(params$phi[j, , ] > thresh))
-          1
-        else
-          numeric(0)
-      }
-    })
-
-    list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
-  }
-
-  # For each lambda in the grid, we compute the coefficients
-  out <-
-    if (ncores > 1) {
-      parLapply(cl, glambda, computeCoefs)
-    } else {
-      lapply(glambda, computeCoefs)
-    }
-  if (ncores > 1) 
-    parallel::stopCluster(cl)
-  # Suppress models which are computed twice En fait, ca ca fait la comparaison de
-  # tous les parametres On veut juste supprimer ceux qui ont les memes variables
-  # sélectionnées
-  # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ]
-  selec <- lapply(out, function(model) model$selected)
-  ind_dup <- duplicated(selec)
-  ind_uniq <- which(!ind_dup)
-  out2 <- list()
-  for (l in 1:length(ind_uniq))
-    out2[[l]] <- out[[ind_uniq[l]]]
-  out2
-}
diff --git a/pkg/R/util.R b/pkg/R/util.R

deleted file mode 100644 (file)

index f8b01cc..0000000
--- a/pkg/R/util.R
+++ /dev/null
@@ -1,7 +0,0 @@
-# ...
-gdet <- function(M)
-{
-       if (is.matrix(M))
-               return (det(M))
-       return (M[1]) #numeric, double
-}
diff --git a/pkg/data/data.RData b/pkg/data/data.RData

deleted file mode 100644 (file)

index a9f09e1..0000000

Binary files a/pkg/data/data.RData and /dev/null differ
diff --git a/pkg/inst/testdata/TODO.csv b/pkg/inst/testdata/TODO.csv

deleted file mode 100644 (file)

index d679966..0000000
--- a/pkg/inst/testdata/TODO.csv
+++ /dev/null
@@ -1 +0,0 @@
-ou alors data_test.RData, possible aussi
diff --git a/pkg/man/valse-package.Rd b/pkg/man/valse-package.Rd

deleted file mode 100644 (file)

index 534375b..0000000
--- a/pkg/man/valse-package.Rd
+++ /dev/null
@@ -1,37 +0,0 @@
-\name{valse-package}
-\alias{valse-package}
-\alias{valse}
-\docType{package}
-
-\title{
-       \packageTitle{valse}
-}
-
-\description{
-       \packageDescription{valse}
-}
-
-\details{
-       The package devtools should be useful in development stage, since we rely on testthat for
-       unit tests, and roxygen2 for documentation. knitr is used to generate the package vignette.
-       Concerning the other suggested packages:
-       \itemize{
-               \item{parallel (generally) permits to run the bootstrap method faster.}
-       }
-
-       The three main functions are ...
-}
-
-\author{
-       \packageAuthor{valse}
-
-       Maintainer: \packageMaintainer{valse}
-}
-
-%\references{
-%      TODO: Literature or other references for background information
-%}
-
-%\examples{
-%      TODO: simple examples of the most important functions
-%}
diff --git a/pkg/src/Makevars b/pkg/src/Makevars

deleted file mode 100644 (file)

index 50b7fb6..0000000
--- a/pkg/src/Makevars
+++ /dev/null
@@ -1,11 +0,0 @@
-#Debug flags
-PKG_CFLAGS=-g -I./sources
-
-#Prod flags:
-#PKG_CFLAGS=-O2 -I./sources
-
-PKG_LIBS=-lm -lgsl -lcblas
-
-SOURCES = $(wildcard adapters/*.c sources/*.c)
-
-OBJECTS = $(SOURCES:.c=.o)
diff --git a/pkg/src/adapters/a.EMGLLF.c b/pkg/src/adapters/a.EMGLLF.c

deleted file mode 100644 (file)

index f24cd2a..0000000
--- a/pkg/src/adapters/a.EMGLLF.c
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <R.h>
-#include <Rdefines.h>
-#include "EMGLLF.h"
-
-// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper)
-SEXP EMGLLF(
-       SEXP phiInit_,
-       SEXP rhoInit_,
-       SEXP piInit_,
-       SEXP gamInit_,
-       SEXP mini_,
-       SEXP maxi_,
-       SEXP gamma_,
-       SEXP lambda_,
-       SEXP X_,
-       SEXP Y_,
-       SEXP tau_
-) {
-       // Get matrices dimensions
-       int n = INTEGER(getAttrib(X_, R_DimSymbol))[0];
-       SEXP dim = getAttrib(phiInit_, R_DimSymbol);
-       int p = INTEGER(dim)[0];
-       int m = INTEGER(dim)[1];
-       int k = INTEGER(dim)[2];
-
-       ////////////
-       // INPUTS //
-       ////////////
-
-       // get scalar parameters
-       int mini = INTEGER_VALUE(mini_);
-       int maxi = INTEGER_VALUE(maxi_);
-       double gamma = NUMERIC_VALUE(gamma_);
-       double lambda = NUMERIC_VALUE(lambda_);
-       double tau = NUMERIC_VALUE(tau_);
-
-       // Get pointers from SEXP arrays ; WARNING: by columns !
-       double* phiInit = REAL(phiInit_);
-       double* rhoInit = REAL(rhoInit_);
-       double* piInit = REAL(piInit_);
-       double* gamInit = REAL(gamInit_);
-       double* X = REAL(X_);
-       double* Y = REAL(Y_);
-
-       /////////////
-       // OUTPUTS //
-       /////////////
-
-       SEXP phi, rho, pi, LLF, S, affec, dimPhiS, dimRho;
-       PROTECT(dimPhiS = allocVector(INTSXP, 3));
-       int* pDimPhiS = INTEGER(dimPhiS);
-       pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k;
-       PROTECT(dimRho = allocVector(INTSXP, 3));
-       int* pDimRho = INTEGER(dimRho);
-       pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k;
-       PROTECT(phi = allocArray(REALSXP, dimPhiS));
-       PROTECT(rho = allocArray(REALSXP, dimRho));
-       PROTECT(pi = allocVector(REALSXP, k));
-       PROTECT(LLF = allocVector(REALSXP, maxi-mini+1));
-       PROTECT(S = allocArray(REALSXP, dimPhiS));
-       PROTECT(affec = allocVector(INTSXP, n));
-       double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLLF=REAL(LLF), *pS=REAL(S);
-       int *pAffec=INTEGER(affec);
-
-       ////////////////////
-       // Call to EMGLLF //
-       ////////////////////
-
-       EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,
-               pPhi,pRho,pPi,pLLF,pS,pAffec,
-               n,p,m,k);
-
-       // Build list from OUT params and return it
-       SEXP listParams, listNames;
-       int nouts = 6;
-       PROTECT(listParams = allocVector(VECSXP, nouts));
-       char* lnames[6] = {"phi", "rho", "pi", "LLF", "S", "affec"}; //lists labels
-       PROTECT(listNames = allocVector(STRSXP,nouts));
-       for (int i=0; i<nouts; i++)
-               SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
-       setAttrib(listParams, R_NamesSymbol, listNames);
-       SET_VECTOR_ELT(listParams, 0, phi);
-       SET_VECTOR_ELT(listParams, 1, rho);
-       SET_VECTOR_ELT(listParams, 2, pi);
-       SET_VECTOR_ELT(listParams, 3, LLF);
-       SET_VECTOR_ELT(listParams, 4, S);
-       SET_VECTOR_ELT(listParams, 5, affec);
-
-       UNPROTECT(10);
-       return listParams;
-}
diff --git a/pkg/src/adapters/a.EMGrank.c b/pkg/src/adapters/a.EMGrank.c

deleted file mode 100644 (file)

index b1dad9b..0000000
--- a/pkg/src/adapters/a.EMGrank.c
+++ /dev/null
@@ -1,73 +0,0 @@
-#include <R.h>
-#include <Rdefines.h>
-#include "EMGrank.h"
-
-// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper)
-SEXP EMGrank(
-       SEXP Pi_,
-       SEXP Rho_,
-       SEXP mini_,
-       SEXP maxi_,
-       SEXP X_,
-       SEXP Y_,
-       SEXP tau_,
-       SEXP rank_
-) {
-       // Get matrices dimensions
-       SEXP dimX = getAttrib(X_, R_DimSymbol);
-       int n = INTEGER(dimX)[0];
-       int p = INTEGER(dimX)[1];
-       SEXP dimRho = getAttrib(Rho_, R_DimSymbol);
-       int m = INTEGER(dimRho)[0];
-       int k = INTEGER(dimRho)[2];
-
-       ////////////
-       // INPUTS //
-       ////////////
-
-       // get scalar parameters
-       int mini = INTEGER_VALUE(mini_);
-       int maxi = INTEGER_VALUE(maxi_);
-       double tau = NUMERIC_VALUE(tau_);
-
-       // Get pointers from SEXP arrays ; WARNING: by columns !
-       double* Pi = REAL(Pi_);
-       double* Rho = REAL(Rho_);
-       double* X = REAL(X_);
-       double* Y = REAL(Y_);
-       int* rank = INTEGER(rank_);
-
-       /////////////
-       // OUTPUTS //
-       /////////////
-
-       SEXP phi, LLF, dimPhi;
-       PROTECT(dimPhi = allocVector(INTSXP, 3));
-       int* pDimPhi = INTEGER(dimPhi);
-       pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k;
-       PROTECT(phi = allocArray(REALSXP, dimPhi));
-       PROTECT(LLF = allocVector(REALSXP, 1));
-       double *pPhi=REAL(phi), *pLLF=REAL(LLF);
-
-       /////////////////////
-       // Call to EMGrank //
-       /////////////////////
-
-       EMGrank_core(Pi, Rho, mini, maxi, X, Y, tau, rank,
-               pPhi,pLLF,
-               n,p,m,k);
-
-       // Build list from OUT params and return it
-       SEXP listParams, listNames;
-       PROTECT(listParams = allocVector(VECSXP, 2));
-       char* lnames[2] = {"phi", "LLF"}; //lists labels
-       PROTECT(listNames = allocVector(STRSXP,2));
-       for (int i=0; i<2; i++)
-               SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
-       setAttrib(listParams, R_NamesSymbol, listNames);
-       SET_VECTOR_ELT(listParams, 0, phi);
-       SET_VECTOR_ELT(listParams, 1, LLF);
-
-       UNPROTECT(5);
-       return listParams;
-}
diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/sources/EMGLLF.c

deleted file mode 100644 (file)

index d2f5a8e..0000000
--- a/pkg/src/sources/EMGLLF.c
+++ /dev/null
@@ -1,412 +0,0 @@
-#include "utils.h"
-#include <stdlib.h>
-#include <math.h>
-#include <gsl/gsl_linalg.h>
-
-// TODO: don't recompute indexes ai(...) and mi(...) when possible
-void EMGLLF_core(
-       // IN parameters
-       const Real* phiInit, // parametre initial de moyenne renormalisé
-       const Real* rhoInit, // parametre initial de variance renormalisé
-       const Real* piInit,      // parametre initial des proportions
-       const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon
-       int mini, // nombre minimal d'itérations dans l'algorithme EM
-       int maxi, // nombre maximal d'itérations dans l'algorithme EM
-       Real gamma, // puissance des proportions dans la pénalisation pour un Lasso adaptatif
-       Real lambda, // valeur du paramètre de régularisation du Lasso
-       const Real* X, // régresseurs
-       const Real* Y, // réponse
-       Real tau, // seuil pour accepter la convergence
-       // OUT parameters (all pointers, to be modified)
-       Real* phi, // parametre de moyenne renormalisé, calculé par l'EM
-       Real* rho, // parametre de variance renormalisé, calculé par l'EM
-       Real* pi, // parametre des proportions renormalisé, calculé par l'EM
-       Real* llh, // (derniere) log vraisemblance associée à cet échantillon,
-                  // pour les valeurs estimées des paramètres
-       Real* S,
-       int* affec,
-       // additional size parameters
-       int n, // nombre d'echantillons
-       int p, // nombre de covariables
-       int m, // taille de Y (multivarié)
-       int k) // nombre de composantes dans le mélange
-{
-       //Initialize outputs
-       copyArray(phiInit, phi, p*m*k);
-       copyArray(rhoInit, rho, m*m*k);
-       copyArray(piInit, pi, k);
-       //S is already allocated, and doesn't need to be 'zeroed'
-
-       //Other local variables: same as in R
-       Real* gam = (Real*)malloc(n*k*sizeof(Real));
-       copyArray(gamInit, gam, n*k);
-       Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real));
-       Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real));
-       Real* b = (Real*)malloc(k*sizeof(Real));
-       Real* X2 = (Real*)malloc(n*p*k*sizeof(Real));
-       Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real));
-       *llh = -INFINITY;
-       Real* pi2 = (Real*)malloc(k*sizeof(Real));
-       const Real EPS = 1e-15;
-       // Additional (not at this place, in R file)
-       Real* gam2 = (Real*)malloc(k*sizeof(Real));
-       Real* sqNorm2 = (Real*)malloc(k*sizeof(Real));
-       Real* detRho = (Real*)malloc(k*sizeof(Real));
-       gsl_matrix* matrix = gsl_matrix_alloc(m, m);
-       gsl_permutation* permutation = gsl_permutation_alloc(m);
-       Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
-       Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
-       const Real gaussConstM = pow(2.*M_PI,m/2.);
-       Real* Phi = (Real*)malloc(p*m*k*sizeof(Real));
-       Real* Rho = (Real*)malloc(m*m*k*sizeof(Real));
-       Real* Pi = (Real*)malloc(k*sizeof(Real));
-
-       for (int ite=1; ite<=maxi; ite++)
-       {
-               copyArray(phi, Phi, p*m*k);
-               copyArray(rho, Rho, m*m*k);
-               copyArray(pi, Pi, k);
-
-               // Calculs associés a Y et X
-               for (int r=0; r<k; r++)
-               {
-                       for (int mm=0; mm<m; mm++)
-                       {
-                               //Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
-                               for (int u=0; u<n; u++)
-                                       Y2[ai(u,mm,r,n,m,k)] = sqrt(gam[mi(u,r,n,k)]) * Y[mi(u,mm,n,m)];
-                       }
-                       for (int i=0; i<n; i++)
-                       {
-                               //X2[i,,r] = sqrt(gam[i,r]) * X[i,]
-                               for (int u=0; u<p; u++)
-                                       X2[ai(i,u,r,n,p,k)] = sqrt(gam[mi(i,r,n,k)]) * X[mi(i,u,n,p)];
-                       }
-                       for (int mm=0; mm<m; mm++)
-                       {
-                               //ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
-                               for (int u=0; u<p; u++)
-                               {
-                                       Real dotProduct = 0.;
-                                       for (int v=0; v<n; v++)
-                                               dotProduct += X2[ai(v,u,r,n,p,k)] * Y2[ai(v,mm,r,n,m,k)];
-                                       ps2[ai(u,mm,r,p,m,k)] = dotProduct;
-                               }
-                       }
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int s=0; s<p; s++)
-                               {
-                                       //Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
-                                       Real dotProduct = 0.;
-                                       for (int u=0; u<n; u++)
-                                               dotProduct += X2[ai(u,j,r,n,p,k)] * X2[ai(u,s,r,n,p,k)];
-                                       Gram2[ai(j,s,r,p,p,k)] = dotProduct;
-                               }
-                       }
-               }
-
-               /////////////
-               // Etape M //
-               /////////////
-
-               // Pour pi
-               for (int r=0; r<k; r++)
-               {
-                       //b[r] = sum(abs(phi[,,r]))
-                       Real sumAbsPhi = 0.;
-                       for (int u=0; u<p; u++)
-                               for (int v=0; v<m; v++)
-                                       sumAbsPhi += fabs(phi[ai(u,v,r,p,m,k)]);
-                       b[r] = sumAbsPhi;
-               }
-               //gam2 = colSums(gam)
-               for (int u=0; u<k; u++)
-               {
-                       Real sumOnColumn = 0.;
-                       for (int v=0; v<n; v++)
-                               sumOnColumn += gam[mi(v,u,n,k)];
-                       gam2[u] = sumOnColumn;
-               }
-               //a = sum(gam %*% log(pi))
-               Real a = 0.;
-               for (int u=0; u<n; u++)
-               {
-                       Real dotProduct = 0.;
-                       for (int v=0; v<k; v++)
-                               dotProduct += gam[mi(u,v,n,k)] * log(pi[v]);
-                       a += dotProduct;
-               }
-
-               //tant que les proportions sont negatives
-               int kk = 0,
-                       pi2AllPositive = 0;
-               Real invN = 1./n;
-               while (!pi2AllPositive)
-               {
-                       //pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
-                       Real pow_01_kk = pow(0.1,kk);
-                       for (int r=0; r<k; r++)
-                               pi2[r] = pi[r] + pow_01_kk * (invN*gam2[r] - pi[r]);
-                       //pi2AllPositive = all(pi2 >= 0)
-                       pi2AllPositive = 1;
-                       for (int r=0; r<k; r++)
-                       {
-                               if (pi2[r] < 0)
-                               {
-                                       pi2AllPositive = 0;
-                                       break;
-                               }
-                       }
-                       kk++;
-               }
-
-               //sum(pi^gamma * b)
-               Real piPowGammaDotB = 0.;
-               for (int v=0; v<k; v++)
-                       piPowGammaDotB += pow(pi[v],gamma) * b[v];
-               //sum(pi2^gamma * b)
-               Real pi2PowGammaDotB = 0.;
-               for (int v=0; v<k; v++)
-                       pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
-               //sum(gam2 * log(pi2))
-               Real gam2DotLogPi2 = 0.;
-               for (int v=0; v<k; v++)
-                       gam2DotLogPi2 += gam2[v] * log(pi2[v]);
-
-               //t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
-               while (-invN*a + lambda*piPowGammaDotB < -invN*gam2DotLogPi2 + lambda*pi2PowGammaDotB
-                       && kk<1000)
-               {
-                       Real pow_01_kk = pow(0.1,kk);
-                       //pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
-                       for (int v=0; v<k; v++)
-                               pi2[v] = pi[v] + pow_01_kk * (invN*gam2[v] - pi[v]);
-                       //pi2 was updated, so we recompute pi2PowGammaDotB and gam2DotLogPi2
-                       pi2PowGammaDotB = 0.;
-                       for (int v=0; v<k; v++)
-                               pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
-                       gam2DotLogPi2 = 0.;
-                       for (int v=0; v<k; v++)
-                               gam2DotLogPi2 += gam2[v] * log(pi2[v]);
-                       kk++;
-               }
-               Real t = pow(0.1,kk);
-               //sum(pi + t*(pi2-pi))
-               Real sumPiPlusTbyDiff = 0.;
-               for (int v=0; v<k; v++)
-                       sumPiPlusTbyDiff += (pi[v] + t*(pi2[v] - pi[v]));
-               //pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-               for (int v=0; v<k; v++)
-                       pi[v] = (pi[v] + t*(pi2[v] - pi[v])) / sumPiPlusTbyDiff;
-
-               //Pour phi et rho
-               for (int r=0; r<k; r++)
-               {
-                       for (int mm=0; mm<m; mm++)
-                       {
-                               Real ps = 0.,
-                                       nY2 = 0.;
-                               // Compute ps, and nY2 = sum(Y2[,mm,r]^2)
-                               for (int i=0; i<n; i++)
-                               {
-                                       //< X2[i,,r] , phi[,mm,r] >
-                                       Real dotProduct = 0.;
-                                       for (int u=0; u<p; u++)
-                                               dotProduct += X2[ai(i,u,r,n,p,k)] * phi[ai(u,mm,r,p,m,k)];
-                                       //ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
-                                       ps += Y2[ai(i,mm,r,n,m,k)] * dotProduct;
-                                       nY2 += Y2[ai(i,mm,r,n,m,k)] * Y2[ai(i,mm,r,n,m,k)];
-                               }
-                               //rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
-                               rho[ai(mm,mm,r,m,m,k)] = (ps + sqrt(ps*ps + 4*nY2 * gam2[r])) / (2*nY2);
-                       }
-               }
-
-               for (int r=0; r<k; r++)
-               {
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int mm=0; mm<m; mm++)
-                               {
-                                       //sum(phi[-j,mm,r] * Gram2[j,-j,r])
-                                       Real phiDotGram2 = 0.;
-                                       for (int u=0; u<p; u++)
-                                       {
-                                               if (u != j)
-                                                       phiDotGram2 += phi[ai(u,mm,r,p,m,k)] * Gram2[ai(j,u,r,p,p,k)];
-                                       }
-                                       //S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
-                                       S[ai(j,mm,r,p,m,k)] = -rho[ai(mm,mm,r,m,m,k)] * ps2[ai(j,mm,r,p,m,k)]
-                                               + phiDotGram2;
-                                       Real pirPowGamma = pow(pi[r],gamma);
-                                       if (fabs(S[ai(j,mm,r,p,m,k)]) <= n*lambda*pirPowGamma)
-                                               phi[ai(j,mm,r,p,m,k)] = 0.;
-                                       else if (S[ai(j,mm,r,p,m,k)] > n*lambda*pirPowGamma)
-                                       {
-                                               phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)])
-                                                       / Gram2[ai(j,j,r,p,p,k)];
-                                       }
-                                       else
-                                       {
-                                               phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)])
-                                                       / Gram2[ai(j,j,r,p,p,k)];
-                                       }
-                               }
-                       }
-               }
-
-               /////////////
-               // Etape E //
-               /////////////
-
-               // Precompute det(rho[,,r]) for r in 1...k
-               int signum;
-               for (int r=0; r<k; r++)
-               {
-                       for (int u=0; u<m; u++)
-                       {
-                               for (int v=0; v<m; v++)
-                                       matrix->data[u*m+v] = rho[ai(u,v,r,m,m,k)];
-                       }
-                       gsl_linalg_LU_decomp(matrix, permutation, &signum);
-                       detRho[r] = gsl_linalg_LU_det(matrix, signum);
-               }
-
-               Real sumLogLLH = 0.;
-               for (int i=0; i<n; i++)
-               {
-                       for (int r=0; r<k; r++)
-                       {
-                               //compute Y[i,]%*%rho[,,r]
-                               for (int u=0; u<m; u++)
-                               {
-                                       YiRhoR[u] = 0.;
-                                       for (int v=0; v<m; v++)
-                                               YiRhoR[u] += Y[mi(i,v,n,m)] * rho[ai(v,u,r,m,m,k)];
-                               }
-
-                               //compute X[i,]%*%phi[,,r]
-                               for (int u=0; u<m; u++)
-                               {
-                                       XiPhiR[u] = 0.;
-                                       for (int v=0; v<p; v++)
-                                               XiPhiR[u] += X[mi(i,v,n,p)] * phi[ai(v,u,r,p,m,k)];
-                               }
-
-                               //compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2
-                               sqNorm2[r] = 0.;
-                               for (int u=0; u<m; u++)
-                                       sqNorm2[r] += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
-                       }
-
-                       Real sumGamI = 0.;
-                       for (int r=0; r<k; r++)
-                       {
-                               gam[mi(i,r,n,k)] = pi[r] * exp(-.5*sqNorm2[r]) * detRho[r];
-                               sumGamI += gam[mi(i,r,n,k)];
-                       }
-
-                       sumLogLLH += log(sumGamI) - log(gaussConstM);
-                       if (sumGamI > EPS) //else: gam[i,] is already ~=0
-                       {
-                               for (int r=0; r<k; r++)
-                                       gam[mi(i,r,n,k)] /= sumGamI;
-                       }
-               }
-
-               //sumPen = sum(pi^gamma * b)
-               Real sumPen = 0.;
-               for (int r=0; r<k; r++)
-                       sumPen += pow(pi[r],gamma) * b[r];
-               Real last_llh = *llh;
-               //llh = -sumLogLLH/n + lambda*sumPen
-               *llh = -invN * sumLogLLH + lambda * sumPen;
-               Real dist = ite==1 ? *llh : (*llh - last_llh) / (1. + fabs(*llh));
-
-               //Dist1 = max( abs(phi-Phi) / (1+abs(phi)) )
-               Real Dist1 = 0.;
-               for (int u=0; u<p; u++)
-               {
-                       for (int v=0; v<m; v++)
-                       {
-                               for (int w=0; w<k; w++)
-                               {
-                                       Real tmpDist = fabs(phi[ai(u,v,w,p,m,k)]-Phi[ai(u,v,w,p,m,k)])
-                                               / (1.+fabs(phi[ai(u,v,w,p,m,k)]));
-                                       if (tmpDist > Dist1)
-                                               Dist1 = tmpDist;
-                               }
-                       }
-               }
-               //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
-               Real Dist2 = 0.;
-               for (int u=0; u<m; u++)
-               {
-                       for (int v=0; v<m; v++)
-                       {
-                               for (int w=0; w<k; w++)
-                               {
-                                       Real tmpDist = fabs(rho[ai(u,v,w,m,m,k)]-Rho[ai(u,v,w,m,m,k)])
-                                               / (1.+fabs(rho[ai(u,v,w,m,m,k)]));
-                                       if (tmpDist > Dist2)
-                                               Dist2 = tmpDist;
-                               }
-                       }
-               }
-               //Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)))
-               Real Dist3 = 0.;
-               for (int u=0; u<n; u++)
-               {
-                       for (int v=0; v<k; v++)
-                       {
-                               Real tmpDist = fabs(pi[v]-Pi[v]) / (1.+fabs(pi[v]));
-                               if (tmpDist > Dist3)
-                                       Dist3 = tmpDist;
-                       }
-               }
-               //dist2=max([max(Dist1),max(Dist2),max(Dist3)]);
-               Real dist2 = Dist1;
-               if (Dist2 > dist2)
-                       dist2 = Dist2;
-               if (Dist3 > dist2)
-                       dist2 = Dist3;
-
-               if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
-                       break;
-       }
-
-       //affec = apply(gam, 1, which.max)
-       for (int i=0; i<n; i++)
-       {
-               Real rowMax = 0.;
-               affec[i] = 0;
-               for (int j=0; j<k; j++)
-               {
-                       if (gam[mi(i,j,n,k)] > rowMax)
-                       {
-                               affec[i] = j+1; //R indices start at 1
-                               rowMax = gam[mi(i,j,n,k)];
-                       }
-               }
-       }
-
-       //free memory
-       free(b);
-       free(gam);
-       free(Phi);
-       free(Rho);
-       free(Pi);
-       free(Gram2);
-       free(ps2);
-       free(detRho);
-       gsl_matrix_free(matrix);
-       gsl_permutation_free(permutation);
-       free(XiPhiR);
-       free(YiRhoR);
-       free(gam2);
-       free(pi2);
-       free(X2);
-       free(Y2);
-       free(sqNorm2);
-}
diff --git a/pkg/src/sources/EMGLLF.h b/pkg/src/sources/EMGLLF.h

deleted file mode 100644 (file)

index e15cb87..0000000
--- a/pkg/src/sources/EMGLLF.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef valse_EMGLLF_H
-#define valse_EMGLLF_H
-
-#include "utils.h"
-
-void EMGLLF_core(
-       // IN parameters
-       const Real* phiInit,
-       const Real* rhoInit,
-       const Real* piInit,
-       const Real* gamInit,
-       int mini,
-       int maxi,
-       Real gamma,
-       Real lambda,
-       const Real* X,
-       const Real* Y,
-       Real tau,
-       // OUT parameters
-       Real* phi,
-       Real* rho,
-       Real* pi,
-       Real* LLF,
-       Real* S,
-       int* affec,
-       // additional size parameters
-       int n,
-       int p,
-       int m,
-       int k);
-
-#endif
diff --git a/pkg/src/sources/EMGrank.c b/pkg/src/sources/EMGrank.c

deleted file mode 100644 (file)

index 3a9bf94..0000000
--- a/pkg/src/sources/EMGrank.c
+++ /dev/null
@@ -1,307 +0,0 @@
-#include <stdlib.h>
-#include <gsl/gsl_linalg.h>
-#include "utils.h"
-
-// Compute pseudo-inverse of a square matrix
-static Real* pinv(const Real* matrix, int dim)
-{
-       gsl_matrix* U = gsl_matrix_alloc(dim,dim);
-       gsl_matrix* V = gsl_matrix_alloc(dim,dim);
-       gsl_vector* S = gsl_vector_alloc(dim);
-       gsl_vector* work = gsl_vector_alloc(dim);
-       Real EPS = 1e-10; //threshold for singular value "== 0"
-       
-       //copy matrix into U
-       copyArray(matrix, U->data, dim*dim);
-
-       //U,S,V = SVD of matrix
-       gsl_linalg_SV_decomp(U, V, S, work);
-       gsl_vector_free(work);
-
-       // Obtain pseudo-inverse by V*S^{-1}*t(U)
-       Real* inverse = (Real*)malloc(dim*dim*sizeof(Real));
-       for (int i=0; i<dim; i++)
-       {
-               for (int ii=0; ii<dim; ii++)
-               {
-                       Real dotProduct = 0.0;
-                       for (int j=0; j<dim; j++)
-                               dotProduct += V->data[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j];
-                       inverse[i*dim+ii] = dotProduct;
-               }
-       }
-       
-       gsl_matrix_free(U);
-       gsl_matrix_free(V);
-       gsl_vector_free(S);
-       return inverse;
-}
-
-// TODO: comment EMGrank purpose
-void EMGrank_core(
-       // IN parameters
-       const Real* Pi, // parametre de proportion
-       const Real* Rho, // parametre initial de variance renormalisé
-       int mini, // nombre minimal d'itérations dans l'algorithme EM
-       int maxi, // nombre maximal d'itérations dans l'algorithme EM
-       const Real* X, // régresseurs
-       const Real* Y, // réponse
-       Real tau, // seuil pour accepter la convergence
-       const int* rank, // vecteur des rangs possibles
-       // OUT parameters
-       Real* phi, // parametre de moyenne renormalisé, calculé par l'EM
-       Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres
-       // additional size parameters
-       int n, // taille de l'echantillon
-       int p, // nombre de covariables
-       int m, // taille de Y (multivarié)
-       int k) // nombre de composantes
-{
-       // Allocations, initializations
-       Real* Phi = (Real*)calloc(p*m*k,sizeof(Real));
-       Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real));
-       int signum;
-       Real invN = 1.0/n;
-       int deltaPhiBufferSize = 20;
-       Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real));
-       int ite = 0;
-       Real sumDeltaPhi = 0.0;
-       Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
-       Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
-       Real* Xr = (Real*)malloc(n*p*sizeof(Real));
-       Real* Yr = (Real*)malloc(n*m*sizeof(Real));
-       Real* tXrXr = (Real*)malloc(p*p*sizeof(Real));
-       Real* tXrYr = (Real*)malloc(p*m*sizeof(Real));
-       gsl_matrix* matrixM = gsl_matrix_alloc(p, m);
-       gsl_matrix* matrixE = gsl_matrix_alloc(m, m);
-       gsl_permutation* permutation = gsl_permutation_alloc(m);
-       gsl_matrix* V = gsl_matrix_alloc(m,m);
-       gsl_vector* S = gsl_vector_alloc(m);
-       gsl_vector* work = gsl_vector_alloc(m);
-
-       //Initialize class memberships (all elements in class 0; TODO: randomize ?)
-       int* Z = (int*)calloc(n, sizeof(int));
-
-       //Initialize phi to zero, because some M loops might exit before phi affectation
-       zeroArray(phi, p*m*k);
-
-       while (ite<mini || (ite<maxi && sumDeltaPhi>tau))
-       {
-               /////////////
-               // Etape M //
-               /////////////
-               
-               //M step: Mise à jour de Beta (et donc phi)
-               for (int r=0; r<k; r++)
-               {
-                       //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
-                       int cardClustR=0;
-                       for (int i=0; i<n; i++)
-                       {
-                               if (Z[i] == r)
-                               {
-                                       for (int j=0; j<p; j++)
-                                               Xr[mi(cardClustR,j,n,p)] = X[mi(i,j,n,p)];
-                                       for (int j=0; j<m; j++)
-                                               Yr[mi(cardClustR,j,n,m)] = Y[mi(i,j,n,m)];
-                                       cardClustR++;
-                               }
-                       }
-                       if (cardClustR == 0)
-                               continue;
-
-                       //Compute tXrXr = t(Xr) * Xr
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int jj=0; jj<p; jj++)
-                               {
-                                       Real dotProduct = 0.0;
-                                       for (int u=0; u<cardClustR; u++)
-                                               dotProduct += Xr[mi(u,j,n,p)] * Xr[mi(u,jj,n,p)];
-                                       tXrXr[mi(j,jj,p,p)] = dotProduct;
-                               }
-                       }
-
-                       //Get pseudo inverse = (t(Xr)*Xr)^{-1}
-                       Real* invTXrXr = pinv(tXrXr, p);
-                       
-                       // Compute tXrYr = t(Xr) * Yr
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int jj=0; jj<m; jj++)
-                               {
-                                       Real dotProduct = 0.0;
-                                       for (int u=0; u<cardClustR; u++)
-                                               dotProduct += Xr[mi(u,j,n,p)] * Yr[mi(u,jj,n,m)];
-                                       tXrYr[mi(j,jj,p,m)] = dotProduct;
-                               }
-                       }
-
-                       //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int jj=0; jj<m; jj++)
-                               {
-                                       Real dotProduct = 0.0;
-                                       for (int u=0; u<p; u++)
-                                               dotProduct += invTXrXr[mi(j,u,p,p)] * tXrYr[mi(u,jj,p,m)];
-                                       matrixM->data[j*m+jj] = dotProduct;
-                               }
-                       }
-                       free(invTXrXr);
-
-                       //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-                       gsl_linalg_SV_decomp(matrixM, V, S, work);
-
-                       //Set m-rank(r) singular values to zero, and recompose
-                       //best rank(r) approximation of the initial product
-                       for (int j=rank[r]; j<m; j++)
-                               S->data[j] = 0.0;
-                       
-                       //[intermediate step] Compute hatBetaR = U * S * t(V)
-                       double* U = matrixM->data; //GSL require double precision
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int jj=0; jj<m; jj++)
-                               {
-                                       Real dotProduct = 0.0;
-                                       for (int u=0; u<m; u++)
-                                               dotProduct += U[j*m+u] * S->data[u] * V->data[jj*m+u];
-                                       hatBetaR[mi(j,jj,p,m)] = dotProduct;
-                               }
-                       }
-
-                       //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r)
-                       for (int j=0; j<p; j++)
-                       {
-                               for (int jj=0; jj<m; jj++)
-                               {
-                                       Real dotProduct=0.0;
-                                       for (int u=0; u<m; u++)
-                                               dotProduct += hatBetaR[mi(j,u,p,m)] * Rho[ai(u,jj,r,m,m,k)];
-                                       phi[ai(j,jj,r,p,m,k)] = dotProduct;
-                               }
-               }
-               }
-
-               /////////////
-               // Etape E //
-               /////////////
-               
-               Real sumLogLLF2 = 0.0;
-               for (int i=0; i<n; i++)
-               {
-                       Real sumLLF1 = 0.0;
-                       Real maxLogGamIR = -INFINITY;
-                       for (int r=0; r<k; r++)
-                       {
-                               //Compute
-                               //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)...
-                               //*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) );
-                               //split in several sub-steps
-                               
-                               //compute det(Rho(:,:,r)) [TODO: avoid re-computations]
-                               for (int j=0; j<m; j++)
-                               {
-                                       for (int jj=0; jj<m; jj++)
-                                               matrixE->data[j*m+jj] = Rho[ai(j,jj,r,m,m,k)];
-                               }
-                               gsl_linalg_LU_decomp(matrixE, permutation, &signum);
-                               Real detRhoR = gsl_linalg_LU_det(matrixE, signum);
-
-                               //compute Y(i,:)*Rho(:,:,r)
-                               for (int j=0; j<m; j++)
-                               {
-                                       YiRhoR[j] = 0.0;
-                                       for (int u=0; u<m; u++)
-                                               YiRhoR[j] += Y[mi(i,u,n,m)] * Rho[ai(u,j,r,m,m,k)];
-                               }
-
-                               //compute X(i,:)*phi(:,:,r)
-                               for (int j=0; j<m; j++)
-                               {
-                                       XiPhiR[j] = 0.0;
-                                       for (int u=0; u<p; u++)
-                                               XiPhiR[j] += X[mi(i,u,n,p)] * phi[ai(u,j,r,p,m,k)];
-                               }
-
-                               //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) >
-                               Real dotProduct = 0.0;
-                               for (int u=0; u<m; u++)
-                                       dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
-                               Real logGamIR = log(Pi[r]) + log(detRhoR) - 0.5*dotProduct;
-
-                               //Z(i) = index of max (gam(i,:))
-                               if (logGamIR > maxLogGamIR)
-                               {
-                                       Z[i] = r;
-                                       maxLogGamIR = logGamIR;
-                               }
-                               sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0);
-                       }
-
-                       sumLogLLF2 += log(sumLLF1);
-               }
-
-               // Assign output variable LLF
-               *LLF = -invN * sumLogLLF2;
-
-               //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi))));
-               Real newDeltaPhi = 0.0;
-               for (int j=0; j<p; j++)
-               {
-                       for (int jj=0; jj<m; jj++)
-                       {
-                               for (int r=0; r<k; r++)
-                               {
-                                       Real tmpDist = fabs(phi[ai(j,jj,r,p,m,k)]-Phi[ai(j,jj,r,p,m,k)])
-                                               / (1.0+fabs(phi[ai(j,jj,r,p,m,k)]));
-                                       if (tmpDist > newDeltaPhi)
-                                               newDeltaPhi = tmpDist;
-                               }
-                       }
-               }
-
-               //update distance parameter to check algorithm convergence (delta(phi, Phi))
-               //TODO: deltaPhi should be a linked list for perf.
-               if (ite < deltaPhiBufferSize)
-                       deltaPhi[ite] = newDeltaPhi;
-               else
-               {
-                       sumDeltaPhi -= deltaPhi[0];
-                       for (int u=0; u<deltaPhiBufferSize-1; u++)
-                               deltaPhi[u] = deltaPhi[u+1];
-                       deltaPhi[deltaPhiBufferSize-1] = newDeltaPhi;
-               }
-               sumDeltaPhi += newDeltaPhi;
-
-               // update other local variables
-               for (int j=0; j<m; j++)
-               {
-                       for (int jj=0; jj<p; jj++)
-                       {
-                               for (int r=0; r<k; r++)
-                                       Phi[ai(jj,j,r,p,m,k)] = phi[ai(jj,j,r,p,m,k)];
-                       }
-               }
-               ite++;
-       }
-
-       //free memory
-       free(hatBetaR);
-       free(deltaPhi);
-       free(Phi);
-       gsl_matrix_free(matrixE);
-       gsl_matrix_free(matrixM);
-       gsl_permutation_free(permutation);
-       gsl_vector_free(work);
-       gsl_matrix_free(V);
-       gsl_vector_free(S);
-       free(XiPhiR);
-       free(YiRhoR);
-       free(Xr);
-       free(Yr);
-       free(tXrXr);
-       free(tXrYr);
-       free(Z);
-}
diff --git a/pkg/src/sources/EMGrank.h b/pkg/src/sources/EMGrank.h

deleted file mode 100644 (file)

index b7367d8..0000000
--- a/pkg/src/sources/EMGrank.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef valse_EMGrank_H
-#define valse_EMGrank_H
-
-#include "utils.h"
-
-void EMGrank_core(
-       // IN parameters
-       const Real* Pi,
-       const Real* Rho,
-       int mini,
-       int maxi,
-       const Real* X,
-       const Real* Y,
-       Real tau,
-       const int* rank,
-       // OUT parameters
-       Real* phi,
-       Real* LLF,
-       // additional size parameters
-       int n,
-       int p,
-       int m,
-       int k);
-
-#endif
diff --git a/pkg/src/sources/utils.h b/pkg/src/sources/utils.h

deleted file mode 100644 (file)

index be3a72f..0000000
--- a/pkg/src/sources/utils.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef valse_utils_H
-#define valse_utils_H
-
-//#include <stdint.h>
-
-/********
- * Types
- *******/
-
-typedef double Real;
-//typedef uint32_t UInt;
-//typedef int32_t Int;
-
-/*******************************
- * Matrix and arrays indexation
- *******************************/
-
-// Matrix Index ; TODO? ncol unused
-#define mi(i,j,nrow,ncol)\
-       j*nrow + i
-
-// Array Index ; TODO? d3 unused
-#define ai(i,j,k,d1,d2,d3)\
-       k*d1*d2 + j*d1 + i
-
-// Array4 Index ; TODO? ...
-#define ai4(i,j,k,m,d1,d2,d3,d4)\
-       m*d1*d2*d3 + k*d1*d2 + j*d1 + i
-
-/*************************
- * Array copy & "zeroing"
- ************************/
-
-// Fill an array with zeros
-#define zeroArray(array, size)\
-{\
-       for (int u=0; u<size; u++)\
-               array[u] = 0;\
-}
-
-// Copy an 1D array
-#define copyArray(array, copy, size)\
-{\
-       for (int u=0; u<size; u++)\
-               copy[u] = array[u];\
-}
-
-#endif
diff --git a/pkg/tests/testthat.R b/pkg/tests/testthat.R

deleted file mode 100644 (file)

index 88e5631..0000000
--- a/pkg/tests/testthat.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(testthat)
-library(valse) #ou load_all()
-
-test_check("valse")
diff --git a/pkg/tests/testthat/helper-context1.R b/pkg/tests/testthat/helper-context1.R

deleted file mode 100644 (file)

index b40f358..0000000
--- a/pkg/tests/testthat/helper-context1.R
+++ /dev/null
@@ -1,5 +0,0 @@
-# Potential helpers for context 1
-help <- function()
-{
-       #...
-}
diff --git a/pkg/tests/testthat/test-context1.R b/pkg/tests/testthat/test-context1.R

deleted file mode 100644 (file)

index 17c633f..0000000
--- a/pkg/tests/testthat/test-context1.R
+++ /dev/null
@@ -1,11 +0,0 @@
-context("Context1")
-
-test_that("function 1...",
-{
-       #expect_lte( ..., ...)
-})
-
-test_that("function 2...",
-{
-       #expect_equal(..., ...)
-})
diff --git a/pkg/vignettes/.gitignore b/pkg/vignettes/.gitignore

deleted file mode 100644 (file)

index e6493d4..0000000
--- a/pkg/vignettes/.gitignore
+++ /dev/null
@@ -1,14 +0,0 @@
-#ignore jupyter generated file (ipynb, HTML)
-*.html
-*.ipynb
-
-#and various (pdf)LaTeX files, in case of
-*.tex
-*.pdf
-*.aux
-*.dvi
-*.log
-*.out
-*.toc
-*.synctex.gz
-/figure/
author	emilie <emilie@devijver.org>
	Fri, 21 Apr 2017 13:52:14 +0000 (15:52 +0200)
committer	emilie <emilie@devijver.org>
	Fri, 21 Apr 2017 13:52:14 +0000 (15:52 +0200)
pkg/DESCRIPTION	[deleted file]	patch \| blob \| blame \| history
pkg/LICENSE	[deleted file]	patch \| blob \| blame \| history
pkg/R/A_NAMESPACE.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/EMGLLF.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/EMGrank.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/computeGridLambda.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/constructionModelesLassoMLE.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/constructionModelesLassoRank.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/generateXY.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/initSmallEM.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/main.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/plot_valse.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/selectVariables.R	[deleted file]	patch \| blob \| blame \| history
pkg/R/util.R	[deleted file]	patch \| blob \| blame \| history
pkg/data/data.RData	[deleted file]	patch \| blob \| blame \| history
pkg/inst/testdata/TODO.csv	[deleted file]	patch \| blob \| blame \| history
pkg/man/valse-package.Rd	[deleted file]	patch \| blob \| blame \| history
pkg/src/Makevars	[deleted file]	patch \| blob \| blame \| history
pkg/src/adapters/a.EMGLLF.c	[deleted file]	patch \| blob \| blame \| history
pkg/src/adapters/a.EMGrank.c	[deleted file]	patch \| blob \| blame \| history
pkg/src/sources/EMGLLF.c	[deleted file]	patch \| blob \| blame \| history
pkg/src/sources/EMGLLF.h	[deleted file]	patch \| blob \| blame \| history
pkg/src/sources/EMGrank.c	[deleted file]	patch \| blob \| blame \| history
pkg/src/sources/EMGrank.h	[deleted file]	patch \| blob \| blame \| history
pkg/src/sources/utils.h	[deleted file]	patch \| blob \| blame \| history
pkg/tests/testthat.R	[deleted file]	patch \| blob \| blame \| history
pkg/tests/testthat/helper-context1.R	[deleted file]	patch \| blob \| blame \| history
pkg/tests/testthat/test-context1.R	[deleted file]	patch \| blob \| blame \| history
pkg/vignettes/.gitignore	[deleted file]	patch \| blob \| blame \| history