merge with remote

author Benjamin Auder <benjamin.auder@somewhere>

Thu, 6 Feb 2020 19:18:59 +0000 (20:18 +0100)

committer Benjamin Auder <benjamin.auder@somewhere>

Thu, 6 Feb 2020 19:18:59 +0000 (20:18 +0100)
author Benjamin Auder <benjamin.auder@somewhere>
Thu, 6 Feb 2020 19:18:59 +0000 (20:18 +0100)
committer Benjamin Auder <benjamin.auder@somewhere>
Thu, 6 Feb 2020 19:18:59 +0000 (20:18 +0100)
diff --git a/.gitattributes b/.gitattributes

new file mode 100644 (file)

index 0000000..30f5e54
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+*.pdf filter=fat
+*.zip filter=fat
+*.tar.xz filter=fat
+*.png filter=fat
diff --git a/.gitfat b/.gitfat

new file mode 100644 (file)

index 0000000..fc5cc54
--- /dev/null
+++ b/.gitfat
@@ -0,0 +1,2 @@
+[rsync]
+remote = gitfat@auder.net:~/files/valse
diff --git a/.gitignore b/.gitignore

index b0455d8..a84520a 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,3 @@
-#files generated by initialize.sh
-/.gitfat
-/.gitattributes
-
  #ignore temporary files
  *~
  *.swp
@@ -13,6 +9,7 @@
  *.Rproj*
  .Rprofile
  .Rbuildignore
+Rprof.out
  
  #ignore R CMD build/check genrated files
  /*.Rcheck/
@@ -22,8 +19,3 @@
  *.o
  *.so
  *.exe
-
-#misc
-Rprof.out
-*.zip
-.Rproj.user
diff --git a/README.md b/README.md

index 30d87ce..ef9aa92 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,6 +1,11 @@
  # VAriable seLection with mixtureS of modEls
  
-This code is the applied part of the PhD thesis of [Benjamin Gohehry](http://www.math.u-psud.fr/~goehry/).
+With [Emilie Devijver](http://ama.liglab.fr/~devijver/) and [Benjamin Gohehry](https://fr.linkedin.com/in/benjamin-goehry-275276124).
+
+Re-writing from [a similar project](https://github.com/yagu0/select) in MATLAB, which
+corresponds to applied parts of the PhD thesis of both co-authors.
+
+This git repo uses [git-fat](https://github.com/jedbrown/git-fat) to store binary objects. "git fat init && git fat pull" will get them.
  
  ## Description
  
diff --git a/TODO b/TODO

index 558e090..fed6eff 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,7 +1,15 @@
-Apply git-fat patch
+n = 100; m = 70; p = 5
+X = matrix(runif(n*p, -10, 10), nrow=n)
+Y = matrix(runif(n*m, -5, 15), nrow=n)
  
-Continue debug: (message "cannot allocate vector 40gb" ?!
-  X = matrix(runif(n*p, -10, 10), nrow=n)
-  Y = matrix(runif(n*m, -5, 15), nrow=n)
-  V1 = valse::valse(X, Y, fast=FALSE) #OK
-  V2 = valse::valse(X, Y, fast=FALSE) #not OK... almost
+V1 = valse::valse(X, Y, fast=FALSE)
+Error in while (!pi2AllPositive) { : 
+  missing value where TRUE/FALSE needed
+
+V2 = valse::valse(X, Y, fast=TRUE)
+list()
+Error in out[[ind_uniq[l]]] : 
+  attempt to select less than one element in get1index
+
+==> Error identified: line 61 in initSmallEM.R, division by 0
+It occurs also for smallers values of n and m, e.g.: n = 20; m = 20; p = 3
diff --git a/hooks/pre-push b/hooks/pre-push

index 7f26c8f..ba815bd 100755 (executable)
--- a/hooks/pre-push
+++ b/hooks/pre-push
@@ -1,5 +1,3 @@
  #!/bin/sh
  
-./.git-fat/git-fat pull
-./.git-fat/git-fat push
-git submodule update --merge
+git-fat push
diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION

index c28f54b..65ed448 100644 (file)
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: valse
  Title: Variable Selection With Mixture Of Models
-Date: 2016-12-01
+Date: 2020-01-11
  Version: 0.1-0
  Description: Two methods are implemented to cluster data with finite mixture
      regression models. Those procedures deal with high-dimensional covariates and
@@ -11,12 +11,12 @@ Description: Two methods are implemented to cluster data with finite mixture
      (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model-
      based clustering for high-dimensional data. Application to functional data' by
      Emilie Devijver, published in Advances in Data Analysis and Clustering (2016).
-Author: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
+Author: Benjamin Auder <benjamin.auder@universite-paris-saclay.fr> [aut,cre],
      Emilie Devijver <Emilie.Devijver@kuleuven.be> [aut],
      Benjamin Goehry <Benjamin.Goehry@math.u-psud.fr> [aut]
-Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
+Maintainer: Benjamin Auder <benjamin.auder@universite-paris-saclay.fr>
  Depends:
-    R (>= 3.0.0)
+    R (>= 3.5.0)
  Imports:
      MASS,
      parallel
@@ -27,7 +27,7 @@ Suggests:
      testthat
  URL: http://git.auder.net/?p=valse.git
  License: MIT + file LICENSE
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
  Collate:
      'plot_valse.R'
      'main.R'
diff --git a/pkg/LICENSE b/pkg/LICENSE

index a212458..ccb78c4 100644 (file)
--- a/pkg/LICENSE
+++ b/pkg/LICENSE
@@ -1,23 +1,2 @@
-Copyright (c)
-  2014-2017, Benjamin Auder
-       2014-2017, Emilie Devijver
-       2016-2017, Benjamin Goehry
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+YEAR: 2014-2020
+COPYRIGHT HOLDER: Benjamin Auder, Emilie Devijver, Benjamin Goehry
diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R

index 57638f9..c30b023 100644 (file)
--- a/pkg/R/EMGLLF.R
+++ b/pkg/R/EMGLLF.R
@@ -1,4 +1,4 @@
-#' EMGLLF 
+#' EMGLLF
  #'
  #' Description de EMGLLF
  #'
@@ -23,13 +23,13 @@
  #'   affec : ...
  #'
  #' @export
-EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
    X, Y, eps, fast)
  {
    if (!fast)
    {
      # Function in R
-    return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+    return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
        X, Y, eps))
    }
  
@@ -38,14 +38,14 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
    p <- ncol(X)  #nombre de covariables
    m <- ncol(Y)  #taille de Y (multivarié)
    k <- length(piInit)  #nombre de composantes dans le mélange
-  .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
-    X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k), 
-    llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k, 
+  .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+    X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k),
+    llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k,
      PACKAGE = "valse")
  }
  
  # R version - slow but easy to read
-.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
    X, Y, eps)
  {
    # Matrix dimensions
diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R

index f2bf58e..4054e25 100644 (file)
--- a/pkg/R/EMGrank.R
+++ b/pkg/R/EMGrank.R
@@ -29,7 +29,7 @@ EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE)
    p <- ncol(X)  #nombre de covariables
    m <- ncol(Y)  #taille de Y (multivarié)
    k <- length(Pi)  #nombre de composantes dans le mélange
-  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, rank, phi = double(p * m * k), 
+  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), phi = double(p * m * k), 
      LLF = double(1), n, p, m, k, PACKAGE = "valse")
  }
  
diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R

index ac0788a..f087ba7 100644 (file)
--- a/pkg/R/computeGridLambda.R
+++ b/pkg/R/computeGridLambda.R
@@ -1,4 +1,4 @@
-#' computeGridLambda 
+#' computeGridLambda
  #'
  #' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
  #'
@@ -16,7 +16,7 @@
  #' @return the grid of regularization parameters
  #'
  #' @export
-computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, 
+computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini,
    maxi, eps, fast)
  {
    n <- nrow(X)
@@ -24,7 +24,7 @@ computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mi
    m <- ncol(Y)
    k <- length(piInit)
  
-  list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, 
+  list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0,
      X, Y, eps, fast)
  
    grid <- array(0, dim = c(p, m, k))
diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R

index d2a16bc..2d04adb 100644 (file)
--- a/pkg/R/constructionModelesLassoMLE.R
+++ b/pkg/R/constructionModelesLassoMLE.R
@@ -1,7 +1,7 @@
-#' constructionModelesLassoMLE 
+#' constructionModelesLassoMLE
  #'
  #' Construct a collection of models with the Lasso-MLE procedure.
-#' 
+#'
  #' @param phiInit an initialization for phi, get by initSmallEM.R
  #' @param rhoInit an initialization for rho, get by initSmallEM.R
  #' @param piInit an initialization for pi, get by initSmallEM.R
@@ -16,28 +16,28 @@
  #' @param ncores Number of cores, by default = 3
  #' @param fast TRUE to use compiled C code, FALSE for R code only
  #' @param verbose TRUE to show some execution traces
-#' 
+#'
  #' @return a list with several models, defined by phi, rho, pi, llh
  #'
  #' @export
-constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, 
+constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
    maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose)
  {
    if (ncores > 1)
    {
      cl <- parallel::makeCluster(ncores, outfile = "")
-    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit", 
-      "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", 
+    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit",
+      "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S",
        "ncores", "fast", "verbose"))
    }
  
    # Individual model computation
    computeAtLambda <- function(lambda)
    {
-    if (ncores > 1) 
+    if (ncores > 1)
        require("valse")  #nodes start with an empty environment
  
-    if (verbose) 
+    if (verbose)
        print(paste("Computations for lambda=", lambda))
  
      n <- nrow(X)
@@ -47,7 +47,7 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
      sel.lambda <- S[[lambda]]$selected
      # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
      col.sel <- which(sapply(sel.lambda, length) > 0)  #if list of selected vars
-    if (length(col.sel) == 0) 
+    if (length(col.sel) == 0)
        return(NULL)
  
      # lambda == 0 because we compute the EMV: no penalization here
@@ -88,7 +88,7 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
      #     log(piLambda[r]) + log(detRho[r]) - 0.5 *
      #       sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2)
      #   })
-    #   
+    #
      #   #logGam <- logGam - max(logGam) #adjust without changing proportions -> change the LLH
      #   gam <- exp(logGam)
      #   norm_fact <- sum(gam)
@@ -106,7 +106,7 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
        lapply(1:length(S), computeAtLambda)
      }
  
-  if (ncores > 1) 
+  if (ncores > 1)
      parallel::stopCluster(cl)
  
    out
diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R

index dc88f67..9df8168 100644 (file)
--- a/pkg/R/constructionModelesLassoRank.R
+++ b/pkg/R/constructionModelesLassoRank.R
@@ -18,7 +18,7 @@
  #' @return a list with several models, defined by phi, rho, pi, llh
  #'
  #' @export
-constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, 
+constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max,
    ncores, fast, verbose)
  {
    n <- nrow(X)
@@ -38,7 +38,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
      # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait ça (rank.max-rank.min)^2
      # fois ...  Dans la dernière, on répète chaque chiffre une fois, et on fait ça
      # (rank.min-rank.max)^(k-1) fois.
-    RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1), 
+    RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1),
        each = deltaRank^(k - r)), each = L)
    }
    RankLambda[, k + 1] <- rep(1:L, times = Size)
@@ -46,8 +46,8 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
    if (ncores > 1)
    {
      cl <- parallel::makeCluster(ncores, outfile = "")
-    parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size", 
-      "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", 
+    parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size",
+      "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores",
        "verbose"))
    }
  
@@ -55,7 +55,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
    {
      lambdaIndex <- RankLambda[index, k + 1]
      rankIndex <- RankLambda[index, 1:k]
-    if (ncores > 1) 
+    if (ncores > 1)
        require("valse")  #workers start with an empty environment
  
      # 'relevant' will be the set of relevant columns
@@ -71,7 +71,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
        phi <- array(0, dim = c(p, m, k))
        if (length(relevant) > 0)
        {
-        res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, 
+        res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi,
            X[, relevant], Y, eps, rankIndex, fast)
          llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
          phi[relevant, , ] <- res$phi
@@ -88,7 +88,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
        lapply(seq_len(length(S) * Size), computeAtLambda)
      }
  
-  if (ncores > 1) 
+  if (ncores > 1)
      parallel::stopCluster(cl)
  
    out
diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R

index 064b54b..f13598a 100644 (file)
--- a/pkg/R/generateXY.R
+++ b/pkg/R/generateXY.R
@@ -1,4 +1,4 @@
-#' generateXY 
+#' generateXY
  #'
  #' Generate a sample of (X,Y) of size n
  #'
@@ -30,7 +30,7 @@ generateXY <- function(n, π, meanX, β, covX, covY)
      class <- c(class, rep(i, sizePop[i]))
      newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
      X <- rbind(X, newBlockX)
-    Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% 
+    Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*%
        β[, , i], covY[, , i]))))
    }
  
diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R

index 179822f..7e9cce5 100644 (file)
--- a/pkg/R/initSmallEM.R
+++ b/pkg/R/initSmallEM.R
@@ -1,4 +1,4 @@
-#' initialization of the EM algorithm 
+#' initialization of the EM algorithm
  #'
  #' @param k number of components
  #' @param X matrix of covariates (of size n*p)
@@ -36,10 +36,10 @@ initSmallEM <- function(k, X, Y, fast)
        Z <- Zinit1[, repet]
        Z_indice <- seq_len(n)[Z == r]  #renvoit les indices où Z==r
        if (length(Z_indice) == 1) {
-        betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% 
+        betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*%
            crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
        } else {
-        betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% 
+        betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*%
            crossprod(X[Z_indice, ], Y[Z_indice, ])
        }
        sigmaInit1[, , r, repet] <- diag(m)
@@ -54,10 +54,11 @@ initSmallEM <- function(k, X, Y, fast)
        {
          dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
            - X[i, ] %*% phiInit1[, , r, repet])
-        Gam[i, r] <- piInit1[repet, r] * 
+        Gam[i, r] <- piInit1[repet, r] *
            det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
        }
        sumGamI <- sum(Gam[i, ])
+      # TODO: next line is a division by zero if dotProduct is big
        gamInit1[i, , repet] <- Gam[i, ]/sumGamI
      }
  
diff --git a/pkg/R/main.R b/pkg/R/main.R

index 387d553..8649342 100644 (file)
--- a/pkg/R/main.R
+++ b/pkg/R/main.R
@@ -1,4 +1,4 @@
-#' valse 
+#' valse
  #'
  #' Main function
  #'
@@ -27,8 +27,8 @@
  #' @examples
  #' #TODO: a few examples
  #' @export
-valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, 
-  maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, 
+valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10,
+  maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1,
    ncores_inner = 1, thresh = 1e-08, grid_lambda = numeric(0), size_coll_mod = 10,
    fast = TRUE, verbose = FALSE, plot = TRUE)
  {
@@ -36,24 +36,24 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
    p <- ncol(X)
    m <- ncol(Y)
  
-  if (verbose) 
+  if (verbose)
      print("main loop: over all k and all lambda")
  
    if (ncores_outer > 1) {
      cl <- parallel::makeCluster(ncores_outer, outfile = "")
-    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", 
-      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", 
-      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", 
+    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X",
+      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin",
+      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh",
        "size_coll_mod", "verbose", "p", "m"))
    }
  
    # Compute models with k components
    computeModels <- function(k)
    {
-    if (ncores_outer > 1) 
+    if (ncores_outer > 1)
        require("valse") #nodes start with an empty environment
  
-    if (verbose) 
+    if (verbose)
        print(paste("Parameters initialization for k =", k))
      # smallEM initializes parameters by k-means and regression model in each
      # component, doing this 20 times, and keeping the values maximizing the
@@ -61,32 +61,32 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
      P <- initSmallEM(k, X, Y, fast)
      if (length(grid_lambda) == 0)
      {
-      grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, 
+      grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
                                         X, Y, gamma, mini, maxi, eps, fast)
      }
-    if (length(grid_lambda) > size_coll_mod) 
+    if (length(grid_lambda) > size_coll_mod)
        grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
  
-    if (verbose) 
+    if (verbose)
        print("Compute relevant parameters")
      # select variables according to each regularization parameter from the grid:
      # S$selected corresponding to selected variables
-    S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, 
+    S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi,
        gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
  
      if (procedure == "LassoMLE") {
-      if (verbose) 
+      if (verbose)
          print("run the procedure Lasso-MLE")
        # compute parameter estimations, with the Maximum Likelihood Estimator,
        # restricted on selected variables.
-      models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, 
+      models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit,
          P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
      } else {
-      if (verbose) 
+      if (verbose)
          print("run the procedure Lasso-Rank")
        # compute parameter estimations, with the Low Rank Estimator, restricted on
        # selected variables.
-      models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min, 
+      models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min,
          rank.max, ncores_inner, fast, verbose)
      }
      # warning! Some models are NULL after running selectVariables
@@ -101,7 +101,7 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
      } else {
        lapply(kmin:kmax, computeModels)
      }
-  if (ncores_outer > 1) 
+  if (ncores_outer > 1)
      parallel::stopCluster(cl)
  
    if (!requireNamespace("capushe", quietly = TRUE))
@@ -117,9 +117,9 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
      # For a collection of models (same k, several lambda):
      LLH <- sapply(models, function(model) model$llh[1])
      k <- length(models[[1]]$pi)
-    sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, 
+    sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[,
        , 1] != 0) + 1) - 1)
-    data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, 
+    data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n,
        complexity = sumPen, contrast = -LLH)
    }))
    tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
@@ -127,16 +127,16 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
    if (verbose == TRUE)
      print(tableauRecap)
    modSel <- capushe::capushe(tableauRecap, n)
-  indModSel <- if (selecMod == "DDSE") 
+  indModSel <- if (selecMod == "DDSE")
    {
      as.numeric(modSel@DDSE@model)
-  } else if (selecMod == "Djump") 
+  } else if (selecMod == "Djump")
    {
      as.numeric(modSel@Djump@model)
-  } else if (selecMod == "BIC") 
+  } else if (selecMod == "BIC")
    {
      modSel@BIC_capushe$model
-  } else if (selecMod == "AIC") 
+  } else if (selecMod == "AIC")
    {
      modSel@AIC_capushe$model
    }
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R

index ec2302d..83316dc 100644 (file)
--- a/pkg/R/plot_valse.R
+++ b/pkg/R/plot_valse.R
@@ -1,4 +1,4 @@
-#' Plot 
+#' Plot
  #'
  #' It is a function which plots relevant parameters
  #'
@@ -25,8 +25,8 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    for (r in 1:K)
    {
      Melt <- melt(t((model$phi[, , r])))
-    gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + 
-      geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
+    gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
+      geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
        midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
    }
    print(gReg)
@@ -39,9 +39,9 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
      Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
      gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))
        + geom_tile()
-      + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
+      + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
          space = "Lab")
-      + ggtitle(paste("Difference between regression matrices in cluster", 
+      + ggtitle(paste("Difference between regression matrices in cluster",
          k1, "and", k2))
      print(gDiff)
    }
@@ -52,7 +52,7 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
      matCov[, r] <- diag(model$rho[, , r])
    MeltCov <- melt(matCov)
    gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile()
-    + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
+    + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
        space = "Lab")
      + ggtitle("Covariance matrices")
    print(gCov)
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R

index bab45cc..eb6c590 100644 (file)
--- a/pkg/R/selectVariables.R
+++ b/pkg/R/selectVariables.R
@@ -1,4 +1,4 @@
-#' selectVariables 
+#' selectVariables
  #'
  #' It is a function which construct, for a given lambda, the sets of relevant variables.
  #'
@@ -22,19 +22,19 @@
  #'
  #' @export
  #'
-selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, 
+selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma,
    glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast)
  {
    if (ncores > 1) {
      cl <- parallel::makeCluster(ncores, outfile = "")
-    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", 
+    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit",
        "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment())
    }
  
    # Computation for a fixed lambda
    computeCoefs <- function(lambda)
    {
-    params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
+    params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
        X, Y, eps, fast)
  
      p <- ncol(X)
@@ -65,9 +65,9 @@ selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma
      } else {
        lapply(glambda, computeCoefs)
      }
-  if (ncores > 1) 
+  if (ncores > 1)
      parallel::stopCluster(cl)
- 
+
    print(out)
    # Suppress models which are computed twice En fait, ca ca fait la comparaison de
    # tous les parametres On veut juste supprimer ceux qui ont les memes variables
diff --git a/pkg/R/util.R b/pkg/R/util.R

index f8b01cc..ed10430 100644 (file)
--- a/pkg/R/util.R
+++ b/pkg/R/util.R
@@ -1,7 +1,5 @@
-# ...
+# Compute the determinant of a matrix, which can be 1x1 (scalar)
  gdet <- function(M)
  {
-       if (is.matrix(M))
-               return (det(M))
-       return (M[1]) #numeric, double
+       ifelse(is.matrix(M), det(M), M[1])
  }
diff --git a/pkg/src/adapters/a.EMGrank.c b/pkg/src/adapters/a.EMGrank.c

index b1dad9b..8da760d 100644 (file)
--- a/pkg/src/adapters/a.EMGrank.c
+++ b/pkg/src/adapters/a.EMGrank.c
@@ -10,7 +10,7 @@ SEXP EMGrank(
         SEXP maxi_,
         SEXP X_,
         SEXP Y_,
-       SEXP tau_,
+       SEXP eps_,
         SEXP rank_
  ) {
         // Get matrices dimensions
@@ -28,7 +28,7 @@ SEXP EMGrank(
         // get scalar parameters
         int mini = INTEGER_VALUE(mini_);
         int maxi = INTEGER_VALUE(maxi_);
-       double tau = NUMERIC_VALUE(tau_);
+       double eps = NUMERIC_VALUE(eps_);
  
         // Get pointers from SEXP arrays ; WARNING: by columns !
         double* Pi = REAL(Pi_);
@@ -53,7 +53,7 @@ SEXP EMGrank(
         // Call to EMGrank //
         /////////////////////
  
-       EMGrank_core(Pi, Rho, mini, maxi, X, Y, tau, rank,
+       EMGrank_core(Pi, Rho, mini, maxi, X, Y, eps, rank,
                 pPhi,pLLF,
                 n,p,m,k);
  
diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/sources/EMGLLF.c

index e8b3b84..b77f24a 100644 (file)
--- a/pkg/src/sources/EMGLLF.c
+++ b/pkg/src/sources/EMGLLF.c
@@ -417,4 +417,4 @@ void EMGLLF_core(
         free(X2);
         free(Y2);
         free(sqNorm2);
-}\f
+}
diff --git a/select.zip b/select.zip

new file mode 100644 (file)

index 0000000..f0465ce
--- /dev/null
+++ b/select.zip
@@ -0,0 +1 @@
+#$# git-fat 6ea97df8b04ef5722cc85b9f6c467cdee85155ff                43421
diff --git a/test/Makefile b/test/Makefile

index 7b16352..8b8697e 100644 (file)
--- a/test/Makefile
+++ b/test/Makefile
@@ -26,6 +26,6 @@ clean:
         rm -f *.o ../pkg/src/sources/*.o
  
  cclean: clean
-       rm -f *.so $(TESTS)
+       rm -f *.so ../pkg/src/*.so $(TESTS)
  
  .PHONY: all clean cclean
diff --git a/test/generateRunSaveTest_EMGLLF.R b/test/generateRunSaveTest_EMGLLF.R

index 674a726..c46b77b 100644 (file)
--- a/test/generateRunSaveTest_EMGLLF.R
+++ b/test/generateRunSaveTest_EMGLLF.R
@@ -1,10 +1,9 @@
  source("helper.R")
  library(valse)
  
-generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
-       gamma=1., lambda=0.5, eps=1e-6)
+generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1., lambda=0.5, eps=1e-6)
  {
-       testFolder = "data/"
+       testFolder = "./data/"
         dir.create(testFolder, showWarnings=FALSE, mode="0755")
  
         params = basicInitParameters(n, p, m, k)
diff --git a/test/generateRunSaveTest_EMGrank.R b/test/generateRunSaveTest_EMGrank.R

index 9969620..f6a3314 100644 (file)
--- a/test/generateRunSaveTest_EMGrank.R
+++ b/test/generateRunSaveTest_EMGrank.R
@@ -1,22 +1,21 @@
  source("helper.R")
  library(valse)
  
-generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0,
-       rank = c(1,2,4))
+generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0, rank = c(1,2,4))
  {
    eps = 1e-6
-  pi = rep(1.0/k, k)
-  rho = array(dim=c(m,m,k))
+  Pi = rep(1.0/k, k)
+  Rho = array(dim=c(m,m,k))
    for(i in 1:k)
-    rho[,,i] = diag(1,m)
+    Rho[,,i] = diag(1,m)
    xy = generateXYdefault(n, p, m, k)
  
-  testFolder = "../data/"
+  testFolder = "./data/"
    dir.create(testFolder, showWarnings=FALSE, mode="0755")
    #save inputs
-  write.table(as.double(rho), paste(testFolder,"rho",sep=""),
+       write.table(as.double(Pi), paste(testFolder,"Pi",sep=""),
                 row.names=F, col.names=F)
-       write.table(as.double(pi), paste(testFolder,"pi",sep=""),
+  write.table(as.double(Rho), paste(testFolder,"Rho",sep=""),
                 row.names=F, col.names=F)
    write.table(as.integer(mini), paste(testFolder,"mini",sep=""),
                 row.names=F, col.names=F)
@@ -33,7 +32,7 @@ generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10,
    write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""),
                 row.names=F, col.names=F)
  
-  res = valse::EMGrank(pi,rho,mini,maxi,xy$X,xy$Y,eps,rank,fast=FALSE)
+  res = valse::EMGrank(Pi,Rho,mini,maxi,xy$X,xy$Y,eps,rank,fast=FALSE)
  
    #save output
    write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F)
diff --git a/test/test.EMGrank.c b/test/test.EMGrank.c

index 8374b2f..71ce506 100644 (file)
--- a/test/test.EMGrank.c
+++ b/test/test.EMGrank.c
@@ -13,13 +13,13 @@ int main(int argc, char** argv)
  
         ////////////
         // INPUTS //
-       Real* rho = readArray_real("rho");
-       Real* pi = readArray_real("pi");
+       Real* Rho = readArray_real("Rho");
+       Real* Pi = readArray_real("Pi");
         int mini = read_int("mini");
         int maxi = read_int("maxi");
         Real* X = readArray_real("X");
         Real* Y = readArray_real("Y");
-       Real tau = read_real("tau");
+       Real eps = read_real("eps");
         int* rank = readArray_int("rank");
         ////////////
  
@@ -31,13 +31,13 @@ int main(int argc, char** argv)
  
         //////////////////////////
         // Main call to EMGrank //
-       EMGrank_core(pi,rho,mini,maxi,X,Y,tau,rank,
+       EMGrank_core(Pi,Rho,mini,maxi,X,Y,eps,rank,
                 phi,LLF,
                 n,p,m,k);
         //////////////////////////
  
-       free(rho);
-       free(pi);
+       free(Rho);
+       free(Pi);
         free(X);
         free(Y);
         free(rank);
diff --git a/test/test_EMGrank.R b/test/test_EMGrank.R

index 9aec8bb..b140e5e 100644 (file)
--- a/test/test_EMGrank.R
+++ b/test/test_EMGrank.R
@@ -4,13 +4,24 @@ testFolder = "data/"
  # NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove.
  
  #get inputs
-#TODO
+npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep="")))
+n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4]
+Pi = as.double(as.matrix(read.table(paste(testFolder,"Pi",sep="")))[,])
+Rho = array(as.double(as.matrix(read.table(paste(testFolder,"Rho",sep="")))), dim=c(m,m,k))
+mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1])
+maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1])
+X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p)
+Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m)
+eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1])
+rank = as.double(as.matrix(read.table(paste(testFolder,"rank",sep="")))[,])
  
  #get outputs
-#TODO
+phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k))
+LLF = as.double(as.matrix(read.table(paste(testFolder,"LLF",sep="")))[1])
  
-res = valse::EMGrank(...)
+res = valse::EMGrank(Pi,Rho,mini,maxi,X,Y,eps,rank,fast=TRUE)
  
  #compare outputs
  nd=7 #number of digits
-#TODO
+print( all(round(phi,nd) == round(res$phi,nd)) )
+print( all(round(LLF,nd) == round(res$LLF,nd)) )
author	Benjamin Auder <benjamin.auder@somewhere>
	Thu, 6 Feb 2020 19:18:59 +0000 (20:18 +0100)
committer	Benjamin Auder <benjamin.auder@somewhere>
	Thu, 6 Feb 2020 19:18:59 +0000 (20:18 +0100)
.gitattributes	[new file with mode: 0644]	patch \| blob
.gitfat	[new file with mode: 0644]	patch \| blob
.gitignore		patch \| blob \| blame \| history
README.md		patch \| blob \| blame \| history
TODO		patch \| blob \| blame \| history
hooks/pre-push		patch \| blob \| blame \| history
pkg/DESCRIPTION		patch \| blob \| blame \| history
pkg/LICENSE		patch \| blob \| blame \| history
pkg/R/EMGLLF.R		patch \| blob \| blame \| history
pkg/R/EMGrank.R		patch \| blob \| blame \| history
pkg/R/computeGridLambda.R		patch \| blob \| blame \| history
pkg/R/constructionModelesLassoMLE.R		patch \| blob \| blame \| history
pkg/R/constructionModelesLassoRank.R		patch \| blob \| blame \| history
pkg/R/generateXY.R		patch \| blob \| blame \| history
pkg/R/initSmallEM.R		patch \| blob \| blame \| history
pkg/R/main.R		patch \| blob \| blame \| history
pkg/R/plot_valse.R		patch \| blob \| blame \| history
pkg/R/selectVariables.R		patch \| blob \| blame \| history
pkg/R/util.R		patch \| blob \| blame \| history
pkg/src/adapters/a.EMGrank.c		patch \| blob \| blame \| history
pkg/src/sources/EMGLLF.c		patch \| blob \| blame \| history
select.zip	[new file with mode: 0644]	patch \| blob
test/Makefile		patch \| blob \| blame \| history
test/generateRunSaveTest_EMGLLF.R		patch \| blob \| blame \| history
test/generateRunSaveTest_EMGrank.R		patch \| blob \| blame \| history
test/test.EMGrank.c		patch \| blob \| blame \| history
test/test_EMGrank.R		patch \| blob \| blame \| history