Package as sent to CRAN

author Benjamin Auder <benjamin.auder@somewhere>
Sun, 16 May 2021 19:36:32 +0000 (21:36 +0200)
committer Benjamin Auder <benjamin.auder@somewhere>
Sun, 16 May 2021 19:36:32 +0000 (21:36 +0200)
diff --git a/.gitignore b/.gitignore

index d8cc23d..643e73c 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ Rprof.out
  *.so
  *.exe
  .Rproj.user
+symbols.rds
diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION

index edb3356..13fee37 100644 (file)
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: valse
  Title: Variable Selection with Mixture of Models
-Date: 2020-03-11
+Date: 2021-05-16
  Version: 0.1-0
  Description: Two methods are implemented to cluster data with finite mixture
      regression models. Those procedures deal with high-dimensional covariates and
@@ -20,15 +20,15 @@ Depends:
  Imports:
      MASS,
      parallel,
-    ggplot2,
      cowplot,
+    ggplot2,
      reshape2
  Suggests:
      capushe,
      roxygen2
-URL: http://git.auder.net/?p=valse.git
+URL: https://git.auder.net/?p=valse.git
  License: MIT + file LICENSE
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
  Collate:
      'plot_valse.R'
      'main.R'
diff --git a/pkg/LICENSE b/pkg/LICENSE

index ccb78c4..b3f4c16 100644 (file)
--- a/pkg/LICENSE
+++ b/pkg/LICENSE
@@ -1,2 +1,2 @@
-YEAR: 2014-2020
+YEAR: 2014-2021
  COPYRIGHT HOLDER: Benjamin Auder, Emilie Devijver, Benjamin Goehry
diff --git a/pkg/NAMESPACE b/pkg/NAMESPACE

index a77f192..8d6ca72 100644 (file)
--- a/pkg/NAMESPACE
+++ b/pkg/NAMESPACE
@@ -1 +1,32 @@
-exportPattern(".")
+# Generated by roxygen2: do not edit by hand
+
+export(EMGLLF)
+export(EMGrank)
+export(computeGridLambda)
+export(constructionModelesLassoMLE)
+export(constructionModelesLassoRank)
+export(generateXY)
+export(initSmallEM)
+export(plot_valse)
+export(runValse)
+export(selectVariables)
+importFrom(MASS,ginv)
+importFrom(cowplot,background_grid)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,geom_boxplot)
+importFrom(ggplot2,geom_line)
+importFrom(ggplot2,geom_tile)
+importFrom(ggplot2,ggplot)
+importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,scale_fill_gradient2)
+importFrom(ggplot2,theme)
+importFrom(parallel,clusterExport)
+importFrom(parallel,makeCluster)
+importFrom(parallel,parLapply)
+importFrom(parallel,stopCluster)
+importFrom(reshape2,melt)
+importFrom(stats,cutree)
+importFrom(stats,dist)
+importFrom(stats,hclust)
+importFrom(stats,runif)
+useDynLib(valse)
diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R

index 0584382..fd0cd4d 100644 (file)
--- a/pkg/R/constructionModelesLassoMLE.R
+++ b/pkg/R/constructionModelesLassoMLE.R
@@ -102,7 +102,7 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
    # For each lambda, computation of the parameters
    out <-
      if (ncores > 1) {
-      parLapply(cl, 1:length(S), computeAtLambda)
+      parallel::parLapply(cl, 1:length(S), computeAtLambda)
      } else {
        lapply(1:length(S), computeAtLambda)
      }
diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R

index 6e18409..7a02967 100644 (file)
--- a/pkg/R/constructionModelesLassoRank.R
+++ b/pkg/R/constructionModelesLassoRank.R
@@ -83,7 +83,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
    # For each lambda in the grid we compute the estimators
    out <-
      if (ncores > 1) {
-      parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
+      parallel::parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
      } else {
        lapply(seq_len(length(S) * Size), computeAtLambda)
      }
diff --git a/pkg/R/main.R b/pkg/R/main.R

index 129aa25..2afcdf7 100644 (file)
--- a/pkg/R/main.R
+++ b/pkg/R/main.R
@@ -35,10 +35,10 @@
  #' data = generateXY(n, c(0.4,0.6), rep(0,p), beta, diag(0.5, p), diag(0.5, m))
  #' X = data$X
  #' Y = data$Y
-#' res = runValse(X, Y, kmax = 5)
+#' res = runValse(X, Y, kmax = 5, plot=FALSE)
  #' X <- matrix(runif(100), nrow=50)
  #' Y <- matrix(runif(100), nrow=50)
-#' res = runValse(X, Y)
+#' res = runValse(X, Y, plot=FALSE)
  #'
  #' @export
  runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10,
@@ -50,8 +50,7 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1,
    p <- ncol(X)
    m <- ncol(Y)
  
-  if (verbose)
-    print("main loop: over all k and all lambda")
+  if (verbose) print("main loop: over all k and all lambda")
  
    if (ncores_outer > 1) {
      cl <- parallel::makeCluster(ncores_outer, outfile = "")
@@ -62,8 +61,7 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1,
    }
  
    # Compute models with k components
-  computeModels <- function(k)
-  {
+  computeModels <- function(k) {
      if (ncores_outer > 1)
        require("valse") #nodes start with an empty environment
  
@@ -73,8 +71,7 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1,
      # component, doing this 20 times, and keeping the values maximizing the
      # likelihood after 10 iterations of the EM algorithm.
      P <- initSmallEM(k, X, Y, fast)
-    if (length(grid_lambda) == 0)
-    {
+    if (length(grid_lambda) == 0) {
        grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
                                         X, Y, gamma, mini, maxi, eps, fast)
      }
@@ -111,56 +108,45 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1,
    # List (index k) of lists (index lambda) of models
    models_list <-
      if (ncores_outer > 1) {
-      parLapply(cl, kmin:kmax, computeModels)
+      parallel::parLapply(cl, kmin:kmax, computeModels)
      } else {
        lapply(kmin:kmax, computeModels)
      }
-  if (ncores_outer > 1)
-    parallel::stopCluster(cl)
+  if (ncores_outer > 1) parallel::stopCluster(cl)
  
-  if (!requireNamespace("capushe", quietly = TRUE))
-  {
+  if (!requireNamespace("capushe", quietly = TRUE)) {
      warning("'capushe' not available: returning all models")
      return(models_list)
    }
  
    # Get summary 'tableauRecap' from models
-  tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
-  {
+  tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) {
      models <- models_list[[i]]
      # For a collection of models (same k, several lambda):
      LLH <- sapply(models, function(model) model$llh[1])
      k <- length(models[[1]]$pi)
-    sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[,
-      , 1] != 0) + 1) - 1)
-    data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n,
-      complexity = sumPen, contrast = -LLH)
+    sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[,,1] != 0) + 1) - 1)
+    data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, complexity = sumPen, contrast = -LLH)
    }))
    tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
-  if (verbose)
-    print(tableauRecap)
+  if (verbose) print(tableauRecap)
  
    if (nrow(tableauRecap) > 10) {
      modSel <- capushe::capushe(tableauRecap, n)
-    indModSel <- if (selecMod == "DDSE")
-    {
+    indModSel <- if (selecMod == "DDSE") {
        as.numeric(modSel@DDSE@model)
-    } else if (selecMod == "Djump")
-    {
+    } else if (selecMod == "Djump") {
        as.numeric(modSel@Djump@model)
-    } else if (selecMod == "BIC")
-    {
+    } else if (selecMod == "BIC") {
        modSel@BIC_capushe$model
-    } else if (selecMod == "AIC")
-    {
+    } else if (selecMod == "AIC") {
        modSel@AIC_capushe$model
      }
      listMod <- as.integer(unlist(strsplit(as.character(indModSel), "[.]")))
      modelSel <- models_list[[listMod[1]]][[listMod[2]]]
      modelSel$models <- tableauRecap
  
-    if (plot)
-      print(plot_valse(X, Y, modelSel))
+    if (plot) plot_valse(X, Y, modelSel)
      return(modelSel)
    }
    tableauRecap
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R

index b47c7da..e3fd38e 100644 (file)
--- a/pkg/R/plot_valse.R
+++ b/pkg/R/plot_valse.R
@@ -1,3 +1,4 @@
+utils::globalVariables(c("Var1","Var2","X1","X2","value")) #, package="valse")
  #' Plot
  #'
  #' It is a function which plots relevant parameters
@@ -9,7 +10,7 @@
  #' @param k1 index of the first cluster to be compared
  #' @param k2 index of the second cluster to be compared
  #'
-#' @importFrom ggplot2 ggplot aes ggtitle geom_tile geom_line geom_point scale_fill_gradient2 geom_boxplot theme
+#' @importFrom ggplot2 ggplot aes ggtitle geom_tile geom_line scale_fill_gradient2 geom_boxplot theme
  #' @importFrom cowplot background_grid
  #' @importFrom reshape2 melt
  #'
@@ -20,24 +21,22 @@ plot_valse <- function(X, Y, model, comp = FALSE, k1 = NA, k2 = NA)
    K <- length(model$pi)
    ## regression matrices
    gReg <- list()
-  for (r in 1:K)
-  {
-    Melt <- melt(t((model$phi[, , r])))
-    gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))  +
-      geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
-      midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
+  for (r in 1:K) {
+    Melt <- reshape2::melt(t((model$phi[, , r])))
+    gReg[[r]] <- ggplot2::ggplot(data = Melt, ggplot2::aes(x = Var1, y = Var2, fill = value))  +
+      ggplot2::geom_tile() + ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white",
+      midpoint = 0, space = "Lab") + ggplot2::ggtitle(paste("Regression matrices in cluster", r))
    }
    print(gReg)
  
    ## Differences between two clusters
-  if (comp)
-  {
+  if (comp) {
      if (is.na(k1) || is.na(k2))
        print("k1 and k2 must be integers, representing the clusters you want to compare")
-    Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
-    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + 
-      geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
-        space = "Lab") + ggtitle(paste("Difference between regression matrices in cluster",
+    Melt <- reshape2::melt(t(model$phi[, , k1] - model$phi[, , k2]))
+    gDiff <- ggplot2::ggplot(data = Melt, ggplot2::aes(x = Var1, y = Var2, fill = value)) + 
+      ggplot2::geom_tile() + ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+        space = "Lab") + ggplot2::ggtitle(paste("Difference between regression matrices in cluster",
          k1, "and", k2))
      print(gDiff)
    }
@@ -46,19 +45,19 @@ plot_valse <- function(X, Y, model, comp = FALSE, k1 = NA, k2 = NA)
    matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
    for (r in 1:K)
      matCov[, r] <- diag(model$rho[, , r])
-  MeltCov <- melt(matCov)
-  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() +
-    scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
-      space = "Lab") + ggtitle("Covariance matrices (diag., one row per cluster)")
+  MeltCov <- reshape2::melt(matCov)
+  gCov <- ggplot2::ggplot(data = MeltCov, ggplot2::aes(x = Var1, y = Var2, fill = value)) + ggplot2::geom_tile() +
+    ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+      space = "Lab") + ggplot2::ggtitle("Covariance matrices (diag., one row per cluster)")
    print(gCov)
  
    ### Proportions
-  gam2 <- matrix(NA, ncol = K, nrow = n)
+  gam2 <- matrix(NA, ncol = 2, nrow = n)
    for (i in 1:n)
      gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
  
-  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + geom_boxplot() +
-     theme(legend.position = "none") + background_grid(major = "xy", minor = "none")  + 
-    ggtitle("Assignment boxplot per cluster")
+  bp <- ggplot2::ggplot(data.frame(gam2), ggplot2::aes(x = X2, y = X1, color = X2, group = X2)) + ggplot2::geom_boxplot() +
+     ggplot2::theme(legend.position = "none") + cowplot::background_grid(major = "xy", minor = "none")  + 
+    ggplot2::ggtitle("Assignment boxplot per cluster")
    print(bp)
  }
diff --git a/pkg/src/Makevars b/pkg/src/Makevars

index 6a25e63..6932e7b 100644 (file)
--- a/pkg/src/Makevars
+++ b/pkg/src/Makevars
@@ -1 +1,2 @@
-PKG_LIBS=-lm -lgsl -lcblas
+#PKG_LIBS=-lm -lgsl -lcblas
+PKG_LIBS = `$(R_HOME)/bin/Rscript -e "RcppGSL:::LdFlags()"` -lm $(BLAS_LIBS) $(FLIBS)
diff --git a/test/pkgExample.R b/test/pkgExample.R

new file mode 100644 (file)

index 0000000..215c462
--- /dev/null
+++ b/test/pkgExample.R
@@ -0,0 +1,8 @@
+library(valse)
+n = 50; m = 10; p = 5
+beta = array(0, dim=c(p,m,2))
+beta[,,1] = 1
+beta[,,2] = 2
+data = generateXY(n, c(0.4,0.6), rep(0,p), beta, diag(0.5, p), diag(0.5, m))
+X = data$X ; Y = data$Y
+res = runValse(X, Y, kmax = 5)
author	Benjamin Auder <benjamin.auder@somewhere>
	Sun, 16 May 2021 19:36:32 +0000 (21:36 +0200)
committer	Benjamin Auder <benjamin.auder@somewhere>
	Sun, 16 May 2021 19:36:32 +0000 (21:36 +0200)
.gitignore		patch | blob | blame | history
pkg/DESCRIPTION		patch | blob | blame | history
pkg/LICENSE		patch | blob | blame | history
pkg/NAMESPACE		patch | blob | blame | history
pkg/R/constructionModelesLassoMLE.R		patch | blob | blame | history
pkg/R/constructionModelesLassoRank.R		patch | blob | blame | history
pkg/R/main.R		patch | blob | blame | history
pkg/R/plot_valse.R		patch | blob | blame | history
pkg/src/Makevars		patch | blob | blame | history
test/pkgExample.R	[new file with mode: 0644]	patch | blob