Pass R CMD check --as-cran

author Benjamin Auder <benjamin.auder@somewhere>

Wed, 11 Mar 2020 12:20:33 +0000 (13:20 +0100)

committer Benjamin Auder <benjamin.auder@somewhere>

Wed, 11 Mar 2020 12:20:33 +0000 (13:20 +0100)
author Benjamin Auder <benjamin.auder@somewhere>
Wed, 11 Mar 2020 12:20:33 +0000 (13:20 +0100)
committer Benjamin Auder <benjamin.auder@somewhere>
Wed, 11 Mar 2020 12:20:33 +0000 (13:20 +0100)
diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION

index 8dd0fcb..77812ed 100644 (file)
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: valse
-Title: Variable Selection With Mixture Of Models
-Date: 2020-01-11
+Title: Variable Selection with Mixture of Models
+Date: 2020-03-11
  Version: 0.1-0
  Description: Two methods are implemented to cluster data with finite mixture
      regression models. Those procedures deal with high-dimensional covariates and
@@ -19,10 +19,12 @@ Depends:
      R (>= 3.5.0)
  Imports:
      MASS,
-    parallel
+    parallel,
+    ggplot2,
+    cowplot,
+    reshape2
  Suggests:
      capushe,
-    methods,
      roxygen2
  URL: http://git.auder.net/?p=valse.git
  License: MIT + file LICENSE
diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R

index 93012fb..dada0ef 100644 (file)
--- a/pkg/R/EMGLLF.R
+++ b/pkg/R/EMGLLF.R
@@ -16,6 +16,7 @@
  #' @param X matrix of covariates (of size n*p)
  #' @param Y matrix of responses (of size n*m)
  #' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param fast boolean to enable or not the C function call
  #'
  #' @return A list (corresponding to the model collection) defined by (phi,rho,pi,LLF,S,affec):
  #'   phi : regression mean for each cluster
@@ -37,14 +38,8 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
    }
  
    # Function in C
-  n <- nrow(X)  #nombre d'echantillons
-  p <- ncol(X)  #nombre de covariables
-  m <- ncol(Y)  #taille de Y (multivarie)
-  k <- length(piInit)  #nombre de composantes dans le melange
    .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
-    X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k),
-    llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k,
-    PACKAGE = "valse")
+    X, Y, eps, PACKAGE = "valse")
  }
  
  # R version - slow but easy to read
diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R

index 2dc6c37..fa66b3d 100644 (file)
--- a/pkg/R/EMGrank.R
+++ b/pkg/R/EMGrank.R
@@ -13,13 +13,14 @@
  #' @param Y matrix of responses (of size n*m)
  #' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
  #' @param rank vector of possible ranks
+#' @param fast boolean to enable or not the C function call
  #'
  #' @return A list (corresponding to the model collection) defined by (phi,LLF):
  #'   phi : regression mean for each cluster
  #'   LLF : log likelihood with respect to the training set
  #'
  #' @export
-EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE)
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast)
  {
    if (!fast)
    {
@@ -28,12 +29,7 @@ EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE)
    }
  
    # Function in C
-  n <- nrow(X)  #nombre d'echantillons
-  p <- ncol(X)  #nombre de covariables
-  m <- ncol(Y)  #taille de Y (multivarie)
-  k <- length(Pi)  #nombre de composantes dans le melange
-  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), phi = double(p * m * k), 
-    LLF = double(1), n, p, m, k, PACKAGE = "valse")
+  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), PACKAGE = "valse")
  }
  
  # helper to always have matrices as arg (TODO: put this elsewhere? improve?)  -->
diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R

index f087ba7..3dae84c 100644 (file)
--- a/pkg/R/computeGridLambda.R
+++ b/pkg/R/computeGridLambda.R
@@ -12,6 +12,7 @@
  #' @param mini minimum number of iterations in EM algorithm
  #' @param maxi maximum number of iterations in EM algorithm
  #' @param eps threshold to stop EM algorithm
+#' @param fast boolean to enable or not the C function call
  #'
  #' @return the grid of regularization parameters
  #'
diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R

index f13598a..d2e00ef 100644 (file)
--- a/pkg/R/generateXY.R
+++ b/pkg/R/generateXY.R
@@ -3,16 +3,16 @@
  #' Generate a sample of (X,Y) of size n
  #'
  #' @param n sample size
-#' @param π proportion for each cluster
+#' @param p proportion for each cluster
  #' @param meanX matrix of group means for covariates (of size p)
  #' @param covX covariance for covariates (of size p*p)
-#' @param β regression matrix, of size p*m*k
+#' @param beta regression matrix, of size p*m*k
  #' @param covY covariance for the response vector (of size m*m*K)
  #'
  #' @return list with X and Y
  #'
  #' @export
-generateXY <- function(n, π, meanX, β, covX, covY)
+generateXY <- function(n, p, meanX, beta, covX, covY)
  {
    p <- dim(covX)[1]
    m <- dim(covY)[1]
@@ -22,7 +22,7 @@ generateXY <- function(n, π, meanX, β, covX, covY)
    Y <- matrix(nrow = 0, ncol = m)
  
    # random generation of the size of each population in X~Y (unordered)
-  sizePop <- rmultinom(1, n, π)
+  sizePop <- stats::rmultinom(1, n, p)
    class <- c() #map i in 1:n --> index of class in 1:k
  
    for (i in 1:k)
@@ -31,7 +31,7 @@ generateXY <- function(n, π, meanX, β, covX, covY)
      newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
      X <- rbind(X, newBlockX)
      Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*%
-      β[, , i], covY[, , i]))))
+      beta[, , i], covY[, , i]))))
    }
  
    shuffle <- sample(n)
diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R

index fccd51d..487a4e1 100644 (file)
--- a/pkg/R/initSmallEM.R
+++ b/pkg/R/initSmallEM.R
@@ -3,10 +3,10 @@
  #' @param k number of components
  #' @param X matrix of covariates (of size n*p)
  #' @param Y matrix of responses (of size n*m)
+#' @param fast boolean to enable or not the C function call
  #'
  #' @return a list with phiInit, rhoInit, piInit, gamInit
  #'
-#' @importFrom methods new
  #' @importFrom stats cutree dist hclust runif
  #' @export
  initSmallEM <- function(k, X, Y, fast)
diff --git a/pkg/R/main.R b/pkg/R/main.R

index 85a41b7..d750fec 100644 (file)
--- a/pkg/R/main.R
+++ b/pkg/R/main.R
@@ -21,6 +21,7 @@
  #' @param size_coll_mod (Maximum) size of a collection of models
  #' @param fast TRUE to use compiled C code, FALSE for R code only
  #' @param verbose TRUE to show some execution traces
+#' @param plot TRUE to plot the selected models after run
  #'
  #' @return a list with estimators of parameters
  #'
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R

index 3160067..73188d2 100644 (file)
--- a/pkg/R/plot_valse.R
+++ b/pkg/R/plot_valse.R
@@ -6,23 +6,24 @@
  #' @param Y matrix of responses (of size n*m)
  #' @param model the model constructed by valse procedure
  #' @param n sample size
-#' @return several plots
+#' @param comp TRUE to enable pairwise clusters comparison
+#' @param k1 index of the first cluster to be compared
+#' @param k2 index of the second cluster to be compared
+#'
+#' @importFrom ggplot2 ggplot aes ggtitle geom_tile geom_line geom_point scale_fill_gradient2 geom_boxplot theme
+#' @importFrom cowplot background_grid
+#' @importFrom reshape2 melt
  #'
  #' @export
  plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
  {
-  require("gridExtra")
-  require("ggplot2")
-  require("reshape2")
-  require("cowplot")
-
    K <- length(model$pi)
    ## regression matrices
    gReg <- list()
    for (r in 1:K)
    {
      Melt <- melt(t((model$phi[, , r])))
-    gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
+    gReg[[r]] <- ggplot(data = Melt, aes(x = "Var1", y = "Var2", fill = "value")) +
        geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
        midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
    }
@@ -31,10 +32,10 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    ## Differences between two clusters
    if (comp)
    {
-    if (is.na(k1) || is.na(k))
+    if (is.na(k1) || is.na(k2))
        print("k1 and k2 must be integers, representing the clusters you want to compare")
      Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
-    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))
+    gDiff <- ggplot(data = Melt, aes(x = "Var1", y = "Var2", fill = "value"))
        + geom_tile()
        + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
          space = "Lab")
@@ -48,7 +49,7 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    for (r in 1:K)
      matCov[, r] <- diag(model$rho[, , r])
    MeltCov <- melt(matCov)
-  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile()
+  gCov <- ggplot(data = MeltCov, aes(x = "Var1", y = "Var2", fill = "value")) + geom_tile()
      + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
        space = "Lab")
      + ggtitle("Covariance matrices")
@@ -59,7 +60,7 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    for (i in 1:n)
      gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
  
-  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2))
+  bp <- ggplot(data.frame(gam2), aes(x = "X2", y = "X1", color = "X2", group = "X2"))
      + geom_boxplot()
      + theme(legend.position = "none")
      + background_grid(major = "xy", minor = "none")
@@ -80,7 +81,7 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    }
    data <- data.frame(mean = as.vector(meanPerClass),
      cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
-  g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
-  print(g + geom_line(aes(linetype = cluster, color = cluster))
-    + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
+  g <- ggplot(data, aes(x = "time", y = "mean", group = "cluster", color = "cluster"))
+  print(g + geom_line(aes(linetype = "cluster", color = "cluster"))
+    + geom_point(aes(color = "cluster")) + ggtitle("Mean per cluster"))
  }
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R

index 0c18c67..e08a941 100644 (file)
--- a/pkg/R/selectVariables.R
+++ b/pkg/R/selectVariables.R
@@ -15,6 +15,7 @@
  #' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
  #' @param eps   threshold to say that EM algorithm has converged
  #' @param ncores Number or cores for parallel execution (1 to disable)
+#' @param fast boolean to enable or not the C function call
  #'
  #' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
  #'
diff --git a/pkg/data/data.RData b/pkg/data/data.RData

deleted file mode 100644 (file)

index a9f09e1..0000000

Binary files a/pkg/data/data.RData and /dev/null differ
diff --git a/pkg/data/data2.RData b/pkg/data/data2.RData

deleted file mode 100644 (file)

index 80003e3..0000000

Binary files a/pkg/data/data2.RData and /dev/null differ
diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/EMGLLF.c

similarity index 91%

rename from pkg/src/sources/EMGLLF.c

rename to pkg/src/EMGLLF.c

index b77f24a..978f253 100644 (file)
--- a/pkg/src/sources/EMGLLF.c
+++ b/pkg/src/EMGLLF.c
@@ -6,30 +6,30 @@
  // TODO: don't recompute indexes ai(...) and mi(...) when possible
  void EMGLLF_core(
         // IN parameters
-       const Real* phiInit, // parametre initial de moyenne renormalisé
-       const Real* rhoInit, // parametre initial de variance renormalisé
+       const Real* phiInit, // parametre initial de moyenne renormalise
+       const Real* rhoInit, // parametre initial de variance renormalise
         const Real* piInit,      // parametre initial des proportions
-       const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon
-       int mini, // nombre minimal d'itérations dans l'algorithme EM
-       int maxi, // nombre maximal d'itérations dans l'algorithme EM
-       Real gamma, // puissance des proportions dans la pénalisation pour un Lasso adaptatif
-       Real lambda, // valeur du paramètre de régularisation du Lasso
-       const Real* X, // régresseurs
-       const Real* Y, // réponse
+       const Real* gamInit, // parametre initial des probabilites a posteriori de chaque echantillon
+       int mini, // nombre minimal d'iterations dans l'algorithme EM
+       int maxi, // nombre maximal d'iterations dans l'algorithme EM
+       Real gamma, // puissance des proportions dans la penalisation pour un Lasso adaptatif
+       Real lambda, // valeur du parametre de regularisation du Lasso
+       const Real* X, // regresseurs
+       const Real* Y, // reponse
         Real eps, // seuil pour accepter la convergence
         // OUT parameters (all pointers, to be modified)
-       Real* phi, // parametre de moyenne renormalisé, calculé par l'EM
-       Real* rho, // parametre de variance renormalisé, calculé par l'EM
-       Real* pi, // parametre des proportions renormalisé, calculé par l'EM
-       Real* llh, // (derniere) log vraisemblance associée à cet échantillon,
-                  // pour les valeurs estimées des paramètres
+       Real* phi, // parametre de moyenne renormalise, calcule par l'EM
+       Real* rho, // parametre de variance renormalise, calcule par l'EM
+       Real* pi, // parametre des proportions renormalise, calcule par l'EM
+       Real* llh, // (derniere) log vraisemblance associee a cet echantillon,
+                  // pour les valeurs estimees des parametres
         Real* S,
         int* affec,
         // additional size parameters
         int n, // nombre d'echantillons
         int p, // nombre de covariables
-       int m, // taille de Y (multivarié)
-       int k) // nombre de composantes dans le mélange
+       int m, // taille de Y (multivarie)
+       int k) // nombre de composantes dans le melange
  {
         //Initialize outputs
         copyArray(phiInit, phi, p*m*k);
@@ -67,7 +67,7 @@ void EMGLLF_core(
                 copyArray(rho, Rho, m*m*k);
                 copyArray(pi, Pi, k);
  
-               // Calculs associés a Y et X
+               // Calculs associes a Y et X
                 for (int r=0; r<k; r++)
                 {
                         for (int mm=0; mm<m; mm++)
@@ -174,7 +174,7 @@ void EMGLLF_core(
                 for (int v=0; v<k; v++)
                         gam2DotLogPi2 += gam2[v] * log(pi2[v]);
  
-               //t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante
+               //t(m) la plus grande valeur dans la grille O.1^k tel que ce soit decroissante ou constante
                 while (-invN*a + lambda*piPowGammaDotB < -invN*gam2DotLogPi2 + lambda*pi2PowGammaDotB
                         && kk<1000)
                 {
diff --git a/pkg/src/sources/EMGLLF.h b/pkg/src/EMGLLF.h

similarity index 100%

rename from pkg/src/sources/EMGLLF.h

rename to pkg/src/EMGLLF.h
diff --git a/pkg/src/sources/EMGrank.c b/pkg/src/EMGrank.c

similarity index 93%

rename from pkg/src/sources/EMGrank.c

rename to pkg/src/EMGrank.c

index 3a9bf94..a98ff91 100644 (file)
--- a/pkg/src/sources/EMGrank.c
+++ b/pkg/src/EMGrank.c
@@ -41,20 +41,20 @@ static Real* pinv(const Real* matrix, int dim)
  void EMGrank_core(
         // IN parameters
         const Real* Pi, // parametre de proportion
-       const Real* Rho, // parametre initial de variance renormalisé
-       int mini, // nombre minimal d'itérations dans l'algorithme EM
-       int maxi, // nombre maximal d'itérations dans l'algorithme EM
-       const Real* X, // régresseurs
-       const Real* Y, // réponse
+       const Real* Rho, // parametre initial de variance renormalise
+       int mini, // nombre minimal d'iterations dans l'algorithme EM
+       int maxi, // nombre maximal d'iterations dans l'algorithme EM
+       const Real* X, // regresseurs
+       const Real* Y, // reponse
         Real tau, // seuil pour accepter la convergence
         const int* rank, // vecteur des rangs possibles
         // OUT parameters
-       Real* phi, // parametre de moyenne renormalisé, calculé par l'EM
-       Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres
+       Real* phi, // parametre de moyenne renormalise, calcule par l'EM
+       Real* LLF, // log vraisemblance associe a cet echantillon, pour les valeurs estimees des parametres
         // additional size parameters
         int n, // taille de l'echantillon
         int p, // nombre de covariables
-       int m, // taille de Y (multivarié)
+       int m, // taille de Y (multivarie)
         int k) // nombre de composantes
  {
         // Allocations, initializations
@@ -91,7 +91,7 @@ void EMGrank_core(
                 // Etape M //
                 /////////////
                 
-               //M step: Mise à jour de Beta (et donc phi)
+               //M step: Mise a jour de Beta (et donc phi)
                 for (int r=0; r<k; r++)
                 {
                         //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
diff --git a/pkg/src/sources/EMGrank.h b/pkg/src/EMGrank.h

similarity index 100%

rename from pkg/src/sources/EMGrank.h

rename to pkg/src/EMGrank.h
diff --git a/pkg/src/Makevars b/pkg/src/Makevars

index 50b7fb6..6a25e63 100644 (file)
--- a/pkg/src/Makevars
+++ b/pkg/src/Makevars
@@ -1,11 +1 @@
-#Debug flags
-PKG_CFLAGS=-g -I./sources
-
-#Prod flags:
-#PKG_CFLAGS=-O2 -I./sources
-
  PKG_LIBS=-lm -lgsl -lcblas
-
-SOURCES = $(wildcard adapters/*.c sources/*.c)
-
-OBJECTS = $(SOURCES:.c=.o)
diff --git a/pkg/src/adapters/a.EMGLLF.c b/pkg/src/a.EMGLLF.c

similarity index 100%

rename from pkg/src/adapters/a.EMGLLF.c

rename to pkg/src/a.EMGLLF.c
diff --git a/pkg/src/adapters/a.EMGrank.c b/pkg/src/a.EMGrank.c

similarity index 100%

rename from pkg/src/adapters/a.EMGrank.c

rename to pkg/src/a.EMGrank.c
diff --git a/pkg/src/sources/utils.h b/pkg/src/utils.h

similarity index 100%

rename from pkg/src/sources/utils.h

rename to pkg/src/utils.h
diff --git a/pkg/src/valse_init.c b/pkg/src/valse_init.c

new file mode 100644 (file)

index 0000000..b8ce9dd
--- /dev/null
+++ b/pkg/src/valse_init.c
@@ -0,0 +1,19 @@
+#include <stdlib.h> // for NULL
+#include <R_ext/Rdynload.h>
+#include <Rdefines.h>
+
+/* .Call calls */
+extern SEXP EMGLLF(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+extern SEXP EMGrank(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
+
+static const R_CallMethodDef CEntries[] = {
+  { "EMGLLF",  (DL_FUNC) &EMGLLF,  11 },
+  { "EMGrank", (DL_FUNC) &EMGrank, 8  },
+  { NULL,      NULL,               0  }
+};
+
+void R_init_valse(DllInfo *dll)
+{
+  R_registerRoutines(dll, NULL, CEntries, NULL, NULL);
+  R_useDynamicSymbols(dll, FALSE);
+}
author	Benjamin Auder <benjamin.auder@somewhere>
	Wed, 11 Mar 2020 12:20:33 +0000 (13:20 +0100)
committer	Benjamin Auder <benjamin.auder@somewhere>
	Wed, 11 Mar 2020 12:20:33 +0000 (13:20 +0100)
pkg/DESCRIPTION		patch \| blob \| blame \| history
pkg/R/EMGLLF.R		patch \| blob \| blame \| history
pkg/R/EMGrank.R		patch \| blob \| blame \| history
pkg/R/computeGridLambda.R		patch \| blob \| blame \| history
pkg/R/generateXY.R		patch \| blob \| blame \| history
pkg/R/initSmallEM.R		patch \| blob \| blame \| history
pkg/R/main.R		patch \| blob \| blame \| history
pkg/R/plot_valse.R		patch \| blob \| blame \| history
pkg/R/selectVariables.R		patch \| blob \| blame \| history
pkg/data/data.RData	[deleted file]	patch \| blob \| blame \| history
pkg/data/data2.RData	[deleted file]	patch \| blob \| blame \| history
pkg/src/EMGLLF.c	[moved from pkg/src/sources/EMGLLF.c with 91% similarity]	patch \| blob \| blame \| history
pkg/src/EMGLLF.h	[moved from pkg/src/sources/EMGLLF.h with 100% similarity]	patch \| blob \| blame \| history
pkg/src/EMGrank.c	[moved from pkg/src/sources/EMGrank.c with 93% similarity]	patch \| blob \| blame \| history
pkg/src/EMGrank.h	[moved from pkg/src/sources/EMGrank.h with 100% similarity]	patch \| blob \| blame \| history
pkg/src/Makevars		patch \| blob \| blame \| history
pkg/src/a.EMGLLF.c	[moved from pkg/src/adapters/a.EMGLLF.c with 100% similarity]	patch \| blob \| blame \| history
pkg/src/a.EMGrank.c	[moved from pkg/src/adapters/a.EMGrank.c with 100% similarity]	patch \| blob \| blame \| history
pkg/src/utils.h	[moved from pkg/src/sources/utils.h with 100% similarity]	patch \| blob \| blame \| history
pkg/src/valse_init.c	[new file with mode: 0644]	patch \| blob