From: Benjamin Auder Date: Sun, 16 May 2021 19:36:32 +0000 (+0200) Subject: Package as sent to CRAN X-Git-Url: https://git.auder.net/app_dev.php/config.php?a=commitdiff_plain;h=64cceb2ece0d8142fee3e82e1cc56e20261caf45;p=valse.git Package as sent to CRAN --- diff --git a/.gitignore b/.gitignore index d8cc23d..643e73c 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ Rprof.out *.so *.exe .Rproj.user +symbols.rds diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index edb3356..13fee37 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: valse Title: Variable Selection with Mixture of Models -Date: 2020-03-11 +Date: 2021-05-16 Version: 0.1-0 Description: Two methods are implemented to cluster data with finite mixture regression models. Those procedures deal with high-dimensional covariates and @@ -20,15 +20,15 @@ Depends: Imports: MASS, parallel, - ggplot2, cowplot, + ggplot2, reshape2 Suggests: capushe, roxygen2 -URL: http://git.auder.net/?p=valse.git +URL: https://git.auder.net/?p=valse.git License: MIT + file LICENSE -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 Collate: 'plot_valse.R' 'main.R' diff --git a/pkg/LICENSE b/pkg/LICENSE index ccb78c4..b3f4c16 100644 --- a/pkg/LICENSE +++ b/pkg/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2014-2020 +YEAR: 2014-2021 COPYRIGHT HOLDER: Benjamin Auder, Emilie Devijver, Benjamin Goehry diff --git a/pkg/NAMESPACE b/pkg/NAMESPACE index a77f192..8d6ca72 100644 --- a/pkg/NAMESPACE +++ b/pkg/NAMESPACE @@ -1 +1,32 @@ -exportPattern(".") +# Generated by roxygen2: do not edit by hand + +export(EMGLLF) +export(EMGrank) +export(computeGridLambda) +export(constructionModelesLassoMLE) +export(constructionModelesLassoRank) +export(generateXY) +export(initSmallEM) +export(plot_valse) +export(runValse) +export(selectVariables) +importFrom(MASS,ginv) +importFrom(cowplot,background_grid) +importFrom(ggplot2,aes) +importFrom(ggplot2,geom_boxplot) +importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_tile) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,ggtitle) +importFrom(ggplot2,scale_fill_gradient2) +importFrom(ggplot2,theme) +importFrom(parallel,clusterExport) +importFrom(parallel,makeCluster) +importFrom(parallel,parLapply) +importFrom(parallel,stopCluster) +importFrom(reshape2,melt) +importFrom(stats,cutree) +importFrom(stats,dist) +importFrom(stats,hclust) +importFrom(stats,runif) +useDynLib(valse) diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R index 0584382..fd0cd4d 100644 --- a/pkg/R/constructionModelesLassoMLE.R +++ b/pkg/R/constructionModelesLassoMLE.R @@ -102,7 +102,7 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, # For each lambda, computation of the parameters out <- if (ncores > 1) { - parLapply(cl, 1:length(S), computeAtLambda) + parallel::parLapply(cl, 1:length(S), computeAtLambda) } else { lapply(1:length(S), computeAtLambda) } diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R index 6e18409..7a02967 100644 --- a/pkg/R/constructionModelesLassoRank.R +++ b/pkg/R/constructionModelesLassoRank.R @@ -83,7 +83,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, # For each lambda in the grid we compute the estimators out <- if (ncores > 1) { - parLapply(cl, seq_len(length(S) * Size), computeAtLambda) + parallel::parLapply(cl, seq_len(length(S) * Size), computeAtLambda) } else { lapply(seq_len(length(S) * Size), computeAtLambda) } diff --git a/pkg/R/main.R b/pkg/R/main.R index 129aa25..2afcdf7 100644 --- a/pkg/R/main.R +++ b/pkg/R/main.R @@ -35,10 +35,10 @@ #' data = generateXY(n, c(0.4,0.6), rep(0,p), beta, diag(0.5, p), diag(0.5, m)) #' X = data$X #' Y = data$Y -#' res = runValse(X, Y, kmax = 5) +#' res = runValse(X, Y, kmax = 5, plot=FALSE) #' X <- matrix(runif(100), nrow=50) #' Y <- matrix(runif(100), nrow=50) -#' res = runValse(X, Y) +#' res = runValse(X, Y, plot=FALSE) #' #' @export runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, @@ -50,8 +50,7 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, p <- ncol(X) m <- ncol(Y) - if (verbose) - print("main loop: over all k and all lambda") + if (verbose) print("main loop: over all k and all lambda") if (ncores_outer > 1) { cl <- parallel::makeCluster(ncores_outer, outfile = "") @@ -62,8 +61,7 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, } # Compute models with k components - computeModels <- function(k) - { + computeModels <- function(k) { if (ncores_outer > 1) require("valse") #nodes start with an empty environment @@ -73,8 +71,7 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, # component, doing this 20 times, and keeping the values maximizing the # likelihood after 10 iterations of the EM algorithm. P <- initSmallEM(k, X, Y, fast) - if (length(grid_lambda) == 0) - { + if (length(grid_lambda) == 0) { grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, X, Y, gamma, mini, maxi, eps, fast) } @@ -111,56 +108,45 @@ runValse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, # List (index k) of lists (index lambda) of models models_list <- if (ncores_outer > 1) { - parLapply(cl, kmin:kmax, computeModels) + parallel::parLapply(cl, kmin:kmax, computeModels) } else { lapply(kmin:kmax, computeModels) } - if (ncores_outer > 1) - parallel::stopCluster(cl) + if (ncores_outer > 1) parallel::stopCluster(cl) - if (!requireNamespace("capushe", quietly = TRUE)) - { + if (!requireNamespace("capushe", quietly = TRUE)) { warning("'capushe' not available: returning all models") return(models_list) } # Get summary 'tableauRecap' from models - tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) - { + tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) { models <- models_list[[i]] # For a collection of models (same k, several lambda): LLH <- sapply(models, function(model) model$llh[1]) k <- length(models[[1]]$pi) - sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, - , 1] != 0) + 1) - 1) - data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, - complexity = sumPen, contrast = -LLH) + sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[,,1] != 0) + 1) - 1) + data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, complexity = sumPen, contrast = -LLH) })) tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] - if (verbose) - print(tableauRecap) + if (verbose) print(tableauRecap) if (nrow(tableauRecap) > 10) { modSel <- capushe::capushe(tableauRecap, n) - indModSel <- if (selecMod == "DDSE") - { + indModSel <- if (selecMod == "DDSE") { as.numeric(modSel@DDSE@model) - } else if (selecMod == "Djump") - { + } else if (selecMod == "Djump") { as.numeric(modSel@Djump@model) - } else if (selecMod == "BIC") - { + } else if (selecMod == "BIC") { modSel@BIC_capushe$model - } else if (selecMod == "AIC") - { + } else if (selecMod == "AIC") { modSel@AIC_capushe$model } listMod <- as.integer(unlist(strsplit(as.character(indModSel), "[.]"))) modelSel <- models_list[[listMod[1]]][[listMod[2]]] modelSel$models <- tableauRecap - if (plot) - print(plot_valse(X, Y, modelSel)) + if (plot) plot_valse(X, Y, modelSel) return(modelSel) } tableauRecap diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R index b47c7da..e3fd38e 100644 --- a/pkg/R/plot_valse.R +++ b/pkg/R/plot_valse.R @@ -1,3 +1,4 @@ +utils::globalVariables(c("Var1","Var2","X1","X2","value")) #, package="valse") #' Plot #' #' It is a function which plots relevant parameters @@ -9,7 +10,7 @@ #' @param k1 index of the first cluster to be compared #' @param k2 index of the second cluster to be compared #' -#' @importFrom ggplot2 ggplot aes ggtitle geom_tile geom_line geom_point scale_fill_gradient2 geom_boxplot theme +#' @importFrom ggplot2 ggplot aes ggtitle geom_tile geom_line scale_fill_gradient2 geom_boxplot theme #' @importFrom cowplot background_grid #' @importFrom reshape2 melt #' @@ -20,24 +21,22 @@ plot_valse <- function(X, Y, model, comp = FALSE, k1 = NA, k2 = NA) K <- length(model$pi) ## regression matrices gReg <- list() - for (r in 1:K) - { - Melt <- melt(t((model$phi[, , r]))) - gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + - geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", - midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r)) + for (r in 1:K) { + Melt <- reshape2::melt(t((model$phi[, , r]))) + gReg[[r]] <- ggplot2::ggplot(data = Melt, ggplot2::aes(x = Var1, y = Var2, fill = value)) + + ggplot2::geom_tile() + ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white", + midpoint = 0, space = "Lab") + ggplot2::ggtitle(paste("Regression matrices in cluster", r)) } print(gReg) ## Differences between two clusters - if (comp) - { + if (comp) { if (is.na(k1) || is.na(k2)) print("k1 and k2 must be integers, representing the clusters you want to compare") - Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2])) - gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + - geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, - space = "Lab") + ggtitle(paste("Difference between regression matrices in cluster", + Melt <- reshape2::melt(t(model$phi[, , k1] - model$phi[, , k2])) + gDiff <- ggplot2::ggplot(data = Melt, ggplot2::aes(x = Var1, y = Var2, fill = value)) + + ggplot2::geom_tile() + ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + ggplot2::ggtitle(paste("Difference between regression matrices in cluster", k1, "and", k2)) print(gDiff) } @@ -46,19 +45,19 @@ plot_valse <- function(X, Y, model, comp = FALSE, k1 = NA, k2 = NA) matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K) for (r in 1:K) matCov[, r] <- diag(model$rho[, , r]) - MeltCov <- melt(matCov) - gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + - scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, - space = "Lab") + ggtitle("Covariance matrices (diag., one row per cluster)") + MeltCov <- reshape2::melt(matCov) + gCov <- ggplot2::ggplot(data = MeltCov, ggplot2::aes(x = Var1, y = Var2, fill = value)) + ggplot2::geom_tile() + + ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + ggplot2::ggtitle("Covariance matrices (diag., one row per cluster)") print(gCov) ### Proportions - gam2 <- matrix(NA, ncol = K, nrow = n) + gam2 <- matrix(NA, ncol = 2, nrow = n) for (i in 1:n) gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i]) - bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + geom_boxplot() + - theme(legend.position = "none") + background_grid(major = "xy", minor = "none") + - ggtitle("Assignment boxplot per cluster") + bp <- ggplot2::ggplot(data.frame(gam2), ggplot2::aes(x = X2, y = X1, color = X2, group = X2)) + ggplot2::geom_boxplot() + + ggplot2::theme(legend.position = "none") + cowplot::background_grid(major = "xy", minor = "none") + + ggplot2::ggtitle("Assignment boxplot per cluster") print(bp) } diff --git a/pkg/src/Makevars b/pkg/src/Makevars index 6a25e63..6932e7b 100644 --- a/pkg/src/Makevars +++ b/pkg/src/Makevars @@ -1 +1,2 @@ -PKG_LIBS=-lm -lgsl -lcblas +#PKG_LIBS=-lm -lgsl -lcblas +PKG_LIBS = `$(R_HOME)/bin/Rscript -e "RcppGSL:::LdFlags()"` -lm $(BLAS_LIBS) $(FLIBS) diff --git a/test/pkgExample.R b/test/pkgExample.R new file mode 100644 index 0000000..215c462 --- /dev/null +++ b/test/pkgExample.R @@ -0,0 +1,8 @@ +library(valse) +n = 50; m = 10; p = 5 +beta = array(0, dim=c(p,m,2)) +beta[,,1] = 1 +beta[,,2] = 2 +data = generateXY(n, c(0.4,0.6), rep(0,p), beta, diag(0.5, p), diag(0.5, m)) +X = data$X ; Y = data$Y +res = runValse(X, Y, kmax = 5)