From 228ee602a972fcac6177db0d539bf9d0c5fa477f Mon Sep 17 00:00:00 2001 From: emilie Date: Fri, 21 Apr 2017 15:58:12 +0200 Subject: [PATCH] essai fusion --- pkg/DESCRIPTION | 42 +++ pkg/LICENSE | 23 ++ pkg/R/A_NAMESPACE.R | 16 ++ pkg/R/EMGLLF.R | 194 +++++++++++++ pkg/R/EMGrank.R | 120 ++++++++ pkg/R/computeGridLambda.R | 36 +++ pkg/R/constructionModelesLassoMLE.R | 111 ++++++++ pkg/R/constructionModelesLassoRank.R | 95 ++++++ pkg/R/generateXY.R | 39 +++ pkg/R/initSmallEM.R | 79 +++++ pkg/R/main.R | 161 +++++++++++ pkg/R/plot_valse.R | 89 ++++++ pkg/R/selectVariables.R | 81 ++++++ pkg/R/util.R | 7 + pkg/data/data.RData | Bin 0 -> 1010 bytes pkg/data/data2.RData | Bin 0 -> 18385 bytes pkg/data/script_data.R | 15 + pkg/inst/testdata/TODO.csv | 1 + pkg/man/valse-package.Rd | 37 +++ pkg/src/Makevars | 11 + pkg/src/adapters/a.EMGLLF.c | 91 ++++++ pkg/src/adapters/a.EMGrank.c | 73 +++++ pkg/src/sources/EMGLLF.c | 412 +++++++++++++++++++++++++++ pkg/src/sources/EMGLLF.h | 32 +++ pkg/src/sources/EMGrank.c | 307 ++++++++++++++++++++ pkg/src/sources/EMGrank.h | 25 ++ pkg/src/sources/utils.h | 48 ++++ pkg/tests/testthat.R | 4 + pkg/tests/testthat/helper-context1.R | 5 + pkg/tests/testthat/test-context1.R | 11 + pkg/vignettes/.gitignore | 14 + 31 files changed, 2179 insertions(+) create mode 100644 pkg/DESCRIPTION create mode 100644 pkg/LICENSE create mode 100644 pkg/R/A_NAMESPACE.R create mode 100644 pkg/R/EMGLLF.R create mode 100644 pkg/R/EMGrank.R create mode 100644 pkg/R/computeGridLambda.R create mode 100644 pkg/R/constructionModelesLassoMLE.R create mode 100644 pkg/R/constructionModelesLassoRank.R create mode 100644 pkg/R/generateXY.R create mode 100644 pkg/R/initSmallEM.R create mode 100644 pkg/R/main.R create mode 100644 pkg/R/plot_valse.R create mode 100644 pkg/R/selectVariables.R create mode 100644 pkg/R/util.R create mode 100644 pkg/data/data.RData create mode 100644 pkg/data/data2.RData create mode 100644 pkg/data/script_data.R create mode 100644 pkg/inst/testdata/TODO.csv create mode 100644 pkg/man/valse-package.Rd create mode 100644 pkg/src/Makevars create mode 100644 pkg/src/adapters/a.EMGLLF.c create mode 100644 pkg/src/adapters/a.EMGrank.c create mode 100644 pkg/src/sources/EMGLLF.c create mode 100644 pkg/src/sources/EMGLLF.h create mode 100644 pkg/src/sources/EMGrank.c create mode 100644 pkg/src/sources/EMGrank.h create mode 100644 pkg/src/sources/utils.h create mode 100644 pkg/tests/testthat.R create mode 100644 pkg/tests/testthat/helper-context1.R create mode 100644 pkg/tests/testthat/test-context1.R create mode 100644 pkg/vignettes/.gitignore diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION new file mode 100644 index 0000000..3b33e25 --- /dev/null +++ b/pkg/DESCRIPTION @@ -0,0 +1,42 @@ +Package: valse +Title: Variable Selection With Mixture Of Models +Date: 2016-12-01 +Version: 0.1-0 +Description: Two methods are implemented to cluster data with finite mixture + regression models. Those procedures deal with high-dimensional covariates and + responses through a variable selection procedure based on the Lasso estimator. + A low-rank constraint could be added, computed for the Lasso-Rank procedure. + A collection of models is constructed, varying the level of sparsity and the + number of clusters, and a model is selected using a model selection criterion + (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model- + based clustering for high-dimensional data. Application to functional data' by + Emilie Devijver, published in Advances in Data Analysis and Clustering (2016). +Author: Benjamin Auder [aut,cre], + Emilie Devijver [aut], + Benjamin Goehry [aut] +Maintainer: Benjamin Auder +Depends: + R (>= 3.0.0) +Imports: + MASS, + parallel +Suggests: + capushe, + roxygen2, + testhat +URL: http://git.auder.net/?p=valse.git +License: MIT + file LICENSE +RoxygenNote: 5.0.1 +Collate: + 'plot_valse.R' + 'main.R' + 'selectVariables.R' + 'constructionModelesLassoRank.R' + 'constructionModelesLassoMLE.R' + 'computeGridLambda.R' + 'initSmallEM.R' + 'EMGrank.R' + 'EMGLLF.R' + 'generateXY.R' + 'A_NAMESPACE.R' + 'util.R' diff --git a/pkg/LICENSE b/pkg/LICENSE new file mode 100644 index 0000000..a212458 --- /dev/null +++ b/pkg/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) + 2014-2017, Benjamin Auder + 2014-2017, Emilie Devijver + 2016-2017, Benjamin Goehry + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R new file mode 100644 index 0000000..8e1783e --- /dev/null +++ b/pkg/R/A_NAMESPACE.R @@ -0,0 +1,16 @@ +#' @include generateXY.R +#' @include EMGLLF.R +#' @include EMGrank.R +#' @include initSmallEM.R +#' @include computeGridLambda.R +#' @include constructionModelesLassoMLE.R +#' @include constructionModelesLassoRank.R +#' @include selectVariables.R +#' @include main.R +#' @include plot_valse.R +#' +#' @useDynLib valse +#' +#' @importFrom parallel makeCluster parLapply stopCluster clusterExport +#' @importFrom MASS ginv +NULL diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R new file mode 100644 index 0000000..bf4476b --- /dev/null +++ b/pkg/R/EMGLLF.R @@ -0,0 +1,194 @@ +#' EMGLLF +#' +#' Description de EMGLLF +#' +#' @param phiInit an initialization for phi +#' @param rhoInit an initialization for rho +#' @param piInit an initialization for pi +#' @param gamInit initialization for the a posteriori probabilities +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param lambda regularization parameter in the Lasso estimation +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' +#' @return A list ... phi,rho,pi,LLF,S,affec: +#' phi : parametre de moyenne renormalisé, calculé par l'EM +#' rho : parametre de variance renormalisé, calculé par l'EM +#' pi : parametre des proportions renormalisé, calculé par l'EM +#' LLF : log vraisemblance associée à cet échantillon, pour les valeurs estimées des paramètres +#' S : ... affec : ... +#' +#' @export +EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, fast) +{ + if (!fast) + { + # Function in R + return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps)) + } + + # Function in C + n <- nrow(X) #nombre d'echantillons + p <- ncol(X) #nombre de covariables + m <- ncol(Y) #taille de Y (multivarié) + k <- length(piInit) #nombre de composantes dans le mélange + .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k), + LLF = double(maxi), S = double(p * m * k), affec = integer(n), n, p, m, k, + PACKAGE = "valse") +} + +# R version - slow but easy to read +.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps) +{ + # Matrix dimensions + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + + # Adjustments required when p==1 or m==1 (var.sel. or output dim 1) + if (p==1 || m==1) + phiInit <- array(phiInit, dim=c(p,m,k)) + if (m==1) + rhoInit <- array(rhoInit, dim=c(m,m,k)) + + # Outputs + phi <- phiInit + rho <- rhoInit + pi <- piInit + llh <- -Inf + S <- array(0, dim = c(p, m, k)) + + # Algorithm variables + gam <- gamInit + Gram2 <- array(0, dim = c(p, p, k)) + ps2 <- array(0, dim = c(p, m, k)) + X2 <- array(0, dim = c(n, p, k)) + Y2 <- array(0, dim = c(n, m, k)) + EPS <- 1e-15 + + for (ite in 1:maxi) + { + # Remember last pi,rho,phi values for exit condition in the end of loop + Phi <- phi + Rho <- rho + Pi <- pi + + # Computations associated to X and Y + for (r in 1:k) + { + for (mm in 1:m) + Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm] + for (i in 1:n) + X2[i, , r] <- sqrt(gam[i, r]) * X[i, ] + for (mm in 1:m) + ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r]) + for (j in 1:p) + { + for (s in 1:p) + Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r]) + } + } + + ## M step + + # For pi + b <- sapply(1:k, function(r) sum(abs(phi[, , r]))) + gam2 <- colSums(gam) + a <- sum(gam %*% log(pi)) + + # While the proportions are nonpositive + kk <- 0 + pi2AllPositive <- FALSE + while (!pi2AllPositive) + { + pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi) + pi2AllPositive <- all(pi2 >= 0) + kk <- kk + 1 + } + + # t(m) is the largest value in the grid O.1^k such that it is nonincreasing + while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + # na.rm=TRUE to handle 0*log(0) + -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b)) + { + pi2 <- pi + 0.1^kk * (1/n * gam2 - pi) + kk <- kk + 1 + } + t <- 0.1^kk + pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi)) + + # For phi and rho + for (r in 1:k) + { + for (mm in 1:m) + { + ps <- 0 + for (i in 1:n) + ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r]) + nY2 <- sum(Y2[, mm, r]^2) + rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2) + } + } + + for (r in 1:k) + { + for (j in 1:p) + { + for (mm in 1:m) + { + S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + + sum(phi[-j, mm, r] * Gram2[j, -j, r]) + if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- 0 + } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r] + } else { + phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r] + } + } + } + } + + ## E step + + # Precompute det(rho[,,r]) for r in 1...k + detRho <- sapply(1:k, function(r) gdet(rho[, , r])) + sumLogLLH <- 0 + for (i in 1:n) + { + # Update gam[,]; use log to avoid numerical problems + logGam <- sapply(1:k, function(r) { + log(pi[r]) + log(detRho[r]) - 0.5 * + sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2) + }) + + logGam <- logGam - max(logGam) #adjust without changing proportions + gam[i, ] <- exp(logGam) + norm_fact <- sum(gam[i, ]) + gam[i, ] <- gam[i, ] / norm_fact + sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2)) + } + + sumPen <- sum(pi^gamma * b) + last_llh <- llh + llh <- -sumLogLLH/n #+ lambda * sumPen + dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh))) + Dist1 <- max((abs(phi - Phi))/(1 + abs(phi))) + Dist2 <- max((abs(rho - Rho))/(1 + abs(rho))) + Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi))) + dist2 <- max(Dist1, Dist2, Dist3) + + if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) + break + } + + list(phi = phi, rho = rho, pi = pi, llh = llh, S = S) +} diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R new file mode 100644 index 0000000..b85a0fa --- /dev/null +++ b/pkg/R/EMGrank.R @@ -0,0 +1,120 @@ +#' EMGrank +#' +#' Description de EMGrank +#' +#' @param Pi Parametre de proportion +#' @param Rho Parametre initial de variance renormalisé +#' @param mini Nombre minimal d'itérations dans l'algorithme EM +#' @param maxi Nombre maximal d'itérations dans l'algorithme EM +#' @param X Régresseurs +#' @param Y Réponse +#' @param tau Seuil pour accepter la convergence +#' @param rank Vecteur des rangs possibles +#' +#' @return A list ... +#' phi : parametre de moyenne renormalisé, calculé par l'EM +#' LLF : log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres +#' +#' @export +EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE) +{ + if (!fast) + { + # Function in R + return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank)) + } + + # Function in C + n <- nrow(X) #nombre d'echantillons + p <- ncol(X) #nombre de covariables + m <- ncol(Y) #taille de Y (multivarié) + k <- length(Pi) #nombre de composantes dans le mélange + .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, tau, rank, phi = double(p * m * k), + LLF = double(1), n, p, m, k, PACKAGE = "valse") +} + +# helper to always have matrices as arg (TODO: put this elsewhere? improve?) --> +# Yes, we should use by-columns storage everywhere... [later!] +matricize <- function(X) +{ + if (!is.matrix(X)) + return(t(as.matrix(X))) + return(X) +} + +# R version - slow but easy to read +.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, tau, rank) +{ + # matrix dimensions + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(Pi) + + # init outputs + phi <- array(0, dim = c(p, m, k)) + Z <- rep(1, n) + LLF <- 0 + + # local variables + Phi <- array(0, dim = c(p, m, k)) + deltaPhi <- c() + sumDeltaPhi <- 0 + deltaPhiBufferSize <- 20 + + # main loop + ite <- 1 + while (ite <= mini || (ite <= maxi && sumDeltaPhi > tau)) + { + # M step: update for Beta ( and then phi) + for (r in 1:k) + { + Z_indice <- seq_len(n)[Z == r] #indices where Z == r + if (length(Z_indice) == 0) + next + # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% + crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ]))) + S <- s$d + # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation + # of the initial product + if (rank[r] < length(S)) + S[(rank[r] + 1):length(S)] <- 0 + phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r] + } + + # Step E and computation of the loglikelihood + sumLogLLF2 <- 0 + for (i in seq_len(n)) + { + sumLLF1 <- 0 + maxLogGamIR <- -Inf + for (r in seq_len(k)) + { + dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r]) + logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct + # Z[i] = index of max (gam[i,]) + if (logGamIR > maxLogGamIR) + { + Z[i] <- r + maxLogGamIR <- logGamIR + } + sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2) + } + sumLogLLF2 <- sumLogLLF2 + log(sumLLF1) + } + + LLF <- -1/n * sumLogLLF2 + + # update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain? + if (length(deltaPhi) > deltaPhiBufferSize) + deltaPhi <- deltaPhi[2:length(deltaPhi)] + sumDeltaPhi <- sum(abs(deltaPhi)) + + # update other local variables + Phi <- phi + ite <- ite + 1 + } + return(list(phi = phi, LLF = LLF)) +} diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R new file mode 100644 index 0000000..8449d10 --- /dev/null +++ b/pkg/R/computeGridLambda.R @@ -0,0 +1,36 @@ +#' computeGridLambda +#' +#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator +#' +#' @param phiInit value for phi +#' @param rhoInit for rho +#' @param piInit for pi +#' @param gamInit value for gamma +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param gamma power of weights in the penalty +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param tau threshold to stop EM algorithm +#' +#' @return the grid of regularization parameters +#' +#' @export +computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, + maxi, tau, fast) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + + list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, + X, Y, tau, fast) + grid <- array(0, dim = c(p, m, k)) + for (j in 1:p) + { + for (mm in 1:m) + grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma) + } + sort(unique(grid)) +} diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R new file mode 100644 index 0000000..9743f0c --- /dev/null +++ b/pkg/R/constructionModelesLassoMLE.R @@ -0,0 +1,111 @@ +#' constructionModelesLassoMLE +#' +#' Construct a collection of models with the Lasso-MLE procedure. +#' +#' @param phiInit an initialization for phi, get by initSmallEM.R +#' @param rhoInit an initialization for rho, get by initSmallEM.R +#' @param piInit an initialization for pi, get by initSmallEM.R +#' @param gamInit an initialization for gam, get by initSmallEM.R +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param S output of selectVariables.R +#' @param ncores Number of cores, by default = 3 +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with several models, defined by phi, rho, pi, llh +#' +#' @export +constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, + maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose) +{ + if (ncores > 1) + { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit", + "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", + "ncores", "fast", "verbose")) + } + + # Individual model computation + computeAtLambda <- function(lambda) + { + if (ncores > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Computations for lambda=", lambda)) + + n <- dim(X)[1] + p <- dim(phiInit)[1] + m <- dim(phiInit)[2] + k <- dim(phiInit)[3] + sel.lambda <- S[[lambda]]$selected + # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix + col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars + if (length(col.sel) == 0) + return(NULL) + + # lambda == 0 because we compute the EMV: no penalization here + res <- EMGLLF(array(phiInit[col.sel, , ],dim=c(length(col.sel),m,k)), rhoInit, + piInit, gamInit, mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast) + + # Eval dimension from the result + selected + phiLambda2 <- res$phi + rhoLambda <- res$rho + piLambda <- res$pi + phiLambda <- array(0, dim = c(p, m, k)) + for (j in seq_along(col.sel)) + phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ] + dimension <- length(unlist(sel.lambda)) + + ## Computation of the loglikelihood + # Precompute det(rhoLambda[,,r]) for r in 1...k + detRho <- sapply(1:k, function(r) det(rhoLambda[, , r])) + sumLogLLH <- 0 + for (i in 1:n) + { + # Update gam[,]; use log to avoid numerical problems + logGam <- sapply(1:k, function(r) { + log(piLambda[r]) + log(detRho[r]) - 0.5 * + sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + }) + + logGam <- logGam - max(logGam) #adjust without changing proportions + gam <- exp(logGam) + print(gam) + norm_fact <- sum(gam) + sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2)) + } + llhLambda <- c(sumLogLLH/n, (dimension + m + 1) * k - 1) + # densite <- vector("double", n) + # for (r in 1:k) + # { + # if (length(col.sel) == 1) + # { + # delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r]))) + # } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r])) + # densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * + # exp(-rowSums(delta^2)/2) + # } + # llhLambda <- c(mean(log(densite)), (dimension + m + 1) * k - 1) + list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda) + } + + # For each lambda, computation of the parameters + out <- + if (ncores > 1) { + parLapply(cl, 1:length(S), computeAtLambda) + } else { + lapply(1:length(S), computeAtLambda) + } + + if (ncores > 1) + parallel::stopCluster(cl) + + out +} diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R new file mode 100644 index 0000000..dc88f67 --- /dev/null +++ b/pkg/R/constructionModelesLassoRank.R @@ -0,0 +1,95 @@ +#' constructionModelesLassoRank +#' +#' Construct a collection of models with the Lasso-Rank procedure. +#' +#' @param S output of selectVariables.R +#' @param k number of components +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1 +#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5 +#' @param ncores Number of cores, by default = 3 +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with several models, defined by phi, rho, pi, llh +#' +#' @export +constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, + ncores, fast, verbose) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + L <- length(S) + + # Possible interesting ranks + deltaRank <- rank.max - rank.min + 1 + Size <- deltaRank^k + RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1) + for (r in 1:k) + { + # On veut le tableau de toutes les combinaisons de rangs possibles, et des + # lambdas Dans la première colonne : on répète (rank.max-rank.min)^(k-1) chaque + # chiffre : ça remplit la colonne Dans la deuxieme : on répète + # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait ça (rank.max-rank.min)^2 + # fois ... Dans la dernière, on répète chaque chiffre une fois, et on fait ça + # (rank.min-rank.max)^(k-1) fois. + RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1), + each = deltaRank^(k - r)), each = L) + } + RankLambda[, k + 1] <- rep(1:L, times = Size) + + if (ncores > 1) + { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size", + "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", + "verbose")) + } + + computeAtLambda <- function(index) + { + lambdaIndex <- RankLambda[index, k + 1] + rankIndex <- RankLambda[index, 1:k] + if (ncores > 1) + require("valse") #workers start with an empty environment + + # 'relevant' will be the set of relevant columns + selected <- S[[lambdaIndex]]$selected + relevant <- c() + for (j in 1:p) + { + if (length(selected[[j]]) > 0) + relevant <- c(relevant, j) + } + if (max(rankIndex) < length(relevant)) + { + phi <- array(0, dim = c(p, m, k)) + if (length(relevant) > 0) + { + res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, + X[, relevant], Y, eps, rankIndex, fast) + llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m))) + phi[relevant, , ] <- res$phi + } + list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho) + } + } + + # For each lambda in the grid we compute the estimators + out <- + if (ncores > 1) { + parLapply(cl, seq_len(length(S) * Size), computeAtLambda) + } else { + lapply(seq_len(length(S) * Size), computeAtLambda) + } + + if (ncores > 1) + parallel::stopCluster(cl) + + out +} diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R new file mode 100644 index 0000000..064b54b --- /dev/null +++ b/pkg/R/generateXY.R @@ -0,0 +1,39 @@ +#' generateXY +#' +#' Generate a sample of (X,Y) of size n +#' +#' @param n sample size +#' @param π proportion for each cluster +#' @param meanX matrix of group means for covariates (of size p) +#' @param covX covariance for covariates (of size p*p) +#' @param β regression matrix, of size p*m*k +#' @param covY covariance for the response vector (of size m*m*K) +#' +#' @return list with X and Y +#' +#' @export +generateXY <- function(n, π, meanX, β, covX, covY) +{ + p <- dim(covX)[1] + m <- dim(covY)[1] + k <- dim(covY)[3] + + X <- matrix(nrow = 0, ncol = p) + Y <- matrix(nrow = 0, ncol = m) + + # random generation of the size of each population in X~Y (unordered) + sizePop <- rmultinom(1, n, π) + class <- c() #map i in 1:n --> index of class in 1:k + + for (i in 1:k) + { + class <- c(class, rep(i, sizePop[i])) + newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX) + X <- rbind(X, newBlockX) + Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% + β[, , i], covY[, , i])))) + } + + shuffle <- sample(n) + list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle]) +} diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R new file mode 100644 index 0000000..44b4b06 --- /dev/null +++ b/pkg/R/initSmallEM.R @@ -0,0 +1,79 @@ +#' initialization of the EM algorithm +#' +#' @param k number of components +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' +#' @return a list with phiInit, rhoInit, piInit, gamInit +#' @export +#' @importFrom methods new +#' @importFrom stats cutree dist hclust runif +initSmallEM <- function(k, X, Y, fast) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + nIte <- 20 + Zinit1 <- array(0, dim = c(n, nIte)) + betaInit1 <- array(0, dim = c(p, m, k, nIte)) + sigmaInit1 <- array(0, dim = c(m, m, k, nIte)) + phiInit1 <- array(0, dim = c(p, m, k, nIte)) + rhoInit1 <- array(0, dim = c(m, m, k, nIte)) + Gam <- matrix(0, n, k) + piInit1 <- matrix(0, nIte, k) + gamInit1 <- array(0, dim = c(n, k, nIte)) + LLFinit1 <- list() + + # require(MASS) #Moore-Penrose generalized inverse of matrix + for (repet in 1:nIte) + { + distance_clus <- dist(cbind(X, Y)) + tree_hier <- hclust(distance_clus) + Zinit1[, repet] <- cutree(tree_hier, k) + + for (r in 1:k) + { + Z <- Zinit1[, repet] + Z_indice <- seq_len(n)[Z == r] #renvoit les indices où Z==r + if (length(Z_indice) == 1) { + betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% + crossprod(t(X[Z_indice, ]), Y[Z_indice, ]) + } else { + betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% + crossprod(X[Z_indice, ], Y[Z_indice, ]) + } + sigmaInit1[, , r, repet] <- diag(m) + phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet] + rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet]) + piInit1[repet, r] <- mean(Z == r) + } + + for (i in 1:n) + { + for (r in 1:k) + { + dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] + - X[i, ] %*% phiInit1[, , r, repet]) + Gam[i, r] <- piInit1[repet, r] * + gdet(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct) + } + sumGamI <- sum(Gam[i, ]) + gamInit1[i, , repet] <- Gam[i, ]/sumGamI + } + + miniInit <- 10 + maxiInit <- 11 + + init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ], + gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y, + eps = 1e-04, fast) + LLFinit1[[repet]] <- init_EMG$llh + } + b <- which.min(LLFinit1) + phiInit <- phiInit1[, , , b] + rhoInit <- rhoInit1[, , , b] + piInit <- piInit1[b, ] + gamInit <- gamInit1[, , b] + + return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit)) +} diff --git a/pkg/R/main.R b/pkg/R/main.R new file mode 100644 index 0000000..d710b7e --- /dev/null +++ b/pkg/R/main.R @@ -0,0 +1,161 @@ +#' valse +#' +#' Main function +#' +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param procedure among 'LassoMLE' or 'LassoRank' +#' @param selecMod method to select a model among 'DDSE', 'DJump', 'BIC' or 'AIC' +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param kmin integer, minimum number of clusters, by default = 2 +#' @param kmax integer, maximum number of clusters, by default = 10 +#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1 +#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5 +#' @param ncores_outer Number of cores for the outer loop on k +#' @param ncores_inner Number of cores for the inner loop on lambda +#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param compute_grid_lambda, TRUE to compute the grid, FALSE if known (in arguments) +#' @param grid_lambda, a vector with regularization parameters if known, by default 0 +#' @param size_coll_mod (Maximum) size of a collection of models +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with estimators of parameters +#' +#' @examples +#' #TODO: a few examples +#' @export +valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, + maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, + ncores_inner = 1, thresh = 1e-08, compute_grid_lambda = TRUE, grid_lambda = 0, size_coll_mod = 10, fast = TRUE, verbose = FALSE, + plot = TRUE) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + + if (verbose) + print("main loop: over all k and all lambda") + + if (ncores_outer > 1) { + cl <- parallel::makeCluster(ncores_outer, outfile = "") + parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", + "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", + "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", + "size_coll_mod", "verbose", "p", "m")) + } + + # Compute models with k components + computeModels <- function(k) + { + if (ncores_outer > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Parameters initialization for k =", k)) + # smallEM initializes parameters by k-means and regression model in each + # component, doing this 20 times, and keeping the values maximizing the + # likelihood after 10 iterations of the EM algorithm. + P <- initSmallEM(k, X, Y, fast) + if (compute_grid_lambda == TRUE) + { + grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, + X, Y, gamma, mini, maxi, eps, fast) + } + if (length(grid_lambda) > size_coll_mod) + grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)] + + if (verbose) + print("Compute relevant parameters") + # select variables according to each regularization parameter from the grid: + # S$selected corresponding to selected variables + S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, + gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast) + + if (procedure == "LassoMLE") { + if (verbose) + print("run the procedure Lasso-MLE") + # compute parameter estimations, with the Maximum Likelihood Estimator, + # restricted on selected variables. + models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, + P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose) + } else { + if (verbose) + print("run the procedure Lasso-Rank") + # compute parameter estimations, with the Low Rank Estimator, restricted on + # selected variables. + models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min, + rank.max, ncores_inner, fast, verbose) + } + # warning! Some models are NULL after running selectVariables + models <- models[sapply(models, function(cell) !is.null(cell))] + models + } + + # List (index k) of lists (index lambda) of models + models_list <- + if (ncores_outer > 1) { + parLapply(cl, kmin:kmax, computeModels) + } else { + lapply(kmin:kmax, computeModels) + } + if (ncores_outer > 1) + parallel::stopCluster(cl) + + if (!requireNamespace("capushe", quietly = TRUE)) + { + warning("'capushe' not available: returning all models") + return(models_list) + } + + # Get summary 'tableauRecap' from models + tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) + { + models <- models_list[[i]] + # For a collection of models (same k, several lambda): + LLH <- sapply(models, function(model) model$llh[1]) + k <- length(models[[1]]$pi) + sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, + , 1] != 0) + 1) - 1) + data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, + complexity = sumPen, contrast = -LLH) + })) + tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] + if (verbose == TRUE) + { + print(tableauRecap) + } + modSel <- capushe::capushe(tableauRecap, n) + indModSel <- if (selecMod == "DDSE") + as.numeric(modSel@DDSE@model) else if (selecMod == "Djump") + as.numeric(modSel@Djump@model) else if (selecMod == "BIC") + modSel@BIC_capushe$model else if (selecMod == "AIC") + modSel@AIC_capushe$model + + mod <- as.character(tableauRecap[indModSel, 1]) + listMod <- as.integer(unlist(strsplit(mod, "[.]"))) + modelSel <- models_list[[listMod[1]]][[listMod[2]]] + + ## Affectations + Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n) + for (i in 1:n) + { + for (r in 1:length(modelSel$pi)) + { + sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2) + Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * gdet(modelSel$rho[, , r]) + } + } + Gam <- Gam/rowSums(Gam) + modelSel$affec <- apply(Gam, 1, which.max) + modelSel$proba <- Gam + modelSel$tableau <- tableauRecap + + if (plot) + print(plot_valse(X, Y, modelSel, n)) + + return(modelSel) +} diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R new file mode 100644 index 0000000..ec2302d --- /dev/null +++ b/pkg/R/plot_valse.R @@ -0,0 +1,89 @@ +#' Plot +#' +#' It is a function which plots relevant parameters +#' +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param model the model constructed by valse procedure +#' @param n sample size +#' @return several plots +#' +#' @examples TODO +#' +#' @export +#' +plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA) +{ + require("gridExtra") + require("ggplot2") + require("reshape2") + require("cowplot") + + K <- length(model$pi) + ## regression matrices + gReg <- list() + for (r in 1:K) + { + Melt <- melt(t((model$phi[, , r]))) + gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", + midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r)) + } + print(gReg) + + ## Differences between two clusters + if (comp) + { + if (is.na(k1) || is.na(k)) + print("k1 and k2 must be integers, representing the clusters you want to compare") + Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2])) + gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle(paste("Difference between regression matrices in cluster", + k1, "and", k2)) + print(gDiff) + } + + ### Covariance matrices + matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K) + for (r in 1:K) + matCov[, r] <- diag(model$rho[, , r]) + MeltCov <- melt(matCov) + gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle("Covariance matrices") + print(gCov) + + ### Proportions + gam2 <- matrix(NA, ncol = K, nrow = n) + for (i in 1:n) + gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i]) + + bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + + geom_boxplot() + + theme(legend.position = "none") + + background_grid(major = "xy", minor = "none") + print(bp) + + ### Mean in each cluster + XY <- cbind(X, Y) + XY_class <- list() + meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2]) + for (r in 1:K) + { + XY_class[[r]] <- XY[model$affec == r, ] + if (sum(model$affec == r) == 1) { + meanPerClass[, r] <- XY_class[[r]] + } else { + meanPerClass[, r] <- apply(XY_class[[r]], 2, mean) + } + } + data <- data.frame(mean = as.vector(meanPerClass), + cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K)) + g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster)) + print(g + geom_line(aes(linetype = cluster, color = cluster)) + + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster")) +} diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R new file mode 100644 index 0000000..39e54d2 --- /dev/null +++ b/pkg/R/selectVariables.R @@ -0,0 +1,81 @@ +#' selectVariables +#' +#' It is a function which construct, for a given lambda, the sets of relevant variables. +#' +#' @param phiInit an initial estimator for phi (size: p*m*k) +#' @param rhoInit an initial estimator for rho (size: m*m*k) +#' @param piInit an initial estimator for pi (size : k) +#' @param gamInit an initial estimator for gamma +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param gamma power in the penalty +#' @param glambda grid of regularization parameters +#' @param X matrix of regressors +#' @param Y matrix of responses +#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param eps threshold to say that EM algorithm has converged +#' @param ncores Number or cores for parallel execution (1 to disable) +#' +#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi +#' +#' @examples TODO +#' +#' @export +#' +selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, + glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast) +{ + if (ncores > 1) { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", + "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment()) + } + + # Computation for a fixed lambda + computeCoefs <- function(lambda) + { + params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, fast) + + p <- ncol(X) + m <- ncol(Y) + + # selectedVariables: list where element j contains vector of selected variables + # in [1,m] + selectedVariables <- lapply(1:p, function(j) { + # from boolean matrix mxk of selected variables obtain the corresponding boolean + # m-vector, and finally return the corresponding indices + if (m>1) { + seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)] + } else { + if (any(params$phi[j, , ] > thresh)) + 1 + else + numeric(0) + } + }) + + list(selected = selectedVariables, Rho = params$rho, Pi = params$pi) + } + + # For each lambda in the grid, we compute the coefficients + out <- + if (ncores > 1) { + parLapply(cl, glambda, computeCoefs) + } else { + lapply(glambda, computeCoefs) + } + if (ncores > 1) + parallel::stopCluster(cl) + # Suppress models which are computed twice En fait, ca ca fait la comparaison de + # tous les parametres On veut juste supprimer ceux qui ont les memes variables + # sélectionnées + # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ] + selec <- lapply(out, function(model) model$selected) + ind_dup <- duplicated(selec) + ind_uniq <- which(!ind_dup) + out2 <- list() + for (l in 1:length(ind_uniq)) + out2[[l]] <- out[[ind_uniq[l]]] + out2 +} diff --git a/pkg/R/util.R b/pkg/R/util.R new file mode 100644 index 0000000..f8b01cc --- /dev/null +++ b/pkg/R/util.R @@ -0,0 +1,7 @@ +# ... +gdet <- function(M) +{ + if (is.matrix(M)) + return (det(M)) + return (M[1]) #numeric, double +} diff --git a/pkg/data/data.RData b/pkg/data/data.RData new file mode 100644 index 0000000000000000000000000000000000000000..a9f09e143adfa04f595a40b993efa688ebd05a4b GIT binary patch literal 1010 zcmb2|=3oE==GaM@0asWhj{V=w{n|mQeJZ0?yPJwYdQQuSf71okLr$bBM0OY(I0_^N zB&Bp3`1J}H1sw5cv5*q#c4oZ4)NO)xLc}`Vgvf{|MK_sSQ!S%5``1+5dp@($-u83# zp1vQqvVLU=u{f$`m}ogJUAagqv{a)-YT?IAwW0g3haMC=(tYo)pZ}B}Im)Wfqw37M zWHq>#seesqJ-Xrh&8h>cU7Ms{Pc;s>{_>Gnk{ma8v(K(=EA2c!ovIAGWBg-p<(4N+ z&Qf|3)n>DAWjo2gp5^VBVNq72`zi0NR`Z-o!5byB8^_~ri)SIJYwqq~KmPsi#A&B5ADFr8v6P)q`5X_~_rCt?jv79f`*w)G z^~d>#Epo3dY$v?g-tGNLhv{eJL0|XZrXd%S-{miP-@f(Tru8%Ga(_JP{naxec-6fg z??v0TzxCL4Ro6B?QCp{S*4?ukGMZ;KcfL`3TNq)fRNftF-`6!IzfUsV{`l0Z{3nI0 zn;Znb?9q={z^uJdZ&OP8JF|c-8#*$kzWvp&>bNIm)w)a1%I3ZKUnzHNT1QY~;(_A^ z3wCUr-8p;1#yw?w_i9ZK=}t*u7o8k4OZES!Yi)LCtpC*hzwkg;Z?1Vj`L{S5k6XWk zC4H|KuF42|kr=|idZFUyShetkbxWd7NpT!f%@*!TT0L!vgyk%ebrp>Nmlrjcm9bA3 z+L5_!&iz*9jn|gzKb11k#svud#=*2Qvlv(7nfTqt>2 zZ7X}*q`4Jq{)GgcHM8hAxUpqvL&kEojbf*=`aa6OWn3P)x=ZAGRLp+C+=r#_VvE9` zS--Mk^IWl}$87Zc~vdlKEiqmeXyMDXKe0}YSd?~-5mc49Bwf&ZN*YI_e{9$Hb008yy=cxby literal 0 HcmV?d00001 diff --git a/pkg/data/data2.RData b/pkg/data/data2.RData new file mode 100644 index 0000000000000000000000000000000000000000..80003e3c090402d85e308401bb23159ebe1d53ca GIT binary patch literal 18385 zcmb2|=3oE==C^y;1411)>v^41P+(fb(V=v+rGurKnMtdQ#nq8VK|#w?#6^HbRkKM+ z!O``kN0394!WXsYbBfPdrdP}#L1eWrcjQmiOXegxc$}N zi%;M9eS4Yx%Y}dSp4;~RahY*4Zr4NedHkm3ns?UAtx*i>=;tcttUb0*`{`o8{L6{o zT)gky?FdxzTu`!TN||9}&WRfj?|2BUxp#5*r3o(^8u#~H+9$oInN@1msU7pb8C*Ml zZNZ`vDfeFc_{`@K3KPOt@BcY#gZOg2T|ebtsvezI_;}-v72hrVKHoH+{#(t*IzDfW z))_IyG+izL!+`q+w zLA{rSf_vvb3iy5X>5Mdgl7&%**Cmn#g$7Z@T>M z9_QTCS1io0WIT{aWch2;`;j^6ph1rGmIu*k{q;{jXKe3nO*|L+A^K-xxX%8hWpmw* zH@|&1yV>aR`GP+Qzvu0d5Y5Q^TIxSd;>g7>LBB3-zIS8YyyTi{1^H)g>DN=f{(0~| zKT++zvW&qZ@w;<$mvtRw``*hZed3_O`!_*TzOa3{`#r#fFQPs2yXNslofUI(Kd-7O z-Ol^0TFKN&^2USr54X5~%z0^CN+&cpA+Rev4ap)^S-uWGUMugb~@-H_FV zPyJ^6SjNfw<>>yImhu}pn498n{g#?k;QQs}<)4>~FZ}qs>AI(A6-W9uQ}ypgN4~_i zp0f>&y>Y916W{fx39FTjUaYqjpQZZ$Mao6>gSlrr&0 zEn@rl-`x*WD_o}UUYz?=&!ju%TC{y^OR-mujW27Qi_o6y39}A1ZtJwqlV)piwtM-b z`20SmjI+N)Pu$uo{@i7k22&|}!j!oGxv#?Y-|T;(c3WJ8>#)+g?`gBoBu|3UF1&p(NeXC=M%%EHe6g6?c=Els6Yrn?lpEx&$OU8Ql*H1N}nmi^_mb*{ew(i{e^m=gG#(>T0 zj}O+)zJ25P#-HClw8uvjPM+6VG@Z@NW}oM@BMIHb7e6*#y{NR~+nNw3R@{;%~qu)%Y)a9hB4*58L` zh?qT^pLl-rzKTZg*Eed4{+~J$#{6aO-t}|(<$h>4l)g*+C@RsdqZ#+Po^SO-&x)T8 ze9w-@XWo&S-qxfR%ALjO`XcO#?YZmJq-7)caW)pWy@wdz$l^3^nySXX;^=v)%t}<)R z|35kmWvWVsK|kjCd{f#Mw$e?vJ?{9z~^ z59@yx8w|dlE!E@7I{CKU%r4UD+Fky;>HH-R`bFmnM^_6ia6SHhp13ZvSo(>~FH6N7 zN_rmJ8K2kRP}LQ5HlT~nDM0dJm?I{IcmDo3`94`x!!dsnewcpmR*tGc5ija=> zr|T!pZ=1@0*yqVp~_@y^f_$BiWnzgQ7@mDeG!<^`f?J>8Qf)kJTOPI>FZ+#o$+_p3Kj2X9z(N(!7 z#@^2IeW^cpsLcG}*7={mamu@S_gs5ZJjy=XB>w-!(igEm;m-UOt%023?_WRRNd8fG zJx%KLW|pA%Wbwl%lW&yVUwr@g9)4q(Q&Kb-pl_so~bV9#I_vmdUA0`NW#(1-aB`uivPGO6U#D5dx3@FyEkc{EzbR^ zse2NBNIqu6`O5fGQ;~EH=6ChC@At2E>|4s)oWH>{^-=4(x7iWvdOub~+OWU;mMX7h zB2zFi$ix{&kqz~I=uR2)aall8iS;jA(f5)k;=(f)f*_?KYd0L2G z`0XD1js491L)Gr?6%&5+EK6;x5lUM>SL4l{x7>+IM@vN;w=c~2cX{z{cIgFl?DSrGIrH(ORSU09Zw@ukF7JAraL9Ge&!?Nc*LSlQW_>;=xv7|CQf2K-$AvKw zl?oH4q+jFV{`mLKCIvR`y_fV{?&W1jU0r-_O?~&_I<51YT3iz%4r(|3Gm+;rEB_m! zV0^&!<1}a2IqDl0o9f(n+F`yXF6OM2t!$Ip1-88lWk3+_U{do4i;&h+iu@Cb=k=yH~V(m*zL6oP*^9GwAX`O)xYJj zh(%O^-z?SktTrjJ{DABp)==jvzw!(2T`Dr$V{EqTZ10@>=Cktq_VA4c9d8d^yu=a_ zyH46hc5;{UKGpL+D@qDZyDn_D@NPbQ&7Nu9b#2GT=?k52o9*8)_41>r)4!G8d=%7O zDdKaWu=LZe{(kcXo3~Bd5P7n9rzGd^qsGDo8w%pn6^Q)S}%XHO1GaH@!Vq&17nCqBOt^Ui4{e~RU@ zz(5I)gJ-UC`ObSUpK#^(wVU79UN+ccr?w>3rA^p4J&rAMZ)1?^;Rns#%r~mL{$9LX zAjGk1?flBZXz^vIie~zH@hI9m2AoN{%6mH|Gwn_2Hp5~U#}`+Ys{ELy;q=qhl{csD zoT2x&O+ubQSC;xF38Zh=FK>~Y`rwDKSM>E$x-qJ;;<71s8&*CvRxW>bY~jHl*E7ye zlhgaNF1#*Wzq8wN6|<$vYL+gBp#KNeVr9(k=^vS*#`pPVY2ZIn?EhkW_uNB|JK5bfX^}ImlO3Ndj%0guck3P1`7tx>cOTJn{LnN{`rT=x&Fd4I zPrgezS|ie@`l`ink9XtCFEyR#zRAr4fb8gLE_G00+ zi*sLPOt#>OpT6<$g$uq5UfX4*tcz;?sXBYfxvGyWM*dUJw}eMtn6-2H{UtNyt7hK) zJ?UiG!za0`cYS_))8N1xNwc_nTrXLV*kr!-w^)1KS1ZG_)mQbb_G|4aJNBF}f7rTP zMNfBL?O}DTeqOuyr5bnb6DLNw|6cNyW6j>P`)z)`*7$PbOy(!wl*8wzcSXg^%~P0U zV6Qyye5WeJyCiS?s>{&uPX5P&HbV$V90 zE75Vu=eqxY>rT{eGcnm+#-XhHW?E2veapXP%fsbs*0=7nUY&ORQDs8tS^MYTvvVan zvI?qVgZhHM9L@f|xO`soi|B(}PrCj{d8{71_Nv&KA3=O<^E;0Ew;fqEC9iPKntltp z!uq)mD=S`J-gd*tMSgaKIG06>^E#v4xYN@YyfV1zvF?cDnv|%E`{q1k+xy!&`Nx#B zje1TgpW99=^?1HZzOTNJtMjm(-FzFp$3dB4X)SNeV-~$pSbHsq>Cx-F>l-JT-Fflz z%d$m(j8Z1Nk=-UPuY764>cX1=&mNYXxwY-8r}~TqOD7quuw(bO@0Cw`Yt6WH>s}Z0 z;_x|2I`%V?w3`JB_X(9hS73VSTXeYTjl+zL??oY7`x*}l7JoN?H`~Cm{^zc<#l<}T zRFdoJE_0;ie6vsA5_Ih5MD|C&*pVfxSUK)m|rFHW9E79=+QTsK!) zH)O%{^Zl>S7e1(qeIaY^%=Glu50BXe?0g@}{mtU5d3z`C7q#28@vy~Kr_OH=^IcA@ z@4D_Zb!&n04HNtOuZ+IQn^;J$KE?HJ%k^)5uVy8OC2!K>FSS`$5S4Xz=M-jPwfFV5 z@2uOnzxZu8`}r>Ua>JeM&DDJurslHl`>1_EOGH}Z%2$csHvY1SC54CQ-F@}4?C13a zwL3S9;(b(?Owr!B-&pg&yG6x)auaheSo`n$y?&-$kNC;LdFMAb&G>H2TdHoCAocpP z|C{n%2`&AHG=hJcE!k2oQ@(2vqwU$RyH0=K(O6M1z2dR2+>I< z?NojKw6K4l=%W{}E4HpUzHEc)g7ZI~+|;@vA>q;U)!M=T>x8Bw58H1nOz`zS{Da%d zbO*nCBJ;WlH9715W%2xNtry+y{A}T|)^``Wbt>mv*s`Y1LW5x~YmR>R@5PByb$5M# zdGw~r?%t+x?|G7|$IIH=9$7M()e#X|#w^Bmf$6`P?+JTeZ#n#J?*5+aFRX`5l;0O_ z3fMlWaEVlyylu_-=D+v4r5@iGzkJpl?i&7VE@t;Ai${fD4^9bQa8ZO$-^!v&lkEY^`cq%| zf3-&Z{F}YEn2UYghn;ujZe;fsuWwa*F~w`)dLNxt#Sd-$g2i8Wii;iop&@yFEr;kE z%LUs4B?B6Rr#|lAc~oF}EGKM-)-hp?yNl$`jIK=b;+%O zsg~#TPA<)GfBo3u8nXueJ5v}`*iHgB_*L}W?O$L)%QETG^l1id^a^?Q)of$)%`une1%Vv zobKLmcz8-zRAJbo;cpV&wKmuUEzt zN`(IuE&7tkTOzsQxY->WrXzoO?&>^E-Em0kY4W7_hQG^y^1mnxet64QJ9_6N*?PkX zpU*O~an9L)r7%@zD|dLP{WAH5s~W{T{k<=pcZ~>|a_E$~a_7_jcRpN~QiMEz+}HVH z$eEn}^UHkBUsiL{mU#yJ{LK6OQLTM?#GD4fuI*oq`Cjj+iC<>^vO#ofHqYPn&-{|)cwu1d(cTf|qp&7Di^%02PDljDwUI?8RI z=OceiaM#~oQX#x7o5X+1M_e*V^eGR2JAEo+&V;WzMM|F@2P}yFQC7tHO*3tVd&QLl z9#fu#vBxX^dn-3hl+T6p_rfO%GhA|nWEZUorGL`@Fq_rh%H7PT!x1Lbj^Mdc_nRzwyjs!-m zE$dpNbor2>WBcDX>nE2#lG#6fV*7uG_Z4ph(_W`rIIoF|(%aqqzV_Pd%k5_pjIw_ z4)8K%K7W;F;+MZAZ=>lI$N4qxkAL@x%KQ*HwECi0KezvW`Tj3EdQJOv*37e?-pu%y z>+8hM!i*=~HS_PMw%7i#PqqkhDU7)OPt)%Gp;?=DZ?CKAJ^qdPY2@MN--dtdj{KRi zjgiOD)YPt2zWGMjvk$k-YIQzc6G-GyJUGc)>?6)Ju zd)&L>(CPXMpD(lc#N9sk@V_swveB8Vu0Pdw#KaX>y1ZvPr;+trU!BFNdsXOV#s9Mn zc278VGWV<0vc4#p$MuI9m5-VWul}(p!E296$bw8Evv;+-DvizMxRY!=pD*88kaFw% z>v=!b`a-n#?Kpg}Z+6|c_j2`?Y;V^kbA9rbHF&b`%g!1NLGx0vE|EGer?zL3UppR6 znp@HFO-s^KnumXPz@ISFM#Hs-kFP7%Ofa_GdP4kJiO>5TU)HUe$0x?ZrG2KyNpb1E zpI6nT#9k=wn>5*5{qRiTmHXDDv0D6a*ng>Aov}(H*^KABw!>;={_o+&iA*aC!WLhi z7k=aWyCa(?tiH5Eb87bA_NR>7?robiW22$c1Pi&Ft9mV)XG?#SD{=gEW6J4m3MFOI z&rkn4?vY=Xbo<{7wV+AW8*&64B9G>?zYVn%%->kMeDCwj<9j3*-ZXZ9^L?q}8o_U3 zSvR)wbM?<&|I(xV(YF-E<dEnx``y}jd1{n+p8EQu$xqFLTbuq(HV8efnk`?* zcUwu~T<*J>Ez7!?_8wgK`-XAWX9bVvta@^P>)PBONsD-$y8j_?QLANJ6OWd=m(tAO zlIJH^f3?_j!gbXNZ?5jT9CovF^IEPo8s;C8UpKcxXHRPLf8%rOY(7ug?ce@ctNH}* za?T5felEE5_f-7&I-G8}1-oU=}aqq>icP%XDuTk?= zCHp<6!_zKV{ynU`;p*<|{cjH{gw@~Gsl8S!DW+sK`N{Dw8)L0Qza0?v%MmHxt&vm4 zDZS{m=KeW1I%kX2^nC9y67swn;Mc>y;B5WNt`E1(FJ!-6yf;9Z@#4m&b@6_C5AC0? z`e~_O{LzJnc6%7=8HoU{{0KiYZog}bbs>q?e`diK(!+lS93Y{+Ot}ns)%!x&%XVn<+qBC zf2pd`2CYX6eqO72dca*mW0n4*zu&lS^KDySwuw9O`QtU+H8p(^kHel+YOF6g{@!ES zDo>L(*JH7NuII%ZojF-@Wtd%7MBg1x!e^LBJwUy^@mvFEYEdY z9DEg$cj`04^vv9hOX~c|-0lf~vV)yplpB7(D3W{dPwa^anmb)oZra!DUkKqA(p~>S z>ZsnM_(_RxH{L4`ka~Ulx8@U-KAEVEW|m)sZp-y;n)T`Qqb<(%{`0z}O@iBb{7znR ze0ceW?!W7@7ScD>1FTdS?_S;gIg8$}c&)+|=cGcRB8`W~Fj)zMJ#qE4J<)-}qtA@EV_C22dJ#z=^vnMxa znzqjq%y{%Bf95h@$Gr3wb+`ncvu9J=%K`jY%DO$mm(rWKrW5)0cO zaZ%4T<)Ot?uf+KM;${VtV#Detwro~sUz~MF-|_sF&kr6)8X7ELS#teDfsUTNnqc2b zj>dI&t}WSf>pIJ(p7c2^J5%2zb}i&TD{(s{V z-SKZ`v9^@hI>9}mtL+n}e&L#!^3}LRZo|o5n}0mwJ+=Pb>c5=dJM&#Xp%EQX{irq} z=dgs>jw9Dz&7b|yfyIBzNhgVQnml{cJkB?r{hYn<&96NHpAP(evG{w*g?iVU=Vl$} zd~U@uOH8lkXk1nIMB`5Lh0D_>emat5l=ta8XU{C2_k8j2zQ4{_;nwoL#q= z94`#H{k`pLXRyKUC(k#oV7EA3w(f=W=f2q9iGDggV$E_lKkGTy-Ez_5{O$hq)?bP0 z7kP!pjw&8`B@iq3f4f1v%8CE8zgw!r?9Q&ry)COfea5;}HkYLPkH7O@o_^2a?whaI z^Q7AsWN!bvT*h2;Mo_%vXNr~6tte24TIg;KK@fuS=Ezg>AL_h^3h zQ8}iieiPH}YSJUM_GKHh%bpI|by)n-hOMSm4|l8(FFC#b$lku3d!qYO-&H%mzVFhOf6&cB zYE@V5-une!oj>RPIbwB2vu5v`qDqnAkcvJ-`R9jjN+)Z_f8#iK<5RKRZwuXs>2pti zGx>64-?Px#$2ZswKDutdW}9!$x3=cZ{IYZHM|QGy@;D~nm?z?w^d{o)!yUe|&Y9^s zawbw$jK35AE{s_IvCXV;j-=}BAK?ji9o<)ceQQz{bf-#ad&aiM>nqCtKP@@%Y?3I~ z#+Cga+TDEK9#>;Ox0%J@u31srT8YPaS%|>H4`= zJv?Ed#*6I?>ef6myAOF(xd?E*r5Q&cU!-Fx7$vPs0u_wMP~Z6^<_o!YtnP8PGl-B%|MUAX>{-?FUeFngLc>*M{) zA9{N5l=szppUnR||K&H%zhT@{H}tz43;ucc==v7UkY7gOtL*ayXKMao$_}0HoPH$g z$m@5zUAmMr1-DL_7hzrYCv6{({JA*ki!Z*IwH}oyH@y7PkLe}%!uzXAcnxN_{QKnn zpUvu0Y@wCt2ku$A(Cws)9#%{zpP%ryf92hwQgv{ayjK6(W}Ao72BX#TGZQyAJm2hg z>~q|G&NDGnznPtP?RX+xExc!2FvpZq-sY;)IY;i^JS1c4WM1&t^JP1akfM@k|H9u( z-uwN|-uvCo;IHb0X~bz7$&Fe^8)O{|cy7CC(MovTN} z+t#nwCLWYv`rLbC-@b*+F0(FQ^UqnfwByRO=X2|ww0wdWoi1Nn6*JX1MaCkc^YV2k zxe86cgSV!iy%ORbD6{$1v(u4h8(XVy6vbVc;&EK(_ixAZ2j{-L^1%_v)Fd%;Pnpd$yLD9cbHojnOq$^}_c}dTt62#2P2B^iQ~&VSbJ+GWu8i zPlp7)kov;n_v%VpW2V#uUt!AIS*uXp?PIaTVUw*w=_H3)8+1fgZRF_o>pv_l%azc) zzwX=R?PZ7RzOQ-l(O!SUHIp}^JgePmwy| zp>*kQnOXmWE6PRt{>s)p@8dbc9c6DPc8pW_)cyI~kF@7FK6{eA?V*d*>OG6@*IHNY zDDix7Kth%8Q^oBS6|03;O!E@ylHaSkVc+H3QqlEJ33fA!H~cfg=eLQrdrSFfDeo}-zW9=}@r)BL?{BTNdCZVK{pvoC{rt^8r+vTwbFti^ zXHnBX96Vrp!|0}D$u!Q1Ep|`x{}u>NY>T-&_y6s4f2T~oa?aMV@hg*k|EsvjhR|Qv zG#{$_cU-Z$KCgX?+u>_M&Y!P0u_{F`ttpKSRMOM?IdL1;6vl}!FWle$q><%Sqw%Tq z{2qY}`>e$wJIv0kep+9lGK2TCD$m64H)eL+nXk5deW-Bfljd7u=Pv0dO#GHtb-hBo z@V<2T@rd5ZJEqki%CRiBpICCAlP!8TKg+T&^LJL>O1Usy(rki`2z#+)G4GY5*6&)s z{duAGA~`+BZuv$5#{Vbg`_I|w>At|Wcvc3fMzn<;1rPl22 zxvvV%-fmd-=q&G}ch$F*gtGhXnRm{<_|@;Uw_|ytV=DK=S@RVyO)eHa zwkD!HewOsHdneSKBEPE7=)5Vc@#m8X->fU&1v94>PQ6&*JfW6VT0DP5 zjh$j#vhLEfjh@$qBW-WETI$suSM?LU-TKT;DK&tj{^%A?ovMm2mriIu-lE*hm#M?r zxOM8jPp`I{%!!D4a-CbV@$6KSn_nhpZCDa?AUWsYq?oyC>A^l8(&b-j|BywVxj-LatpQx!KkOjsM%pkHFY^3T+JTHY$^Da8|6wm;Zo zck}V?{aTzl%)fXUAGt7Df7cEyj7&WA?z5-nneBXs9&MQQ&bifW&V~=)ME?B{Gpn_` z_a|AW!<&1trs3VjBeCDyZB_5>Q8h@B=scNjaA1lgd-kDytcTv^Yq`yObdY7z>vS8X z&$AK||E=B}=_+~7&33h?rwxno)B7_;=KfBMvAUW0c!lz>5Bp~Axa7(dedwXmH2!Rk zCkJhlEFWz;5-Yi$X~)ly4e^OT%T6qP%;L6pic9_WyDVYv6xU|R8b;9Jz(YC zallvBipjLLr13{|8B@6U->t9pax$xM}^tgWy@YCK3?J+)c2h+$A#y-v}OC(pV7JAar+CZUz|Lz zwM>DDRq`x%{6rz8)+a5?V$&=pnkL0(du526Q~OZ*I=)OL=+#HBrKW5Nzs`PqAAU@$ zb4~xt``gok-nE!Mt~zt>lFOzmhx6Y5k2vUc>iX_-)x(@Ascn6G-oHCs7VyI|-_mQw zOS?N>`@E+g_@ox%^rlOQ=gq_Oq4EYtHtP1ud#lCLFv+q#+G z>^aXIS{+s6Q)waoP3O{*SKN(8{oUFPKb7=qe75$UpQAoudqJj(NZx^G*BjT_b9&w= zk6zIo-P*ljdDD$4@7z=_c~;$2S#K`rSyFmC>ioUdpF8swp0S;|VNu@C>(;F^53Gz{ zv$X1~#5tM1-8N3A`@98rpUb;b*WS$kt*-J%xqPFJshM-w3ZZ8_%=@JBDjxexU8MPN zJy%%Soo!#=J#1fl!P=xVPLO5Sfem6W`zl`Su2+0=_~LdKv9HrV?$$RIeEC#uYj~OY z0k!X6Qr{llfuUdc7__HAXAG8eIvlXvS)j%l6RaN2)I&$fiHmtwQyE|}=a zJwI>i5ved~{-(wMtNJ<}3f)6*Sp3sD_bpE9LUcg;)Tl5!;dLxZz6Yjw-M<~ra;_u1 z%!FT}Am)5$&B_*0GheUbIca|rRV_n5W%b)X3%@9MRxGz;x6soQ&t88QnAub}V}JM5 z<6jq9WT*Z!zEK>iGw1k$eusxXwYC{w6&kV*XGG&`|hIt;#wACt+51*eb*cqPuUVd`>gaePSBx$~FG1Sl4 zAFmsIASa0L>vqRdgFkCe>D<3J`$XOw8|$x=nmz7&sl+~sRW({Bv-$cqQN;-vY+0Oa zA%bQ3_a^Rg&vsDIoKQRU%atyNO&@anYgsrOEmx@?I9$_L5b&gU$LZ4i7$K`~Q`Q7c zc6cj#Ripfe>A4S6C9ZUeJy^7f=fBPM*fW!>9L_mkc3M&>zkiabcDWjtak02b4_J}Yi;GEgmVrm%bA;}AMJE0N@Kov zz1C*O$2D&wYJ4Rc`+h#UUfjPU(fQz|6@{AY$D41Ri$B4qW^jFihQY+C!WlvJC-1zT ztvo4pZGVs2SAob9`@$ceiyy9c?pDG%J-iy0mHNn~DL!9gCL)CK9 zTuE14;$I1zJigOp+WQku&ztODJ}fo6W3uH>WR1ZDhoe#QZ&G)=C3yYVoi}3<-(>f? z_y_zwssYbu*Y15^$gpeO&&Gp0G;|EV_Rd(um>7P)Ah5(rd`IQ%nI37D4=+f+-2R52 zsdLGi!;Z!^M32O%g*@KDzFFR?SqI_I0*VcI1t;y`MfTaA0rCn9U=8 z`{|<)rTPZT0WEefwY2R^jihEPv&a)Yr>gZ=Ih}jZ{V%MsFHE&QwKBf?EqQCE(}u)%ldmZ};L2t%p7WD6snBY2lJ&gD z;%id)YOYxa_c(7ZUG{2sDaRFm4W+f4UMT#}cfP4nXm0Xm>YjW5gL%}|{j^@sl1S`4 zy!UuwbL)-_rbL1{XHT-O6Q3+0%dhVAmD5~*)ogKv z)?=POoFDt@R4iFK_eiV3vD0%Bg$-FIze&7xe81))vzz;VJE^*F{CIWlPqwcjJ%;%< zddYBAVwvg=T)GuGOIKO*$L(cqZZEfY3Gz8wElUzl)U4^hFhN?~Y-0cX?UIHn z6ZY=UUH$tRXUMWEoYMO%`3`H!<+?LXFRXm_p>?Bc+?t(Fl2cbCv>WT2*xL6-EMA^5 zb4ySF#}av;4|U}+yw6)hPaA)IpZNEWChzt2To>|7Y8I!Quan^A(w%(1`$(SH<&#fd z>8)9H`}Tapy)C**+WXmm%Wi!4>|6T&Pr5VsEarSM@~Jxh?%tFRk&OpcC7tzuHAh+$ z#r}M_x?{S4a@r@$;P(oAwh28yLd*I*Eo2|^cioU9cj!-XS#^5t;=mj63eCnxzlj$`^*PUcuvmH1Iah;C{$^{+QN%^A(6|`!ErCXBU{Y%j;V796ejq@OB$Ne#9Kd1x_cpY@08<2stJnOxeo<(u;`;#5JOjjHg~#p+SQt;iMMhNzT7JwlxtH_U^g@D%FSPFc5k}vuYY%CKISna z)Nj?ZLssj5A8+0*k@zm`XyWwnID>Lk{gu8iO#Du6wy5;FVT5?h_On&5CG=`Odsxo;$>1hzt)=wizQgIgpHsdU)jfFg z!+195F5Njl&L}@p@Z_qUCg&{LwmV@-g{Rq*8*z8eM0WgGaDYSk;Bt#qyMJt~pY=Fn zj4L72iS%av2O{?0kGf4)_l z!@bjhlI^8 zpWpR^!}#FAEaM}44c=ZcyZ>boOPWi4eZB4N9`BX>deQn?k9VxD)=>U#vyCT@O@L{c zL3nA~V}a|EGk$9A4=sPockb$uV|(trZnRZNcV44=yzs~`$L{Sf%zC!U^G%fc)oYr6 zl=FFC)PX5(tIN9b%>6XVe`-aob&~tZ@_60$RO`pDwJfgx=GgXa!}rZEz8ze$*LW%8 zNzN4k)2BUN7kW4{&~0Dv^VY<5iMzWl?&isQ_Tzn>Gb5L z^2r4~Y15qfJZjbAXB$?;DkWVu>J{Gq|J0K^&l|7qdBbjMzhvR{x6MwSGuXeK&7Z^f z|M0%e^HS#YD<3;_V|H(|Nc`&r&3#8!zK^!^`E=^rgbdyX0UIBh%#7smk&ZYacl&wI z(YENLMxm>2yjR$K=GAZB-$oTr_e{Q>s2Y9L{>gcPD;1eM!LObqzp;F>Wag}i{V7HCh?_;orUjaPE3{ejTl*KC zexCcb0o@+sWS3-?LT*?>9}}d)i}XWto~!N}TPA51E{|9>;#0UCiSA zrd#Rf{u0Iw=k7gHXil_P@oPb@@jtUI$L`vakJ#FOP+_hC6ewnOitx ztFS%S-2Rm__^zIP@a6PYy@x)TF-4Uk_e~3BmY-R*sY_eCF!H?GFR!UgJ#N?ET3)CV z$`;}OwJq*;%Pyw#TqidlPfU1s=3>Og;tlsEidSoIXG)*TzuLny?vQG3S@VURf;pyH zrb@Q=yCxT|^m$osygrNhPo-|zn#f8k4QSVGAwT0;na^_m^7qW3B`iKjhm+h4%=cV*X_>~Ptnc*BMegE}$iJ*B6K#v$)IL76z&fF4>S6XWQ4j4T;oj!` z>o~-FUglgc{B-qj$g&{i>+;qw9)H-ByZ0UU;y2fJJ(-?=pyTvsj{ooXS-4eiw!gN7 zLGHTed6T#;2d_;yx%BY%{|7GpT&7;5y4T~3?X*~>H+_r0Ona6&S=(T>LTSkoXTj(5 zuNw#OtaJF}F?-IH`MMG|F;zMN?()+NR`jl0&D`#@Y=6I${gIcy_w9e$bpLT%XxrR3 z55sQ+&HcIK>J#g{d*+WVpDMQ2F72J%|JFCbr24|ISEuFPyw^|oyr}SqG@feJ-rh8=+3mCi=7WzKi02} zm@9Xrmzhhk_vHO|2V;6e8u-t%=lr^}dYWymWZIHBdB%Hqj{bYnAZ@6#YKO$^89R5^CPlhmIvOcc^TqLJ!Ey~f{TGwBYehNq zZ@T=HMNm>^!j9Lm6ANTsm%NshDpx&t;@I~^c1B@ye*E&A>UT|WmeBKjo!aNeShKs7 z-mgwiy#2~_{ks2&YhLS$cQJU+seAW1$LNFpH=j_ATIH2TG#fWs>3%Lf^EG$9#$%J? z3)@=0U%#5!F~!zSeeaXY3!b$WI{e`EGZ-(cxV)@tE5MTilL{7My!{^5^vLJ09p?Msu`-=kYiM4mNt&Yy3(BQHGp?faZ_yBnYEc(W$m ze)Dgwz8rr$Nnf!enem5JHa@Mku$bxc>Ca;gA4~Oq=>eR}~4?cW1E;Dy>7vmB0pDWSATc3&tJA^@+9`goK4Z{wvqC;pW2J0WKDT^ z=Eh}ERpUV5&mG<*F?A`gP)NmK4jP=>9{l`Bk=$Sl! z{=Zk`Na8_u$#|CHg(nh}wte1c##1Y?aqfP>ZtlK)b&Kwm?MN~V%axSnNfxv956Rw| z_*&%c`W^k%8$)yq3(qWOOD~vbP^c^v#nP62DDO@~+~oQ5`X4oweKFa7^JN};_IlwX z@1F-aU5{Fjce`9FXNkwZ;^dWFZu=ka>qyDdu$2<_ny10ay=u>C?!|8=l=b-jeO)1t zCo_BhtZgrvj9CSze&o8o;O*tLTNa9c-f%NhckxF*K6l;Eyw_*0U9iyQ#*{mj^46t% z%dXen%$%?Ph37n@ZN#k?HkYfu2|rP}lmDVu{NTLe``c6Qye)hf8SqZ*q~r?Yoz_3j ze33jC7L%QNyH>EO+Oj=#|J@@;gUeOFU47wn+xg%5=uA6<`XATwi{`nk;Y(b6+t!`; zvElV=tZMrom32(oxcd^j%q+jH+3zPg?>=8%^F3_ZL#KsX>JHZknHjPh?CncDyzd7W zlkD?4i5mZt(pi5Q%`!#u`sH7?*sa^v9~D@$;m75}^Dh6iowzNy;BM1SnN@c8??gZ5 zF$i0GDl_=6$gy)hHlF(__;zWv4bzX9>1KJ>ZLT(d#g|7 z5r%J3UYU2!?_>1tandYH*B8q8|AcG4-m&$kL=EiW1kkjXlmOt|A#)!XP@7#f0J0|crD@EX_Ie$OXDgZPU`qNS@L72 ze0#zR@w0|+&mVnI9pw~0qh6n<^+|L^>+ht4Hv=B~T@O>@+#GVAKe(9b=IT|((?j_l zxSXu>KP3^E!xj0XR3$<l!?xcnfcrsEZ3gvDuX&D|?I7+22fW$L>s z@x6Hd?}lG?Gc3&Izn|Oi$o|sXl-vIw%lzIy-}uk>l0Wq)PW+Sh|Hc2OUSIt;yXDRQ zF8|KI`fuD*zvgrQSI_^P@BXj3|9`RB|Mjo_ZQl52bK!sEKlM4U=ga<&o&W!|y3mwo zztY_N%OsxF-*10l?>Fh*4}Z7k_WHZiSN(r-vi`-_{a4Zju3s@X-(nzKpOljFL*7b# z^DV*O?>=k`+WbiHcXIP(l`D6DOgv+4@GEoOpTIt+f9cHHAFC(lXxkr`Upf6%@QL4x zQeQr|ds32kKkVSI`%5p)_mbm$uJ5*I@ssqK4^Q5k9Lf7ybI+qNw|ZU`KAFU-i%B)n zKTjy`dwXcR*Yz()7U|ft?Xpdt`Qi7T8C8b0Q*XqtQJc3oIe1^a^P(q>$DhpLDe<|n zfn8^U-5#^g4cZ>7&Ayn-KKCn7d)vj`+IqW{Y^Tc2Ehw*%wJ3S{N70Kdv`;w5nW)k-0prtM36nZ}AhUgDdYRE#};EY1^63)9)>ru5euoN;!37vG%mbv6Ehx&hh;o z-tV$9ZdFBk#?qt!y~_(t_iJ0d+G;R+ahY}T%vlRk=G{MUvarBnOYXE7r?qOeR%wsk zU%4FJE}d~sakh)me5+QbP zU8V88ha0VC?Z0B)yD8|&efN#J(qeZOsRbL!>V_{9_O^fL$RoUTTG_eiAD!#Vt+C%5^doDb z=1lV$4td*>f*#9z=PkMaG9mNzw5O{)BX1{vdp)6c>b=0I|I42J;*S;NQT_PLWVfZ> zUe8Geoah%B$(;Asw{A68JDy1<g7J|TRu1Q zhr3gM{+s?@?=3sykLX@aG~b;gZZzTG-5DHh!TcpIaW^-=SjjnYeL>~+^*>{i^)rLh zJ!YRYqDf_Imq{sOj_WEZJqh{fBw&n`Q3pQ`C(s-)fibKDk(Z{S(3TV^V7RUiZ2?mapDW z_9s&7|DVp4u9w$*5WaLd^+WAv6T^hp;Tyk-CbyQ~)Svw==!b9ptSHkjlJ*ADt9OOo zKJim^y1R+nse^_Ev%jgacHf-8-d@wKc=6{q@+-fsu*pp?zBhTc!{@)VwupXRBzk*x z2h-#2p@C=D^;eXvF}^C(o^@%;%l;3+!r45(YVPOw{p_6g^H7wTa-Z{OGgCL470 zq^q!e;mUf$r+92zbo7KL52qbcW!}5dXn*WEejdH9pbu;Gmhqq4vHWG-jDo7Y^&6gr zY*XjgT>A3%lh8|dBqEprL_bshbaaJwf z_bB+l#kBpQ*=rl)=f&5*x4iz>N+sUSW$ODB|1-w3#jI{_C|{rZ%Z^QLes$J+Rf7rT z*-r8ACZEXLdw%KTV=7Z>tggz-7xV44l|3AsynUDT-pfCxomXy~Rm8bI|IXXHZTb}= zXY!A<2HbnHXr*QO-K!D$g{eLNnx-#1t9f_MhP9e&KeqJ$l;pR1&R+3w@}=*8B~OGd za$dMZFnXfh>vH!4PEW3#)SvdSysGr&!`qpaj&Yj~hlo|1Jl7AfiFhI(lvHOSGVPN~ z#@erI|A^#g)b771sBioCUgcx|HF-_*>%xPMadbBdGJZO?n6SbKW=%b&`fX`cSG?q4eU zeLwt3@fF=ND_v#9r6$hjE2hl)t5)h#!F>F8cJtSrEjzpKdL><~(U-J;BN`!|Z2Y8O zc%S)=-?O8iuYD-+?)Lsnk%OryJyeaud~~) z?%NZ#EY@;~@W<;`4>!Gu>K0V?Sf){5(D6?peA}|XKTe^3m63k;t(xpQR==@%&M&-m z>ghY&taDe{9Ddroh5bh6_n^1pw*q#ashIX;*_8N)B|!&RZ5OUOyDwt*`))10(r9mX zUS*T5n|Wj|uln}KY`Lec>Br~0Vx}%_`6$`AW}o(zk7wUIRhiCt(Vurn_+!bdlhe0# z-ZYtVQoK^lZuM_D!*5=%xssnL?4O?XuJ`>}X^*~&AW z--BNsU0tJJKY4X++mpMKQY8F~Zk~9x_C%C*vwNVtQN5SJ+1>3KZ8xQ!-4_V?D_8S% z?WeWIm8XNMjN)zBlC*yI)yh0puAM&3c(d+4gF3g5UYA@$#douN-%NAqU7~RNPsY(% z6Wugd9!@(K6rZPNmueDpZdEB#v8b&cMWw`-k6_1{ubAS0A>_`eWQnA^*O6N>O|Lcxr9)FIk zJ^Xf(+vAno=T8-XHz;|s|MA`zpW+I)gf+}{ZCI;i`R8!wdiVGLGQTQ0U68&fr(L;t z<<*xb)842B^vr*H|9I5Pw*uM{FKn!03rb3FTZAb%M?b0zy)AZDzjXS{BX5jn^B;~1 zYw=kxba)HLvny7i%VN#e1wCK%Xj=Y9PW`gWH-gu)5KVRZ%N?yFC#dF`GD$V)Lf4A(Kw7bm#&|coA^`qj_Hz1vqN;(hz8l}D*vlH9K80xx6-Dh^7k8M76~-wG|c+1!h2f! zv1fJZN5@wiwjWj#?S7uz^-5};V2kIAU|aL^UuA4XPlZld{d4)xGupdeY~eY#tXJFq zMOL!GvEN_UJm157T>f(Sn_IF+V(%84s_cGrZo5O{m3Qwx)pV^}S$B2)#iRLhwUaE& zUn|wVU37lWDc->E7W?|2>Rz0fb9aC1Rd(C6Uzqnu-mAa8-|xpWz4=-3tFOpsY=2v! znNs@T+oy_ex|MO;wyj)kuabJR`CI2P&p!LRzcU?Dj;u6Y@W<;1ubT3Lj*8;Le_n}) z?OV~c;6m)G>6g-6XKkAn{$*X3?%mz8KVBv|UiC}J`uRbmm;L{=J)7>%Q(V8Wk}dBT zf6#AH{-ybQHhq2MyzPsvW#jkfyO!?@om>9?yTvV`y9FPw?@<5zUHyKh*u0HTxxdVl z++Gs)C0XJ3aoyD-J^yM?Yb}%A*W%#(X@ASV*5CTS?l*S)Ut97^{ptVMU+1g-^L+oW zdC&gv-}>(F{X70Is=rvj^#7-KFYbSHU-CQu`2L%7>TAwB6rMli_S`;~|IDBDFE8#d o_}}__Kl8i)@xTA-->HB2ufF-W{bpO0=>O~+WTI~z3TI#d0L*yv5C8xG literal 0 HcmV?d00001 diff --git a/pkg/data/script_data.R b/pkg/data/script_data.R new file mode 100644 index 0000000..1585337 --- /dev/null +++ b/pkg/data/script_data.R @@ -0,0 +1,15 @@ +m=11 +p=10 + +covY = array(0,dim = c(m,m,2)) +covY[,,1] = diag(m) +covY[,,2] = diag(m) + +Beta = array(0, dim = c(p, m, 2)) +Beta[1:4,1:4,1] = 3*diag(4) +Beta[1:4,1:4,2] = -2*diag(4) + +Data = generateXY(100, c(0.5,0.5), rep(0,p), Beta, diag(p), covY) + +Res = valse(Data$X,Data$Y, fast=FALSE, plot=FALSE, verbose = TRUE, kmax=2, compute_grid_lambda = FALSE, + grid_lambda = seq(0.2,2,length = 50), size_coll_mod = 50) diff --git a/pkg/inst/testdata/TODO.csv b/pkg/inst/testdata/TODO.csv new file mode 100644 index 0000000..d679966 --- /dev/null +++ b/pkg/inst/testdata/TODO.csv @@ -0,0 +1 @@ +ou alors data_test.RData, possible aussi diff --git a/pkg/man/valse-package.Rd b/pkg/man/valse-package.Rd new file mode 100644 index 0000000..534375b --- /dev/null +++ b/pkg/man/valse-package.Rd @@ -0,0 +1,37 @@ +\name{valse-package} +\alias{valse-package} +\alias{valse} +\docType{package} + +\title{ + \packageTitle{valse} +} + +\description{ + \packageDescription{valse} +} + +\details{ + The package devtools should be useful in development stage, since we rely on testthat for + unit tests, and roxygen2 for documentation. knitr is used to generate the package vignette. + Concerning the other suggested packages: + \itemize{ + \item{parallel (generally) permits to run the bootstrap method faster.} + } + + The three main functions are ... +} + +\author{ + \packageAuthor{valse} + + Maintainer: \packageMaintainer{valse} +} + +%\references{ +% TODO: Literature or other references for background information +%} + +%\examples{ +% TODO: simple examples of the most important functions +%} diff --git a/pkg/src/Makevars b/pkg/src/Makevars new file mode 100644 index 0000000..50b7fb6 --- /dev/null +++ b/pkg/src/Makevars @@ -0,0 +1,11 @@ +#Debug flags +PKG_CFLAGS=-g -I./sources + +#Prod flags: +#PKG_CFLAGS=-O2 -I./sources + +PKG_LIBS=-lm -lgsl -lcblas + +SOURCES = $(wildcard adapters/*.c sources/*.c) + +OBJECTS = $(SOURCES:.c=.o) diff --git a/pkg/src/adapters/a.EMGLLF.c b/pkg/src/adapters/a.EMGLLF.c new file mode 100644 index 0000000..f24cd2a --- /dev/null +++ b/pkg/src/adapters/a.EMGLLF.c @@ -0,0 +1,91 @@ +#include +#include +#include "EMGLLF.h" + +// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper) +SEXP EMGLLF( + SEXP phiInit_, + SEXP rhoInit_, + SEXP piInit_, + SEXP gamInit_, + SEXP mini_, + SEXP maxi_, + SEXP gamma_, + SEXP lambda_, + SEXP X_, + SEXP Y_, + SEXP tau_ +) { + // Get matrices dimensions + int n = INTEGER(getAttrib(X_, R_DimSymbol))[0]; + SEXP dim = getAttrib(phiInit_, R_DimSymbol); + int p = INTEGER(dim)[0]; + int m = INTEGER(dim)[1]; + int k = INTEGER(dim)[2]; + + //////////// + // INPUTS // + //////////// + + // get scalar parameters + int mini = INTEGER_VALUE(mini_); + int maxi = INTEGER_VALUE(maxi_); + double gamma = NUMERIC_VALUE(gamma_); + double lambda = NUMERIC_VALUE(lambda_); + double tau = NUMERIC_VALUE(tau_); + + // Get pointers from SEXP arrays ; WARNING: by columns ! + double* phiInit = REAL(phiInit_); + double* rhoInit = REAL(rhoInit_); + double* piInit = REAL(piInit_); + double* gamInit = REAL(gamInit_); + double* X = REAL(X_); + double* Y = REAL(Y_); + + ///////////// + // OUTPUTS // + ///////////// + + SEXP phi, rho, pi, LLF, S, affec, dimPhiS, dimRho; + PROTECT(dimPhiS = allocVector(INTSXP, 3)); + int* pDimPhiS = INTEGER(dimPhiS); + pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k; + PROTECT(dimRho = allocVector(INTSXP, 3)); + int* pDimRho = INTEGER(dimRho); + pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k; + PROTECT(phi = allocArray(REALSXP, dimPhiS)); + PROTECT(rho = allocArray(REALSXP, dimRho)); + PROTECT(pi = allocVector(REALSXP, k)); + PROTECT(LLF = allocVector(REALSXP, maxi-mini+1)); + PROTECT(S = allocArray(REALSXP, dimPhiS)); + PROTECT(affec = allocVector(INTSXP, n)); + double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLLF=REAL(LLF), *pS=REAL(S); + int *pAffec=INTEGER(affec); + + //////////////////// + // Call to EMGLLF // + //////////////////// + + EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau, + pPhi,pRho,pPi,pLLF,pS,pAffec, + n,p,m,k); + + // Build list from OUT params and return it + SEXP listParams, listNames; + int nouts = 6; + PROTECT(listParams = allocVector(VECSXP, nouts)); + char* lnames[6] = {"phi", "rho", "pi", "LLF", "S", "affec"}; //lists labels + PROTECT(listNames = allocVector(STRSXP,nouts)); + for (int i=0; i +#include +#include "EMGrank.h" + +// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper) +SEXP EMGrank( + SEXP Pi_, + SEXP Rho_, + SEXP mini_, + SEXP maxi_, + SEXP X_, + SEXP Y_, + SEXP tau_, + SEXP rank_ +) { + // Get matrices dimensions + SEXP dimX = getAttrib(X_, R_DimSymbol); + int n = INTEGER(dimX)[0]; + int p = INTEGER(dimX)[1]; + SEXP dimRho = getAttrib(Rho_, R_DimSymbol); + int m = INTEGER(dimRho)[0]; + int k = INTEGER(dimRho)[2]; + + //////////// + // INPUTS // + //////////// + + // get scalar parameters + int mini = INTEGER_VALUE(mini_); + int maxi = INTEGER_VALUE(maxi_); + double tau = NUMERIC_VALUE(tau_); + + // Get pointers from SEXP arrays ; WARNING: by columns ! + double* Pi = REAL(Pi_); + double* Rho = REAL(Rho_); + double* X = REAL(X_); + double* Y = REAL(Y_); + int* rank = INTEGER(rank_); + + ///////////// + // OUTPUTS // + ///////////// + + SEXP phi, LLF, dimPhi; + PROTECT(dimPhi = allocVector(INTSXP, 3)); + int* pDimPhi = INTEGER(dimPhi); + pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k; + PROTECT(phi = allocArray(REALSXP, dimPhi)); + PROTECT(LLF = allocVector(REALSXP, 1)); + double *pPhi=REAL(phi), *pLLF=REAL(LLF); + + ///////////////////// + // Call to EMGrank // + ///////////////////// + + EMGrank_core(Pi, Rho, mini, maxi, X, Y, tau, rank, + pPhi,pLLF, + n,p,m,k); + + // Build list from OUT params and return it + SEXP listParams, listNames; + PROTECT(listParams = allocVector(VECSXP, 2)); + char* lnames[2] = {"phi", "LLF"}; //lists labels + PROTECT(listNames = allocVector(STRSXP,2)); + for (int i=0; i<2; i++) + SET_STRING_ELT(listNames,i,mkChar(lnames[i])); + setAttrib(listParams, R_NamesSymbol, listNames); + SET_VECTOR_ELT(listParams, 0, phi); + SET_VECTOR_ELT(listParams, 1, LLF); + + UNPROTECT(5); + return listParams; +} diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/sources/EMGLLF.c new file mode 100644 index 0000000..d2f5a8e --- /dev/null +++ b/pkg/src/sources/EMGLLF.c @@ -0,0 +1,412 @@ +#include "utils.h" +#include +#include +#include + +// TODO: don't recompute indexes ai(...) and mi(...) when possible +void EMGLLF_core( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + int mini, // nombre minimal d'itérations dans l'algorithme EM + int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // puissance des proportions dans la pénalisation pour un Lasso adaptatif + Real lambda, // valeur du paramètre de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* rho, // parametre de variance renormalisé, calculé par l'EM + Real* pi, // parametre des proportions renormalisé, calculé par l'EM + Real* llh, // (derniere) log vraisemblance associée à cet échantillon, + // pour les valeurs estimées des paramètres + Real* S, + int* affec, + // additional size parameters + int n, // nombre d'echantillons + int p, // nombre de covariables + int m, // taille de Y (multivarié) + int k) // nombre de composantes dans le mélange +{ + //Initialize outputs + copyArray(phiInit, phi, p*m*k); + copyArray(rhoInit, rho, m*m*k); + copyArray(piInit, pi, k); + //S is already allocated, and doesn't need to be 'zeroed' + + //Other local variables: same as in R + Real* gam = (Real*)malloc(n*k*sizeof(Real)); + copyArray(gamInit, gam, n*k); + Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real)); + Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real)); + Real* b = (Real*)malloc(k*sizeof(Real)); + Real* X2 = (Real*)malloc(n*p*k*sizeof(Real)); + Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real)); + *llh = -INFINITY; + Real* pi2 = (Real*)malloc(k*sizeof(Real)); + const Real EPS = 1e-15; + // Additional (not at this place, in R file) + Real* gam2 = (Real*)malloc(k*sizeof(Real)); + Real* sqNorm2 = (Real*)malloc(k*sizeof(Real)); + Real* detRho = (Real*)malloc(k*sizeof(Real)); + gsl_matrix* matrix = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + const Real gaussConstM = pow(2.*M_PI,m/2.); + Real* Phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* Rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* Pi = (Real*)malloc(k*sizeof(Real)); + + for (int ite=1; ite<=maxi; ite++) + { + copyArray(phi, Phi, p*m*k); + copyArray(rho, Rho, m*m*k); + copyArray(pi, Pi, k); + + // Calculs associés a Y et X + for (int r=0; r= 0) + pi2AllPositive = 1; + for (int r=0; r + Real dotProduct = 0.; + for (int u=0; u n*lambda*pirPowGamma) + { + phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)]) + / Gram2[ai(j,j,r,p,p,k)]; + } + else + { + phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)]) + / Gram2[ai(j,j,r,p,p,k)]; + } + } + } + } + + ///////////// + // Etape E // + ///////////// + + // Precompute det(rho[,,r]) for r in 1...k + int signum; + for (int r=0; rdata[u*m+v] = rho[ai(u,v,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + detRho[r] = gsl_linalg_LU_det(matrix, signum); + } + + Real sumLogLLH = 0.; + for (int i=0; i EPS) //else: gam[i,] is already ~=0 + { + for (int r=0; r Dist1) + Dist1 = tmpDist; + } + } + } + //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) + Real Dist2 = 0.; + for (int u=0; u Dist2) + Dist2 = tmpDist; + } + } + } + //Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi))) + Real Dist3 = 0.; + for (int u=0; u Dist3) + Dist3 = tmpDist; + } + } + //dist2=max([max(Dist1),max(Dist2),max(Dist3)]); + Real dist2 = Dist1; + if (Dist2 > dist2) + dist2 = Dist2; + if (Dist3 > dist2) + dist2 = Dist3; + + if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau))) + break; + } + + //affec = apply(gam, 1, which.max) + for (int i=0; i rowMax) + { + affec[i] = j+1; //R indices start at 1 + rowMax = gam[mi(i,j,n,k)]; + } + } + } + + //free memory + free(b); + free(gam); + free(Phi); + free(Rho); + free(Pi); + free(Gram2); + free(ps2); + free(detRho); + gsl_matrix_free(matrix); + gsl_permutation_free(permutation); + free(XiPhiR); + free(YiRhoR); + free(gam2); + free(pi2); + free(X2); + free(Y2); + free(sqNorm2); +} diff --git a/pkg/src/sources/EMGLLF.h b/pkg/src/sources/EMGLLF.h new file mode 100644 index 0000000..e15cb87 --- /dev/null +++ b/pkg/src/sources/EMGLLF.h @@ -0,0 +1,32 @@ +#ifndef valse_EMGLLF_H +#define valse_EMGLLF_H + +#include "utils.h" + +void EMGLLF_core( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + int mini, + int maxi, + Real gamma, + Real lambda, + const Real* X, + const Real* Y, + Real tau, + // OUT parameters + Real* phi, + Real* rho, + Real* pi, + Real* LLF, + Real* S, + int* affec, + // additional size parameters + int n, + int p, + int m, + int k); + +#endif diff --git a/pkg/src/sources/EMGrank.c b/pkg/src/sources/EMGrank.c new file mode 100644 index 0000000..3a9bf94 --- /dev/null +++ b/pkg/src/sources/EMGrank.c @@ -0,0 +1,307 @@ +#include +#include +#include "utils.h" + +// Compute pseudo-inverse of a square matrix +static Real* pinv(const Real* matrix, int dim) +{ + gsl_matrix* U = gsl_matrix_alloc(dim,dim); + gsl_matrix* V = gsl_matrix_alloc(dim,dim); + gsl_vector* S = gsl_vector_alloc(dim); + gsl_vector* work = gsl_vector_alloc(dim); + Real EPS = 1e-10; //threshold for singular value "== 0" + + //copy matrix into U + copyArray(matrix, U->data, dim*dim); + + //U,S,V = SVD of matrix + gsl_linalg_SV_decomp(U, V, S, work); + gsl_vector_free(work); + + // Obtain pseudo-inverse by V*S^{-1}*t(U) + Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); + for (int i=0; idata[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; + inverse[i*dim+ii] = dotProduct; + } + } + + gsl_matrix_free(U); + gsl_matrix_free(V); + gsl_vector_free(S); + return inverse; +} + +// TODO: comment EMGrank purpose +void EMGrank_core( + // IN parameters + const Real* Pi, // parametre de proportion + const Real* Rho, // parametre initial de variance renormalisé + int mini, // nombre minimal d'itérations dans l'algorithme EM + int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const int* rank, // vecteur des rangs possibles + // OUT parameters + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + // additional size parameters + int n, // taille de l'echantillon + int p, // nombre de covariables + int m, // taille de Y (multivarié) + int k) // nombre de composantes +{ + // Allocations, initializations + Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); + Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); + int signum; + Real invN = 1.0/n; + int deltaPhiBufferSize = 20; + Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); + int ite = 0; + Real sumDeltaPhi = 0.0; + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real* Xr = (Real*)malloc(n*p*sizeof(Real)); + Real* Yr = (Real*)malloc(n*m*sizeof(Real)); + Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); + Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); + gsl_matrix* matrixM = gsl_matrix_alloc(p, m); + gsl_matrix* matrixE = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + gsl_matrix* V = gsl_matrix_alloc(m,m); + gsl_vector* S = gsl_vector_alloc(m); + gsl_vector* work = gsl_vector_alloc(m); + + //Initialize class memberships (all elements in class 0; TODO: randomize ?) + int* Z = (int*)calloc(n, sizeof(int)); + + //Initialize phi to zero, because some M loops might exit before phi affectation + zeroArray(phi, p*m*k); + + while (itetau)) + { + ///////////// + // Etape M // + ///////////// + + //M step: Mise à jour de Beta (et donc phi) + for (int r=0; rdata[j*m+jj] = dotProduct; + } + } + free(invTXrXr); + + //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + gsl_linalg_SV_decomp(matrixM, V, S, work); + + //Set m-rank(r) singular values to zero, and recompose + //best rank(r) approximation of the initial product + for (int j=rank[r]; jdata[j] = 0.0; + + //[intermediate step] Compute hatBetaR = U * S * t(V) + double* U = matrixM->data; //GSL require double precision + for (int j=0; jdata[u] * V->data[jj*m+u]; + hatBetaR[mi(j,jj,p,m)] = dotProduct; + } + } + + //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) + for (int j=0; jdata[j*m+jj] = Rho[ai(j,jj,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrixE, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrixE, signum); + + //compute Y(i,:)*Rho(:,:,r) + for (int j=0; j + Real dotProduct = 0.0; + for (int u=0; u maxLogGamIR) + { + Z[i] = r; + maxLogGamIR = logGamIR; + } + sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); + } + + sumLogLLF2 += log(sumLLF1); + } + + // Assign output variable LLF + *LLF = -invN * sumLogLLF2; + + //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi)))); + Real newDeltaPhi = 0.0; + for (int j=0; j newDeltaPhi) + newDeltaPhi = tmpDist; + } + } + } + + //update distance parameter to check algorithm convergence (delta(phi, Phi)) + //TODO: deltaPhi should be a linked list for perf. + if (ite < deltaPhiBufferSize) + deltaPhi[ite] = newDeltaPhi; + else + { + sumDeltaPhi -= deltaPhi[0]; + for (int u=0; u + +/******** + * Types + *******/ + +typedef double Real; +//typedef uint32_t UInt; +//typedef int32_t Int; + +/******************************* + * Matrix and arrays indexation + *******************************/ + +// Matrix Index ; TODO? ncol unused +#define mi(i,j,nrow,ncol)\ + j*nrow + i + +// Array Index ; TODO? d3 unused +#define ai(i,j,k,d1,d2,d3)\ + k*d1*d2 + j*d1 + i + +// Array4 Index ; TODO? ... +#define ai4(i,j,k,m,d1,d2,d3,d4)\ + m*d1*d2*d3 + k*d1*d2 + j*d1 + i + +/************************* + * Array copy & "zeroing" + ************************/ + +// Fill an array with zeros +#define zeroArray(array, size)\ +{\ + for (int u=0; u