From 3453829ed3723a2b18ac478a6b4ef5d087a9d68d Mon Sep 17 00:00:00 2001 From: Benjamin Auder <benjamin.auder@somewhere> Date: Mon, 22 Jan 2018 15:12:50 +0100 Subject: [PATCH] First commit --- .gitattributes | 5 + .gitfat | 2 + .gitignore | 24 ++ README.md | 18 ++ TODO | 15 + biblio/articleMethodeDevijverADAC.pdf | 1 + biblio/these_Emilie.pdf | 1 + hooks/pre-commit.bad | 42 +++ hooks/pre-push | 3 + pkg/.gitignore | 4 + pkg/DESCRIPTION | 43 +++ pkg/LICENSE | 23 ++ pkg/R/A_NAMESPACE.R | 16 + pkg/R/EMGLLF.R | 195 ++++++++++++ pkg/R/EMGrank.R | 120 +++++++ pkg/R/computeGridLambda.R | 37 +++ pkg/R/constructionModelesLassoMLE.R | 113 +++++++ pkg/R/constructionModelesLassoRank.R | 95 ++++++ pkg/R/generateXY.R | 39 +++ pkg/R/initSmallEM.R | 80 +++++ pkg/R/main.R | 152 +++++++++ pkg/R/plot_valse.R | 89 ++++++ pkg/R/selectVariables.R | 83 +++++ pkg/R/util.R | 7 + pkg/data/data.RData | Bin 0 -> 1010 bytes pkg/data/data2.RData | Bin 0 -> 18385 bytes pkg/inst/testdata/TODO.csv | 1 + pkg/man/valse-package.Rd | 37 +++ pkg/src/Makevars | 11 + pkg/src/adapters/a.EMGLLF.c | 91 ++++++ pkg/src/adapters/a.EMGrank.c | 73 +++++ pkg/src/sources/EMGLLF.c | 420 +++++++++++++++++++++++++ pkg/src/sources/EMGLLF.h | 32 ++ pkg/src/sources/EMGrank.c | 307 ++++++++++++++++++ pkg/src/sources/EMGrank.h | 25 ++ pkg/src/sources/utils.h | 48 +++ pkg/tests/testthat.R | 4 + pkg/tests/testthat/helper-context1.R | 5 + pkg/tests/testthat/test-context1.R | 11 + pkg/vignettes/.gitignore | 14 + reports/bazar_Emilie/dessinBeta.m | 97 ++++++ reports/bazar_Emilie/niveauColores.m | 18 ++ reports/bazar_Emilie/ondelettes.m | 83 +++++ reports/bazar_Emilie/ondelettes2.m | 67 ++++ reports/bazar_Emilie/pretraitement.m | 22 ++ reports/bazar_Emilie/recomp.m | 29 ++ reports/bazar_Emilie/reconstruction.m | 7 + reports/bazar_Emilie/resultatSousEns.m | 21 ++ reports/bazar_Emilie/sautDeDimension.m | 65 ++++ reports/bazar_Emilie/script.m | 30 ++ reports/bazar_Emilie/scriptEssai.m | 28 ++ reports/bazar_Emilie/scriptSimules.m | 9 + reports/bazar_Emilie/simulatedData.R | 120 +++++++ reports/essai16mars.R | 31 ++ reports/essaiPlot.R | 73 +++++ reports/simulData_17mars.R | 56 ++++ test/.gitignore | 4 + test/Makefile | 31 ++ test/generateRunSaveTest_EMGLLF.R | 48 +++ test/generateRunSaveTest_EMGrank.R | 40 +++ test/helper.R | 58 ++++ test/run.sh | 37 +++ test/script_data.R | 15 + test/test.EMGLLF.c | 83 +++++ test/test.EMGrank.c | 58 ++++ test/test_EMGLLF.R | 39 +++ test/test_EMGrank.R | 27 ++ test/test_utils.c | 103 ++++++ test/test_utils.h | 17 + 69 files changed, 3602 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitfat create mode 100644 .gitignore create mode 100644 README.md create mode 100644 TODO create mode 100644 biblio/articleMethodeDevijverADAC.pdf create mode 100644 biblio/these_Emilie.pdf create mode 100755 hooks/pre-commit.bad create mode 100755 hooks/pre-push create mode 100644 pkg/.gitignore create mode 100644 pkg/DESCRIPTION create mode 100644 pkg/LICENSE create mode 100644 pkg/R/A_NAMESPACE.R create mode 100644 pkg/R/EMGLLF.R create mode 100644 pkg/R/EMGrank.R create mode 100644 pkg/R/computeGridLambda.R create mode 100644 pkg/R/constructionModelesLassoMLE.R create mode 100644 pkg/R/constructionModelesLassoRank.R create mode 100644 pkg/R/generateXY.R create mode 100644 pkg/R/initSmallEM.R create mode 100644 pkg/R/main.R create mode 100644 pkg/R/plot_valse.R create mode 100644 pkg/R/selectVariables.R create mode 100644 pkg/R/util.R create mode 100644 pkg/data/data.RData create mode 100644 pkg/data/data2.RData create mode 100644 pkg/inst/testdata/TODO.csv create mode 100644 pkg/man/valse-package.Rd create mode 100644 pkg/src/Makevars create mode 100644 pkg/src/adapters/a.EMGLLF.c create mode 100644 pkg/src/adapters/a.EMGrank.c create mode 100644 pkg/src/sources/EMGLLF.c create mode 100644 pkg/src/sources/EMGLLF.h create mode 100644 pkg/src/sources/EMGrank.c create mode 100644 pkg/src/sources/EMGrank.h create mode 100644 pkg/src/sources/utils.h create mode 100644 pkg/tests/testthat.R create mode 100644 pkg/tests/testthat/helper-context1.R create mode 100644 pkg/tests/testthat/test-context1.R create mode 100644 pkg/vignettes/.gitignore create mode 100644 reports/bazar_Emilie/dessinBeta.m create mode 100644 reports/bazar_Emilie/niveauColores.m create mode 100644 reports/bazar_Emilie/ondelettes.m create mode 100644 reports/bazar_Emilie/ondelettes2.m create mode 100644 reports/bazar_Emilie/pretraitement.m create mode 100644 reports/bazar_Emilie/recomp.m create mode 100644 reports/bazar_Emilie/reconstruction.m create mode 100644 reports/bazar_Emilie/resultatSousEns.m create mode 100644 reports/bazar_Emilie/sautDeDimension.m create mode 100644 reports/bazar_Emilie/script.m create mode 100644 reports/bazar_Emilie/scriptEssai.m create mode 100644 reports/bazar_Emilie/scriptSimules.m create mode 100644 reports/bazar_Emilie/simulatedData.R create mode 100644 reports/essai16mars.R create mode 100644 reports/essaiPlot.R create mode 100644 reports/simulData_17mars.R create mode 100644 test/.gitignore create mode 100644 test/Makefile create mode 100644 test/generateRunSaveTest_EMGLLF.R create mode 100644 test/generateRunSaveTest_EMGrank.R create mode 100644 test/helper.R create mode 100755 test/run.sh create mode 100644 test/script_data.R create mode 100644 test/test.EMGLLF.c create mode 100644 test/test.EMGrank.c create mode 100644 test/test_EMGLLF.R create mode 100644 test/test_EMGrank.R create mode 100644 test/test_utils.c create mode 100644 test/test_utils.h diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..df69bb5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +*.pdf filter=fat +*.zip filter=fat +*.tar.xz filter=fat +*.data filter=fat +*.png filter=fat diff --git a/.gitfat b/.gitfat new file mode 100644 index 0000000..fc5cc54 --- /dev/null +++ b/.gitfat @@ -0,0 +1,2 @@ +[rsync] +remote = gitfat@auder.net:~/files/valse diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d45f42a --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +#ignore temporary files +*~ +*.swp +*.Rout + +#ignore R session files + RStudio files +.Rhistory +.RData +*.Rproj* +.Rprofile +.Rbuildignore + +#ignore R CMD build/check genrated files +/*.Rcheck/ +/*.tar.gz + +#ignore object files and executables +*.o +*.so +*.exe + +#misc +Rprof.out +*.zip diff --git a/README.md b/README.md new file mode 100644 index 0000000..cf08453 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# VAriable seLection with mixtureS of modEls + +This code is a re-writing from a similar project in MATLAB, still online [here](https://git.auder.net/?p=select.git;a=summary). + +Code co-authors: [Emilie Devijver](http://ama.liglab.fr/~devijver/), [Benjamin Gohehry](http://www.math.u-psud.fr/~goehry/). + +This code corresponds to applied parts of the PhD thesis of both co-authors. + +It uses git-fat to store binary objects. "git fat init && git fat pull" will get them. + +## Description + +TODO : see R package + +Trouver un jeu de données (+) intéressant (que les autres) ? +Ajouter toy datasets pour les tests (ou piocher dans MASS ?) + +ED : j'ai simulé un truc basique via 'generateXYdefault(10,5,6,2)' diff --git a/TODO b/TODO new file mode 100644 index 0000000..fed6eff --- /dev/null +++ b/TODO @@ -0,0 +1,15 @@ +n = 100; m = 70; p = 5 +X = matrix(runif(n*p, -10, 10), nrow=n) +Y = matrix(runif(n*m, -5, 15), nrow=n) + +V1 = valse::valse(X, Y, fast=FALSE) +Error in while (!pi2AllPositive) { : + missing value where TRUE/FALSE needed + +V2 = valse::valse(X, Y, fast=TRUE) +list() +Error in out[[ind_uniq[l]]] : + attempt to select less than one element in get1index + +==> Error identified: line 61 in initSmallEM.R, division by 0 +It occurs also for smallers values of n and m, e.g.: n = 20; m = 20; p = 3 diff --git a/biblio/articleMethodeDevijverADAC.pdf b/biblio/articleMethodeDevijverADAC.pdf new file mode 100644 index 0000000..9aad91e --- /dev/null +++ b/biblio/articleMethodeDevijverADAC.pdf @@ -0,0 +1 @@ +#$# git-fat 31158da74f93ba8b9e3883e8d4173e2da02255cf 1930037 diff --git a/biblio/these_Emilie.pdf b/biblio/these_Emilie.pdf new file mode 100644 index 0000000..1262e1d --- /dev/null +++ b/biblio/these_Emilie.pdf @@ -0,0 +1 @@ +#$# git-fat c89e92fb27bcb2c94bc0eddb2af5e724ff676b9a 2049044 diff --git a/hooks/pre-commit.bad b/hooks/pre-commit.bad new file mode 100755 index 0000000..bf5d735 --- /dev/null +++ b/hooks/pre-commit.bad @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Hook used to indent all source files before commiting +# + +# indent / format file by type +indent() { + # getting against as the current commit + if git rev-parse --verify HEAD >/dev/null 2>&1 + then + local against=HEAD + else + # Initial commit: diff against an empty tree object + local against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 + fi + + # loop on modified files + git diff --cached --name-only $against | while read file; + do + local ext=$(expr "$file" : ".*\(\..*\)") + case $ext in + .R|.r) + __indent_R; + ;; + esac + done +} + +# Indent the file with `indent' if this is a R file +__indent_R() { + if test ! -f $file + then + return; + fi + + echo "Indenting " $file + echo "library(formatR);formatR::tidy_source('$file',comment=TRUE,blank=TRUE, + arrow=TRUE,brace.newline=TRUE,indent=2,width.cutoff=80,file='$file')" | R --slave + git add "$file" +} + +indent diff --git a/hooks/pre-push b/hooks/pre-push new file mode 100755 index 0000000..ba815bd --- /dev/null +++ b/hooks/pre-push @@ -0,0 +1,3 @@ +#!/bin/sh + +git-fat push diff --git a/pkg/.gitignore b/pkg/.gitignore new file mode 100644 index 0000000..ddc8772 --- /dev/null +++ b/pkg/.gitignore @@ -0,0 +1,4 @@ +#ignore roxygen2 generated files +/NAMESPACE +/man/*.Rd +!/man/*-package.Rd diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION new file mode 100644 index 0000000..72723c0 --- /dev/null +++ b/pkg/DESCRIPTION @@ -0,0 +1,43 @@ +Package: valse +Title: Variable Selection With Mixture Of Models +Date: 2016-12-01 +Version: 0.1-0 +Description: Two methods are implemented to cluster data with finite mixture + regression models. Those procedures deal with high-dimensional covariates and + responses through a variable selection procedure based on the Lasso estimator. + A low-rank constraint could be added, computed for the Lasso-Rank procedure. + A collection of models is constructed, varying the level of sparsity and the + number of clusters, and a model is selected using a model selection criterion + (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model- + based clustering for high-dimensional data. Application to functional data' by + Emilie Devijver, published in Advances in Data Analysis and Clustering (2016). +Author: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre], + Emilie Devijver <Emilie.Devijver@kuleuven.be> [aut], + Benjamin Goehry <Benjamin.Goehry@math.u-psud.fr> [aut] +Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> +Depends: + R (>= 3.0.0) +Imports: + MASS, + parallel +Suggests: + capushe, + methods, + roxygen2, + testthat +URL: http://git.auder.net/?p=valse.git +License: MIT + file LICENSE +RoxygenNote: 6.0.1 +Collate: + 'plot_valse.R' + 'main.R' + 'selectVariables.R' + 'constructionModelesLassoRank.R' + 'constructionModelesLassoMLE.R' + 'computeGridLambda.R' + 'initSmallEM.R' + 'EMGrank.R' + 'EMGLLF.R' + 'generateXY.R' + 'A_NAMESPACE.R' + 'util.R' diff --git a/pkg/LICENSE b/pkg/LICENSE new file mode 100644 index 0000000..a212458 --- /dev/null +++ b/pkg/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) + 2014-2017, Benjamin Auder + 2014-2017, Emilie Devijver + 2016-2017, Benjamin Goehry + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R new file mode 100644 index 0000000..8e1783e --- /dev/null +++ b/pkg/R/A_NAMESPACE.R @@ -0,0 +1,16 @@ +#' @include generateXY.R +#' @include EMGLLF.R +#' @include EMGrank.R +#' @include initSmallEM.R +#' @include computeGridLambda.R +#' @include constructionModelesLassoMLE.R +#' @include constructionModelesLassoRank.R +#' @include selectVariables.R +#' @include main.R +#' @include plot_valse.R +#' +#' @useDynLib valse +#' +#' @importFrom parallel makeCluster parLapply stopCluster clusterExport +#' @importFrom MASS ginv +NULL diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R new file mode 100644 index 0000000..57638f9 --- /dev/null +++ b/pkg/R/EMGLLF.R @@ -0,0 +1,195 @@ +#' EMGLLF +#' +#' Description de EMGLLF +#' +#' @param phiInit an initialization for phi +#' @param rhoInit an initialization for rho +#' @param piInit an initialization for pi +#' @param gamInit initialization for the a posteriori probabilities +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param lambda regularization parameter in the Lasso estimation +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' +#' @return A list ... phi,rho,pi,LLF,S,affec: +#' phi : parametre de moyenne renormalisé, calculé par l'EM +#' rho : parametre de variance renormalisé, calculé par l'EM +#' pi : parametre des proportions renormalisé, calculé par l'EM +#' LLF : log vraisemblance associée à cet échantillon, pour les valeurs estimées des paramètres +#' S : ... +#' affec : ... +#' +#' @export +EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, fast) +{ + if (!fast) + { + # Function in R + return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps)) + } + + # Function in C + n <- nrow(X) #nombre d'echantillons + p <- ncol(X) #nombre de covariables + m <- ncol(Y) #taille de Y (multivarié) + k <- length(piInit) #nombre de composantes dans le mélange + .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k), + llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k, + PACKAGE = "valse") +} + +# R version - slow but easy to read +.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps) +{ + # Matrix dimensions + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + + # Adjustments required when p==1 or m==1 (var.sel. or output dim 1) + if (p==1 || m==1) + phiInit <- array(phiInit, dim=c(p,m,k)) + if (m==1) + rhoInit <- array(rhoInit, dim=c(m,m,k)) + + # Outputs + phi <- phiInit + rho <- rhoInit + pi <- piInit + llh <- -Inf + S <- array(0, dim = c(p, m, k)) + + # Algorithm variables + gam <- gamInit + Gram2 <- array(0, dim = c(p, p, k)) + ps2 <- array(0, dim = c(p, m, k)) + X2 <- array(0, dim = c(n, p, k)) + Y2 <- array(0, dim = c(n, m, k)) + + for (ite in 1:maxi) + { + # Remember last pi,rho,phi values for exit condition in the end of loop + Phi <- phi + Rho <- rho + Pi <- pi + + # Computations associated to X and Y + for (r in 1:k) + { + for (mm in 1:m) + Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm] + for (i in 1:n) + X2[i, , r] <- sqrt(gam[i, r]) * X[i, ] + for (mm in 1:m) + ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r]) + for (j in 1:p) + { + for (s in 1:p) + Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r]) + } + } + + ## M step + + # For pi + b <- sapply(1:k, function(r) sum(abs(phi[, , r]))) + gam2 <- colSums(gam) + a <- sum(gam %*% log(pi)) + + # While the proportions are nonpositive + kk <- 0 + pi2AllPositive <- FALSE + while (!pi2AllPositive) + { + pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi) + pi2AllPositive <- all(pi2 >= 0) + kk <- kk + 1 + } + + # t(m) is the largest value in the grid O.1^k such that it is nonincreasing + while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + # na.rm=TRUE to handle 0*log(0) + -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b)) + { + pi2 <- pi + 0.1^kk * (1/n * gam2 - pi) + kk <- kk + 1 + } + t <- 0.1^kk + pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi)) + + # For phi and rho + for (r in 1:k) + { + for (mm in 1:m) + { + ps <- 0 + for (i in 1:n) + ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r]) + nY2 <- sum(Y2[, mm, r]^2) + rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2) + } + } + + for (r in 1:k) + { + for (j in 1:p) + { + for (mm in 1:m) + { + S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + + sum(phi[-j, mm, r] * Gram2[j, -j, r]) + if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- 0 + } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r] + } else { + phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r] + } + } + } + } + + ## E step + + # Precompute det(rho[,,r]) for r in 1...k + detRho <- sapply(1:k, function(r) gdet(rho[, , r])) + sumLogLLH <- 0 + for (i in 1:n) + { + # Update gam[,]; use log to avoid numerical problems + logGam <- sapply(1:k, function(r) { + log(pi[r]) + log(detRho[r]) - 0.5 * + sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2) + }) + + logGam <- logGam - max(logGam) #adjust without changing proportions + gam[i, ] <- exp(logGam) + norm_fact <- sum(gam[i, ]) + gam[i, ] <- gam[i, ] / norm_fact + sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2)) + } + + sumPen <- sum(pi^gamma * b) + last_llh <- llh + llh <- -sumLogLLH/n #+ lambda * sumPen + dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh))) + Dist1 <- max((abs(phi - Phi))/(1 + abs(phi))) + Dist2 <- max((abs(rho - Rho))/(1 + abs(rho))) + Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi))) + dist2 <- max(Dist1, Dist2, Dist3) + + if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) + break + } + + affec = apply(gam, 1, which.max) + list(phi = phi, rho = rho, pi = pi, llh = llh, S = S, affec=affec) +} diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R new file mode 100644 index 0000000..4054e25 --- /dev/null +++ b/pkg/R/EMGrank.R @@ -0,0 +1,120 @@ +#' EMGrank +#' +#' Description de EMGrank +#' +#' @param Pi Parametre de proportion +#' @param Rho Parametre initial de variance renormalisé +#' @param mini Nombre minimal d'itérations dans l'algorithme EM +#' @param maxi Nombre maximal d'itérations dans l'algorithme EM +#' @param X Régresseurs +#' @param Y Réponse +#' @param eps Seuil pour accepter la convergence +#' @param rank Vecteur des rangs possibles +#' +#' @return A list ... +#' phi : parametre de moyenne renormalisé, calculé par l'EM +#' LLF : log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres +#' +#' @export +EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE) +{ + if (!fast) + { + # Function in R + return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, eps, rank)) + } + + # Function in C + n <- nrow(X) #nombre d'echantillons + p <- ncol(X) #nombre de covariables + m <- ncol(Y) #taille de Y (multivarié) + k <- length(Pi) #nombre de composantes dans le mélange + .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), phi = double(p * m * k), + LLF = double(1), n, p, m, k, PACKAGE = "valse") +} + +# helper to always have matrices as arg (TODO: put this elsewhere? improve?) --> +# Yes, we should use by-columns storage everywhere... [later!] +matricize <- function(X) +{ + if (!is.matrix(X)) + return(t(as.matrix(X))) + return(X) +} + +# R version - slow but easy to read +.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, eps, rank) +{ + # matrix dimensions + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(Pi) + + # init outputs + phi <- array(0, dim = c(p, m, k)) + Z <- rep(1, n) + LLF <- 0 + + # local variables + Phi <- array(0, dim = c(p, m, k)) + deltaPhi <- c() + sumDeltaPhi <- 0 + deltaPhiBufferSize <- 20 + + # main loop + ite <- 1 + while (ite <= mini || (ite <= maxi && sumDeltaPhi > eps)) + { + # M step: update for Beta ( and then phi) + for (r in 1:k) + { + Z_indice <- seq_len(n)[Z == r] #indices where Z == r + if (length(Z_indice) == 0) + next + # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% + crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ]))) + S <- s$d + # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation + # of the initial product + if (rank[r] < length(S)) + S[(rank[r] + 1):length(S)] <- 0 + phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r] + } + + # Step E and computation of the loglikelihood + sumLogLLF2 <- 0 + for (i in seq_len(n)) + { + sumLLF1 <- 0 + maxLogGamIR <- -Inf + for (r in seq_len(k)) + { + dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r]) + logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct + # Z[i] = index of max (gam[i,]) + if (logGamIR > maxLogGamIR) + { + Z[i] <- r + maxLogGamIR <- logGamIR + } + sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2) + } + sumLogLLF2 <- sumLogLLF2 + log(sumLLF1) + } + + LLF <- -1/n * sumLogLLF2 + + # update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain? + if (length(deltaPhi) > deltaPhiBufferSize) + deltaPhi <- deltaPhi[2:length(deltaPhi)] + sumDeltaPhi <- sum(abs(deltaPhi)) + + # update other local variables + Phi <- phi + ite <- ite + 1 + } + return(list(phi = phi, LLF = LLF)) +} diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R new file mode 100644 index 0000000..ac0788a --- /dev/null +++ b/pkg/R/computeGridLambda.R @@ -0,0 +1,37 @@ +#' computeGridLambda +#' +#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator +#' +#' @param phiInit value for phi +#' @param rhoInit for rho +#' @param piInit for pi +#' @param gamInit value for gamma +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param gamma power of weights in the penalty +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param eps threshold to stop EM algorithm +#' +#' @return the grid of regularization parameters +#' +#' @export +computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, + maxi, eps, fast) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + + list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, + X, Y, eps, fast) + + grid <- array(0, dim = c(p, m, k)) + for (j in 1:p) + { + for (mm in 1:m) + grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma) + } + sort(unique(grid)) +} diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R new file mode 100644 index 0000000..d2a16bc --- /dev/null +++ b/pkg/R/constructionModelesLassoMLE.R @@ -0,0 +1,113 @@ +#' constructionModelesLassoMLE +#' +#' Construct a collection of models with the Lasso-MLE procedure. +#' +#' @param phiInit an initialization for phi, get by initSmallEM.R +#' @param rhoInit an initialization for rho, get by initSmallEM.R +#' @param piInit an initialization for pi, get by initSmallEM.R +#' @param gamInit an initialization for gam, get by initSmallEM.R +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param S output of selectVariables.R +#' @param ncores Number of cores, by default = 3 +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with several models, defined by phi, rho, pi, llh +#' +#' @export +constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, + maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose) +{ + if (ncores > 1) + { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit", + "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", + "ncores", "fast", "verbose")) + } + + # Individual model computation + computeAtLambda <- function(lambda) + { + if (ncores > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Computations for lambda=", lambda)) + + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + sel.lambda <- S[[lambda]]$selected + # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix + col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars + if (length(col.sel) == 0) + return(NULL) + + # lambda == 0 because we compute the EMV: no penalization here + res <- EMGLLF(array(phiInit,dim=c(p,m,k))[col.sel, , ], rhoInit, piInit, gamInit, + mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast) + + # Eval dimension from the result + selected + phiLambda2 <- res$phi + rhoLambda <- res$rho + piLambda <- res$pi + phiLambda <- array(0, dim = c(p, m, k)) + for (j in seq_along(col.sel)) + phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ] + dimension <- length(unlist(sel.lambda)) + + ## Affectations + Gam <- matrix(0, ncol = length(piLambda), nrow = n) + for (i in 1:n) + { + for (r in 1:length(piLambda)) + { + sqNorm2 <- sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + Gam[i, r] <- piLambda[r] * exp(-0.5 * sqNorm2) * det(rhoLambda[, , r]) + } + } + Gam2 <- Gam/rowSums(Gam) + affec <- apply(Gam2, 1, which.max) + proba <- Gam2 + LLH <- c(sum(log(apply(Gam,1,sum))), (dimension + m + 1) * k - 1) + # ## Computation of the loglikelihood + # # Precompute det(rhoLambda[,,r]) for r in 1...k + # detRho <- sapply(1:k, function(r) gdet(rhoLambda[, , r])) + # sumLogLLH <- 0 + # for (i in 1:n) + # { + # # Update gam[,]; use log to avoid numerical problems + # logGam <- sapply(1:k, function(r) { + # log(piLambda[r]) + log(detRho[r]) - 0.5 * + # sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + # }) + # + # #logGam <- logGam - max(logGam) #adjust without changing proportions -> change the LLH + # gam <- exp(logGam) + # norm_fact <- sum(gam) + # sumLogLLH <- sumLogLLH + log(norm_fact) - m/2* log(2 * base::pi) + # } + #llhLambda <- c(-sumLogLLH/n, (dimension + m + 1) * k - 1) + list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = LLH, affec = affec, proba = proba) + } + + # For each lambda, computation of the parameters + out <- + if (ncores > 1) { + parLapply(cl, 1:length(S), computeAtLambda) + } else { + lapply(1:length(S), computeAtLambda) + } + + if (ncores > 1) + parallel::stopCluster(cl) + + out +} diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R new file mode 100644 index 0000000..dc88f67 --- /dev/null +++ b/pkg/R/constructionModelesLassoRank.R @@ -0,0 +1,95 @@ +#' constructionModelesLassoRank +#' +#' Construct a collection of models with the Lasso-Rank procedure. +#' +#' @param S output of selectVariables.R +#' @param k number of components +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1 +#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5 +#' @param ncores Number of cores, by default = 3 +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with several models, defined by phi, rho, pi, llh +#' +#' @export +constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, + ncores, fast, verbose) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + L <- length(S) + + # Possible interesting ranks + deltaRank <- rank.max - rank.min + 1 + Size <- deltaRank^k + RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1) + for (r in 1:k) + { + # On veut le tableau de toutes les combinaisons de rangs possibles, et des + # lambdas Dans la première colonne : on répète (rank.max-rank.min)^(k-1) chaque + # chiffre : ça remplit la colonne Dans la deuxieme : on répète + # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait ça (rank.max-rank.min)^2 + # fois ... Dans la dernière, on répète chaque chiffre une fois, et on fait ça + # (rank.min-rank.max)^(k-1) fois. + RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1), + each = deltaRank^(k - r)), each = L) + } + RankLambda[, k + 1] <- rep(1:L, times = Size) + + if (ncores > 1) + { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size", + "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", + "verbose")) + } + + computeAtLambda <- function(index) + { + lambdaIndex <- RankLambda[index, k + 1] + rankIndex <- RankLambda[index, 1:k] + if (ncores > 1) + require("valse") #workers start with an empty environment + + # 'relevant' will be the set of relevant columns + selected <- S[[lambdaIndex]]$selected + relevant <- c() + for (j in 1:p) + { + if (length(selected[[j]]) > 0) + relevant <- c(relevant, j) + } + if (max(rankIndex) < length(relevant)) + { + phi <- array(0, dim = c(p, m, k)) + if (length(relevant) > 0) + { + res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, + X[, relevant], Y, eps, rankIndex, fast) + llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m))) + phi[relevant, , ] <- res$phi + } + list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho) + } + } + + # For each lambda in the grid we compute the estimators + out <- + if (ncores > 1) { + parLapply(cl, seq_len(length(S) * Size), computeAtLambda) + } else { + lapply(seq_len(length(S) * Size), computeAtLambda) + } + + if (ncores > 1) + parallel::stopCluster(cl) + + out +} diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R new file mode 100644 index 0000000..064b54b --- /dev/null +++ b/pkg/R/generateXY.R @@ -0,0 +1,39 @@ +#' generateXY +#' +#' Generate a sample of (X,Y) of size n +#' +#' @param n sample size +#' @param Ï proportion for each cluster +#' @param meanX matrix of group means for covariates (of size p) +#' @param covX covariance for covariates (of size p*p) +#' @param β regression matrix, of size p*m*k +#' @param covY covariance for the response vector (of size m*m*K) +#' +#' @return list with X and Y +#' +#' @export +generateXY <- function(n, Ï, meanX, β, covX, covY) +{ + p <- dim(covX)[1] + m <- dim(covY)[1] + k <- dim(covY)[3] + + X <- matrix(nrow = 0, ncol = p) + Y <- matrix(nrow = 0, ncol = m) + + # random generation of the size of each population in X~Y (unordered) + sizePop <- rmultinom(1, n, Ï) + class <- c() #map i in 1:n --> index of class in 1:k + + for (i in 1:k) + { + class <- c(class, rep(i, sizePop[i])) + newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX) + X <- rbind(X, newBlockX) + Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% + β[, , i], covY[, , i])))) + } + + shuffle <- sample(n) + list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle]) +} diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R new file mode 100644 index 0000000..01147d7 --- /dev/null +++ b/pkg/R/initSmallEM.R @@ -0,0 +1,80 @@ +#' initialization of the EM algorithm +#' +#' @param k number of components +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' +#' @return a list with phiInit, rhoInit, piInit, gamInit +#' @export +#' @importFrom methods new +#' @importFrom stats cutree dist hclust runif +initSmallEM <- function(k, X, Y, fast) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + nIte <- 20 + Zinit1 <- array(0, dim = c(n, nIte)) + betaInit1 <- array(0, dim = c(p, m, k, nIte)) + sigmaInit1 <- array(0, dim = c(m, m, k, nIte)) + phiInit1 <- array(0, dim = c(p, m, k, nIte)) + rhoInit1 <- array(0, dim = c(m, m, k, nIte)) + Gam <- matrix(0, n, k) + piInit1 <- matrix(0, nIte, k) + gamInit1 <- array(0, dim = c(n, k, nIte)) + LLFinit1 <- list() + + # require(MASS) #Moore-Penrose generalized inverse of matrix + for (repet in 1:nIte) + { + distance_clus <- dist(cbind(X, Y)) + tree_hier <- hclust(distance_clus) + Zinit1[, repet] <- cutree(tree_hier, k) + + for (r in 1:k) + { + Z <- Zinit1[, repet] + Z_indice <- seq_len(n)[Z == r] #renvoit les indices où Z==r + if (length(Z_indice) == 1) { + betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% + crossprod(t(X[Z_indice, ]), Y[Z_indice, ]) + } else { + betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% + crossprod(X[Z_indice, ], Y[Z_indice, ]) + } + sigmaInit1[, , r, repet] <- diag(m) + phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet] + rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet]) + piInit1[repet, r] <- mean(Z == r) + } + + for (i in 1:n) + { + for (r in 1:k) + { + dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] + - X[i, ] %*% phiInit1[, , r, repet]) + Gam[i, r] <- piInit1[repet, r] * + det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct) + } + sumGamI <- sum(Gam[i, ]) + # TODO: next line is a division by zero if dotProduct is big + gamInit1[i, , repet] <- Gam[i, ]/sumGamI + } + + miniInit <- 10 + maxiInit <- 11 + + init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ], + gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y, + eps = 1e-04, fast) + LLFinit1[[repet]] <- init_EMG$llh + } + b <- which.min(LLFinit1) + phiInit <- phiInit1[, , , b] + rhoInit <- rhoInit1[, , , b] + piInit <- piInit1[b, ] + gamInit <- gamInit1[, , b] + + return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit)) +} diff --git a/pkg/R/main.R b/pkg/R/main.R new file mode 100644 index 0000000..387d553 --- /dev/null +++ b/pkg/R/main.R @@ -0,0 +1,152 @@ +#' valse +#' +#' Main function +#' +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param procedure among 'LassoMLE' or 'LassoRank' +#' @param selecMod method to select a model among 'DDSE', 'DJump', 'BIC' or 'AIC' +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param kmin integer, minimum number of clusters, by default = 2 +#' @param kmax integer, maximum number of clusters, by default = 10 +#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1 +#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5 +#' @param ncores_outer Number of cores for the outer loop on k +#' @param ncores_inner Number of cores for the inner loop on lambda +#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param grid_lambda, a vector with regularization parameters if known, by default numeric(0) +#' @param size_coll_mod (Maximum) size of a collection of models +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with estimators of parameters +#' +#' @examples +#' #TODO: a few examples +#' @export +valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, + maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, + ncores_inner = 1, thresh = 1e-08, grid_lambda = numeric(0), size_coll_mod = 10, + fast = TRUE, verbose = FALSE, plot = TRUE) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + + if (verbose) + print("main loop: over all k and all lambda") + + if (ncores_outer > 1) { + cl <- parallel::makeCluster(ncores_outer, outfile = "") + parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", + "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", + "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", + "size_coll_mod", "verbose", "p", "m")) + } + + # Compute models with k components + computeModels <- function(k) + { + if (ncores_outer > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Parameters initialization for k =", k)) + # smallEM initializes parameters by k-means and regression model in each + # component, doing this 20 times, and keeping the values maximizing the + # likelihood after 10 iterations of the EM algorithm. + P <- initSmallEM(k, X, Y, fast) + if (length(grid_lambda) == 0) + { + grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, + X, Y, gamma, mini, maxi, eps, fast) + } + if (length(grid_lambda) > size_coll_mod) + grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)] + + if (verbose) + print("Compute relevant parameters") + # select variables according to each regularization parameter from the grid: + # S$selected corresponding to selected variables + S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, + gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast) + + if (procedure == "LassoMLE") { + if (verbose) + print("run the procedure Lasso-MLE") + # compute parameter estimations, with the Maximum Likelihood Estimator, + # restricted on selected variables. + models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, + P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose) + } else { + if (verbose) + print("run the procedure Lasso-Rank") + # compute parameter estimations, with the Low Rank Estimator, restricted on + # selected variables. + models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min, + rank.max, ncores_inner, fast, verbose) + } + # warning! Some models are NULL after running selectVariables + models <- models[sapply(models, function(cell) !is.null(cell))] + models + } + + # List (index k) of lists (index lambda) of models + models_list <- + if (ncores_outer > 1) { + parLapply(cl, kmin:kmax, computeModels) + } else { + lapply(kmin:kmax, computeModels) + } + if (ncores_outer > 1) + parallel::stopCluster(cl) + + if (!requireNamespace("capushe", quietly = TRUE)) + { + warning("'capushe' not available: returning all models") + return(models_list) + } + + # Get summary 'tableauRecap' from models + tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) + { + models <- models_list[[i]] + # For a collection of models (same k, several lambda): + LLH <- sapply(models, function(model) model$llh[1]) + k <- length(models[[1]]$pi) + sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, + , 1] != 0) + 1) - 1) + data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, + complexity = sumPen, contrast = -LLH) + })) + tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] + + if (verbose == TRUE) + print(tableauRecap) + modSel <- capushe::capushe(tableauRecap, n) + indModSel <- if (selecMod == "DDSE") + { + as.numeric(modSel@DDSE@model) + } else if (selecMod == "Djump") + { + as.numeric(modSel@Djump@model) + } else if (selecMod == "BIC") + { + modSel@BIC_capushe$model + } else if (selecMod == "AIC") + { + modSel@AIC_capushe$model + } + + listMod <- as.integer(unlist(strsplit(as.character(indModSel), "[.]"))) + modelSel <- models_list[[listMod[1]]][[listMod[2]]] + modelSel$tableau <- tableauRecap + + if (plot) + print(plot_valse(X, Y, modelSel, n)) + + return(modelSel) +} diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R new file mode 100644 index 0000000..ec2302d --- /dev/null +++ b/pkg/R/plot_valse.R @@ -0,0 +1,89 @@ +#' Plot +#' +#' It is a function which plots relevant parameters +#' +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param model the model constructed by valse procedure +#' @param n sample size +#' @return several plots +#' +#' @examples TODO +#' +#' @export +#' +plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA) +{ + require("gridExtra") + require("ggplot2") + require("reshape2") + require("cowplot") + + K <- length(model$pi) + ## regression matrices + gReg <- list() + for (r in 1:K) + { + Melt <- melt(t((model$phi[, , r]))) + gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", + midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r)) + } + print(gReg) + + ## Differences between two clusters + if (comp) + { + if (is.na(k1) || is.na(k)) + print("k1 and k2 must be integers, representing the clusters you want to compare") + Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2])) + gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle(paste("Difference between regression matrices in cluster", + k1, "and", k2)) + print(gDiff) + } + + ### Covariance matrices + matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K) + for (r in 1:K) + matCov[, r] <- diag(model$rho[, , r]) + MeltCov <- melt(matCov) + gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle("Covariance matrices") + print(gCov) + + ### Proportions + gam2 <- matrix(NA, ncol = K, nrow = n) + for (i in 1:n) + gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i]) + + bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + + geom_boxplot() + + theme(legend.position = "none") + + background_grid(major = "xy", minor = "none") + print(bp) + + ### Mean in each cluster + XY <- cbind(X, Y) + XY_class <- list() + meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2]) + for (r in 1:K) + { + XY_class[[r]] <- XY[model$affec == r, ] + if (sum(model$affec == r) == 1) { + meanPerClass[, r] <- XY_class[[r]] + } else { + meanPerClass[, r] <- apply(XY_class[[r]], 2, mean) + } + } + data <- data.frame(mean = as.vector(meanPerClass), + cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K)) + g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster)) + print(g + geom_line(aes(linetype = cluster, color = cluster)) + + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster")) +} diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R new file mode 100644 index 0000000..bab45cc --- /dev/null +++ b/pkg/R/selectVariables.R @@ -0,0 +1,83 @@ +#' selectVariables +#' +#' It is a function which construct, for a given lambda, the sets of relevant variables. +#' +#' @param phiInit an initial estimator for phi (size: p*m*k) +#' @param rhoInit an initial estimator for rho (size: m*m*k) +#' @param piInit an initial estimator for pi (size : k) +#' @param gamInit an initial estimator for gamma +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param gamma power in the penalty +#' @param glambda grid of regularization parameters +#' @param X matrix of regressors +#' @param Y matrix of responses +#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param eps threshold to say that EM algorithm has converged +#' @param ncores Number or cores for parallel execution (1 to disable) +#' +#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi +#' +#' @examples TODO +#' +#' @export +#' +selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, + glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast) +{ + if (ncores > 1) { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", + "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment()) + } + + # Computation for a fixed lambda + computeCoefs <- function(lambda) + { + params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, fast) + + p <- ncol(X) + m <- ncol(Y) + + # selectedVariables: list where element j contains vector of selected variables + # in [1,m] + selectedVariables <- lapply(1:p, function(j) { + # from boolean matrix mxk of selected variables obtain the corresponding boolean + # m-vector, and finally return the corresponding indices + if (m>1) { + seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)] + } else { + if (any(params$phi[j, , ] > thresh)) + 1 + else + numeric(0) + } + }) + + list(selected = selectedVariables, Rho = params$rho, Pi = params$pi) + } + + # For each lambda in the grid, we compute the coefficients + out <- + if (ncores > 1) { + parLapply(cl, glambda, computeCoefs) + } else { + lapply(glambda, computeCoefs) + } + if (ncores > 1) + parallel::stopCluster(cl) + + print(out) + # Suppress models which are computed twice En fait, ca ca fait la comparaison de + # tous les parametres On veut juste supprimer ceux qui ont les memes variables + # sélectionnées + # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ] + selec <- lapply(out, function(model) model$selected) + ind_dup <- duplicated(selec) + ind_uniq <- which(!ind_dup) + out2 <- list() + for (l in 1:length(ind_uniq)) + out2[[l]] <- out[[ind_uniq[l]]] + out2 +} diff --git a/pkg/R/util.R b/pkg/R/util.R new file mode 100644 index 0000000..f8b01cc --- /dev/null +++ b/pkg/R/util.R @@ -0,0 +1,7 @@ +# ... +gdet <- function(M) +{ + if (is.matrix(M)) + return (det(M)) + return (M[1]) #numeric, double +} diff --git a/pkg/data/data.RData b/pkg/data/data.RData new file mode 100644 index 0000000000000000000000000000000000000000..a9f09e143adfa04f595a40b993efa688ebd05a4b GIT binary patch literal 1010 zcmV<O0}cEiiwFP!0000016`77P}Bt&#{atu>p&WZl>sV;MkEktY=rRsmk}pa$Ymf| zh%+!j5MfYaWQj0NjSw<W#7KlV8WM{`0pF!YkSkzVt}S3$Smbiq1BGQdTC-1lc;4rk zc|Se#d%TYL+G_f05(GgdXf!H8rKzGCRdpzY8lmyheO15LRlyd-i{9N%Pn7s<BPHir zem067C<~@1^<ag?u=m+|z$J>Z8tauaP}k|=7GfR?3xiC$wy8cy^2&Ku+%x#Sd9>t$ zLmDm^dp4WeYeFCEnoB`wICgw3@@~y4gPhV+@xcK*P%!AO`$5DRkTD)oXS7Ic^{*?? zW@d4z1<kq#IHzGbCRwVCN{8;*VXGFU9OwLO*iN3=Ft>MdXuj-ykhk3R&r1y<X;)(S zSN+Se);sX?v^X)2eCzM?DE}Aa2O1Phg}q!#gS%{0ZbK6NQB5Kt8-%5Q2@<^9peO2! zh6v+}IObGktlhW=$M^k_mdn$?nY!Z|J`#7FNFDD@Pp-u==N|UL4~6*8;e;ORI6aW; zw~I^aECTsi!A(c|HB``J?r)^;hqdmqub6&q_~MQFjF3~R-i%A4wzus_y45W`UtudO zd79nLuxNvtgNf`W?QvK+B6o{fKaPr&Z;lyfKgX5T56KgIf<O@Tyf0Xw11qsEvSerO zHc+&%h-j7V`i~_+ykx4b(&l!a?EiTl#+HauVPU|>Frc`xn~9sSvAlM@y(*Vfi)3U6 z6_Z?=CI9l)hCa+Y{C)q>;4LnlH&A!>UOY(J`%@WB*Kw+7R_I|=538Xe^IaxaV6LQE z${Gm5C2JFkVyl*<7&)30u6P0er*eaKb_bUdxM{YW--RQw)~7G!8eyO2PzAB0kEF)t z?XHy_!ZphSLN<FAq&ssSewxlgv7s5$Cbb8KlAU<0{!~)UHaLjEv4o|7Xs0Hz7RqXl z@g3~}r&+6t6xUi@zY%TWb?#kqSLQqFItEFotc*6R{4p8C#Wq6$Z+d`e5cXEN{)y7_ zTCR682T>wX1fD4Q6{c8^!k<;Mak*tgWMAQeqci5;=}*!@7W0`L#=>CsH@lls&U?r! zc@iF6c@hNYlSY*_w~^;y75Te$4sIR|=P-7f!9+gWnOy2Zp&>Z7CWgV{3%AEc$_DH_ zI>bpVy9f&=2Rmjjlt7nvW^AOd048cSvZ`JygnrlCayPGi$Zr}>`8kaSB7KiJ-K~$X zKP>8Li?SOh8ynTuGYsIVqt0dNolu+}p1ZeL@fJ*RK52ejSBxS*_1**vLzoeFJ*W;i g2h$j?5gPyHEAZ5<ZKI&S8U?ZWFExW2g!}^l0QK<asQ>@~ literal 0 HcmV?d00001 diff --git a/pkg/data/data2.RData b/pkg/data/data2.RData new file mode 100644 index 0000000000000000000000000000000000000000..80003e3c090402d85e308401bb23159ebe1d53ca GIT binary patch literal 18385 zcmV(vK<d9AiwFP!000001MR#AP*p**E=tZIAOfNYh$7j9hy;5B0xF6GML`T8ASy`| zL=XfeDS{#(K}E?(Qb2+r^d{$=bIv(uc${<Yd3DcU_kXwEdv#ycyY+X~+S7aWnr~)S zubJ*%y=pYi-8eyagN}rRgp7oooQ#Bwe21kV+xa0Sp&+6Cw?F6fpX%?3ckl2!LNcbE zVg!oZ`d;fjG$D|>9w!<qlw$0@3dI*Pxj0JFy_n*D5BmA#6HaMP!EkCxMI{?O)`TP& zPcwv|4YN=|hR11WsrLU&mhcKEd<6Wn1KvS@=%di;_XZFb*X<a6ivkKny;ncg`=iU) z``e~J>7o5D=RJ-5M3~85y5TpT4>flw+^-(2AySBs3Udj4#=a}%qfT$rVfI8z-ra~% zB1xchqLg+qfo#aw;oL|PtlrVP(vazZfxnE>z8k!Q1sa;lxS#eg*2mVMqI4QZjXz&$ z=U5<+SF6AIny?qAF1q<2=_SRMapSSLsP{Nd^Vu_(`zA~~Uv8{9l?Y3(FE5@^|A`~{ zL(C#dNjMO5<Z|hkheXPltxuXhS71(qw|0rk1Zz&FZJqsd5mJrQ5>t(z;!yk5<;1I0 zutCP|XY;iM#;nxnGzG@6+1fjqucrg1pE)!fN`JD`?%oNV%hWhG)M(%sVFdj=jqw9w z!7ywawBTE2kALO!Xt#}pVa`?XTlryEEWcu=okqul?e3d{GULy1{9yZ@yciW|Y4vqa zmKem*^iulLv)<UQo@0D_ARp#NXV+x){NV3zVJ6=r95A96-JC6^ip2)^jSd^g!7%Ua zQk3)t^xgMRG!9sYS@$W&qKSB%ZS$&pb+--Xdm=SL8Q9?O;j~BbZ0<Oxe{T03_5uQl z!|zum`oXv&k`iuuqZ|5pc~~t}t8wK{nE0j%4fMsonK>V^2m^v&+WQ)ka82~-)A`af z(D?nb*GUz62xqo6C-*YM^j(F{Jyl)U+KaLd*X3ZVBQof(Jr|lK|LA1VC&6vai8fs| zQtT6nW%E+!z_EY}F3wtZVC(yM@seFH^oyk&u=j1n>2bkJw!RWrC;US?EJYYLfAS6$ z*@Z!>$)Q_n3kEn}@g*R3ngW}&9A}`40*1m;64f=%!CW5yletb(Y_Q%M3DkRzjeEFH zf2JD2!7JR~u6`DT7QPSe-&Q7ZrY}cxZTT)Vi(J-QKV5`#N^Cq$1ztoFyw_lw!GX4k zKW-ZaghM{*_;b&`0%*<p70BAX7w1H}C<1i{V3c0}ZR%Gq?7!$H+ZPlH!y>NtW}D1m zm0ersEqe<t&srxG+>6C^CgIOvRvNI;u3Jn>Y6nGq64c7Fe_>kH@A2NvT3n1J(Ykos z3-H*Go5~y&wk%x+)`MqpINpL~m*YKHYdm6+?P!aG(=OxnSsFOUSoZZ#`XShOF#q}& z-5F?lzCSy!<1O@4X>DAdUWe8WPhp*03LHDT_40gGDGW3kIlId}g}Jrm*HdP(P_rlF z!F`+C*vGN?_V9;aSaFk{g>sh$Haxybmc(F-bJ6jF)zKoj_WZHHG(j7N&gxt4K23!^ z)*VEPgg_Yckf{1Y9Rf2ql=sO|X=24Bt%EVc64bHF=kecrid*t2Pt(Ki!`#tm;a0m{ z*prof=bXnkBB{f1+0$BHm@iLrj`CI;u4r5TD(g{zNwX7%Q1=D<!<ZB{;-6v9v%YwN zOY7Kta{tQ2Rs-~%y|0{)9{4MOb?#yD6&Q;wDPHq`4y)lwc=<pM=Eq-Y+#HvNf+kf9 zY6(T?R@6PdsGp9@wr0`s8XVX`(#D&#o<SgCkb8MS?=JKd^c*3SFW{_T+^!9y6<jvB zNdIO^2I?Eiz0O8D!yt~=hW;_YF}||vipLl5$F3RCpleTXfiKOFN%a%d4b~P8I@Lpv zh~u61+%!n$JYL!%`yBiv=-azSMj`!4g~slAYMlT4EC6;TA~90<o=o;4wpOV|Er(vm zxwK4K*;y}G(A=|lURx4J9lM3tCv~7^^>$1Eixgx#*#xN`XM{DKzLT;im!Vf~{Ro#@ z437LQ=2z0b0bR2BF$GEGIGJ!slYW&JN5omfnmuQsC8+bbQ_?Zqc;ULNKU4x8MA7v# z-s(7S;2dMw+XaKPo*g##f523_!nOE2{y4BO_04rI3Tnyihc-T0Le||6-DeMU;Exra z6I*){phd^;o);|x7H7z5^raR+bd2FXGtV!udWurS!g2{4Z)I`$7q~!{74d4Oof=NA zFj~~wT4B4VyQJf<Dd<%@rC_H17~2SP>v6ko!Fu$Mf;v-fSc_*FeaawBBwg$!2<dQQ zSA{}7EA4%#vZL~-REUS=uacjJl@G&A<mEHFr0?L2*}Gj~J{h=VTE>62rU^RSS5ChT zb|sRZ5xC^e_ZgcmD}TPOk%nV~+_(0a?7<c<;~=ZNFW92w{QMB}Md-f4(iOBhgxz!* za`Ms)Fd2Lyt(S8M7t-vM4;gO3HifOI{UKWLoLhLeT-pLtVaJacH6Dkx?NmdCxoylg z3nVht9)baliFdwb`M4yR@J5OM4}p~Kp58@`WJq@NJYoO(1ddq0VBDXmg;5Dt@9X3U zWB7j8W*W=01X5pP7sJV8*mU2c-^aWUGYnooo2_+%D)RV>=Qn$>hw^lJxONI)R<+4$ z+a>7iScuwi-s%55No7RN7PQ5R<k7fPV8w}z+}xEH_|+U;1d=PDI5F<*X7f1C{Cs}o zSHd4$u+Mp4bu|=cC<E?)+ux6?L5`&jgKw}&W#Wad?Q2-Bjq!L{JO}CaWgjXu9B`3j z=yCHU3M^`5J>Z{m0ctoK!#-=O!FWi(Oc{e5PG1kW%4%DN^Wg?UxfV${70~-fUG@je z-@<!GM|hC<jHYFVd=h4_oha<w?F(UI#dQ^dx1nhM)1$iw8=yhub$rS808BZFOQq9k z<Kn8J)t7@+Fe`V8<6y!?ocZOmORtLuacc9y8M1Q(l6ifZL7`k&c_5IKXVwf0@%`Mg zAO;J)(k?{aZfF|Sqt>i{i^F~@&$5I?U|7K`g8nog4mNlFR3J0JMe&wHMVu$Fqctqp z<%l<|UR=#OJspA*$33o7ktjpq!-EeE2E}pPSn0v75_T928n)FG*aLH)q>hOOtYItr zu#&-YD$ch*j+Lg##MzF!JU+cXP#~@vV!cQQB~OIo6gXONPMRf$YK9sXZ%}KD1yw_O zPIu5piX<GjTs*fdw~3SN^CRzvSFtdN?ZVO01Xx|J8$2D8iX*-y&rGOvaLYxZgE&ir z!`43nuGcF;<7c5m+cv+jmDA!{%ljhi@ewVl6imQzb@IB8k2j#RwwAD2$&I-g3H!w} z6L7F_UuPkO6OmG}x`=D<O^m0PJ!Cm+h4X#eF_y{X(4ofbhnjZq^N-=feKt1<B$2P? z<ii*VBo|0#gBKrR=hR&8La7gAIi^uj7)Zg))e23X?;l{)`_|d_t<x~PJ|?7PM1~VH zXI=(by@66C!{CF91K4|t{n68K5(uiTpLuax7pBT`nN3OzAwNM-%wp9I+gxd8>{Yfg zb3{St)TJc&mMB8`MMVv4hR!idwz3jQQq-kQVi0GyFL#6)mEiajN?X^;EnFpC7ae5X zfT`g#BX{P;p~3jqXw8-$F8r=nepfGvi#e(TIV7tDiU3mo!6scCHr_A9lqL@I*>zdF zM*cuul=#*aZdPcCQE-qbz7E4`95?GZdazTn|BlZ#J4Qd+Jmy+y05%7-?f6;SaA|bc zne6Y~FkI2znX6fVGc>>SJ>AaXNZf0SJZ4##J7YoQqge*x-L>2$pIn$fyTmR*@PeKj z?#nW>uV918?qtP$6pkh8gfYBJf$8*oiO%*Om}5U`T5GM0MN|^Hsd+;%SWa#zLqd+- z)1^oM@QPuZ@xU8xOI4U@`dTw_?IDg_(B@hU7J??BM~-PCAvjrM?CtzV3|8%&+MA~6 zq1Mrz>S&WV3}2VA{m{@&pzA(rWUg9+`6ZjA&U*0#GEbGygjZS6nz^Unq?sRjnce%6 z$#&u7ZL7NT?b$HE>=`y*-U{gj#5`&3PdKgDO)6+fg-s>RE9)zixV+DI;f1>-E-jvY z!zU__4L)C`DBV9{ky=Olr1c1_z0JQo`s*n4$joW-O=QE*mx@|n9-bhQFh3)n|9Jvu zk!fnwH3b@1@7!j|JdO)ri+y&HRl(wV|6c#PqcHQnE9F+YF6L6zOk24(!eFd#7RNRp z3{%T#{W)|BMsz#kzE3_SP#F39+NOFyW#?ozOQ9;Ho`1pcLt6y?Ud*yOyzeWN%GmO< zR#w3zv!i3M7!^#_{|KQupNJc+y`guX-iDC{KUN9_UKpkjV{eOn39CM&cQefFaqv}L zjd!0i&LnG*YrVPzlNw_#=XG`w$i+-2o#;}DWI^OV1(^zAL8SA3|NVZ*eYP|-yLJd8 zE$o(3e}9Djrl(gQe6NMRJF8~b;(1_I%|GY&Yi$^aYH)g8QjSyf#cTJYcb<djTfw!- zMfhanCsVD}7R>lk4hEly#ZQLBs+4YVoUD&H9&vx2K&g1?)3(?$L?4@27YaCpL#{Gy zUdxxD>M+$vuEasCWLnX_oZ$w&`$J>+lxDFmLS*xX%OZ?P?qlC4p$dt^K0cp3F5^;Z zR%V3kH(a9ZAg$I?0^;j#*Rhf|-01oArlR~ZWRUC~wih2G(y*&>*-+-;bj;ee)k!Cq zprw*9s6GcvKaC$|?K=UbwY@|)b61=qEI*iHD}xbnz7lumAOh)4a>Ih`K$vLva#XdB zfx!`T_c!jFFhPI$y3KQQ4E`izety#kW^DF9XS7nr*^vk0`v*i}@uB@ICL=3c+`jXn z;l4GrRSpQ66$|4cQ{ATv4|n{46H6Qo{+95Elr#I%U?J4$gl}H3XCP3V@aQ%ELBL-p z^YlVV#Gqk;`L3NKEmWZA&yVZRao~Pk=p8ph0_EEHNSkm64)AwRHeY)Ujg!9>KC-dH zIJH8F_Tg_t%CCyoLY1{}BiJ-Q-|8~<A2c`_tI7)QwAc1d>Y8I#W3ny}bv&+cT58?7 zlminc?|(h+JBADNPPaDs?qkz{+-tLYj?k5D1-|hs&?*!gDAe^B`#et_VRUiBp55x{ zcKO#}CfwO_UrZ&Wlq<2nGbzCCqH~TOk!{dBPrm!FnLdmc$#I^~vx1oSGYxeoK42Q_ z(@*Sox?qHl!YEVuHl(z79Cx~+0X@z2y36;tfp~D2c;ihT*lH~%4?NWbU(}8<(ecYd zd(X=gPmUGh=<9g3sK=(TC7{pv<k>3J7#K*5^*caM^^k(Z;fL6vU`<QI_zOBUxDQ8R z1Fn#KY_I-m4E=?F6}Lm?p~i*o(2Fd2oY1taemE!qtp#i^i~FNt8h+hP`bdps9lN$D z-sfUPNa=msNNOBudstX1GXyg}QD^!C-V;gJgv0iozl>}21;R8V?{TtFw~}$B8de`Y zeD9)n3~Fi3wvQzmU{l$bR#8(kC|^!|PHa(y;=1oY_x6<GVqik&ZM+3N8NDwrcprty zH`SUj(FbcaK~FM5MX_tG#p;lLAWZW74Zl`l1amxJ?qnKWgrer%EtE@z&?7hdy+&on zzF()BoC|ypYYGELS~%iy^}&==pwSc#FFH7SDF)yKugdig`h{5e{cF8*3J0$6x!oSv zYmIZSg(m2fN};bzEUI(iJx)^>=t&nA!}ur}*R2Q@>^Y#eQ5jHyQ<dY7xy2BdS*zQ< zUw*~@Nh2wicfq*fk!B-XB8b!ERzrT-9MCwikM+UIQ<ym*&rdO9fO9iJ7~Ja*HRQeT z&$8Ud(bma(n)6NA^foA~!m<N<{6G0$|MLZM&*!w4==)-KfELn8T*Z_W0Y>BAHf)(K zG`PW_34^x;EAY@xT)6+^w#)moM9P<XHi2Zfq0yCbt+vk;$M!QniTRTSlOY28?dA1h zJ(B+I*!Mh~dAWVWRq+C1>vY;sl{wBX$)#vV>*GMl=|{$=3~<Qp+%6SaKj?ltJP=}i z9fth-4HEm}p<|$@iCtkCe+64#J~F}q#LLxUYL3a6&hp00(jpS(HnsX9e~&@wFna^T z-DPO9Rd9XPzl<XeapYn`-Pl0k$}JSc0}UkQk4NOM<I<xK$G^oKCy)#${q}PHjbnw+ zGTX}+Az8QL>#1CE7+3ifa`a&hbQ!3}HrzY{#QhB2Eahdm!Ybusl3#%Rr}+=)c2nWn zO)Fcuk{y3Bkn_y}1__+M)Ny4jwF_5OKc*j{s(}_sPfOCzMOadl!pb)ziRF*(ObXIu z5=r>qEc7u6V`usFp9%UpoMxs;Q2Fx>=i+@oXIPwo5sJ6<GY;#xd|#$F>3|iqFe!?E zJD~y11!ouSh*=o5H%;C(M-p0U6eLTvIk2Gru<xC!U~JuT4t=&q3KrDf7rm2S#<Il= zKW<DP#u2*x`WjRX1hN<VA6U{fVN7>d?U$7SY>@RVaw78MP@rA-c5(^!DQ1{Qc+|j1 zl;l<iUm^bO9-6W*a>Fr~`#1b!`!J*|SSpX~59C5L$#03r@H<K{G)pZ4Q{R|M<+@*n zev)h?CY{5Ul`Cs?Rst|&qP=8fl@H^Rt#k%_WSBd($R@n#4#mu#e4fNnSgm%7tRmCG zF+qp@?5~q|;vB!1k%#|4?|AGHX6t7-L#$p}F1v&8eb(#Khs<CyD`3N8&t+Jj6AP() zO^WkAu||@&cILNBERTb$JCSV6>%f_IKAe2$wC9mCD{9zYN#&w6zzqUv=jvuOPH(hs zu{G2|pL|E-`;HYH_!Ppc(H4&jPro0J^tg>Rk1VX7KbM06{R;JwiE(J;i+rEoWruzI zKVvvjL~&Tx{wY51!kV(Xw|;z$$Myr|S;K?-G5vnT{Ft@@3^6q|K6M|1*jDE7+BSVG z^41Vx3?adiOBV4Ar<HJ&-{cTkNhXY)u$cSM%Yx<iZmy`EI01OCDBvO=GxnCAqb8ca z!Kthk7mqy}7<{9nchiO$+6RxyS*eS{+TmaRCgY8;!pncq^V0-OUfa&$|4j`eGR)OQ z`6jqrUUPXw?*h&!YWpuI1VW3dRnsB=n=reO#>s8<8m5j~9OHk(0VBmX6RY^5U`o6+ zRG?`RHtv18c{4X23t~J;=cl=FWZKW`p7|z@R4cx?!@-W5e)jJke>n#2u44-FOC2!e zzVx|#C=oYx7K#*p3POhF8TE+blAU;n_9_`k8w?M-Q2bUkfibPa$F6fJU^6+j$QR~x zOz*h#uB@I876b|_%#1=IrM~&qCmI&$A-<B6ODDsb6REzeW(7F-K)=$5CjojGV>S%W zD?qCw5BFCyVFIaeR-@CNSJ?ON#IumA(zq#=YyF4i0k+<@l9;hEB9J&9*{Y2>gPR-i z9&|zS*p$n*Aar&c=ga!XNN;vx+y0m)Qj&YHY!N_N#cv1gRXGuFv3;k#=V`~h8KK!T zN9^~dA*>Ph7HZhF4+@W)ujxpK;`U@Ar<b|~E*=bbPTH~4m0A~WC$Gh0<u_A>g8q{* zRm&x7A8`)bA{fqX?wN$9iUPgCuKU<CYV#mS=LIev{eFf=;u{o7%HQx&qJ=qzf($B0 zN+Ow4bmz&d^*FM~MXJb43X6Vh2R6>0gw}yEZ^9q0op>y~WrP1S&aOQ3lDkib^D2AD z4W|jv!ug=m<nlZ;J-mHF_MH~ABvnkkFCE1pKN9Qin>V4W(Cwice<jR)4{Hl}6bmyg zrBST@0Z_yBmz=r%5_VjbmU%iE2X&iF?R{m^u+SLd8|f1Qv-{0ODH9WL*|f_jhxY)C zJm%H);k1BdtE=H#Qr~dY?dIR<zh5vvb>ofE_1!o)pR6WLAq<<vZ?}##a^mvCh@TH) zbs^O!r%b8f1&pX#sb2n`4Rhz-Bp%i7g4Nz?)#%y3INx-ZAai^TMl!-|lH2HEwYb4B zn>q-xXf`~Wdp_WVNlof$p*Ju^y{pKj>e!Bb<otV;Tm^`(?jd;x-a^Bc(mDOZBe2!G z*N^SNAXb0fEPd8}85SZsljO(rv0Xb=_P`TPY!r99C~S5K8=~tezns{Kn-qME_lPnQ zN!3tJj1QpAf9Z<w+c(f_?W4U=BLUH|g05dqy~4krCGw?CU&W!q9_em=DXhB^MYBJv z0OoB=DSjTfhpmcDTb@teLxByWpJ@DD{Bv%!$$MHF->Q$=yV>Z%kpBKtZig0dAxGr> z?e|<TQ6|JV)olsAKLt9<cwRvtYun_6`y?z+btN*eD&nB|)_mo_M;IuoFQWbS3fm60 zuXeHvVdvwli+p^JSmRdYc_^=R$L~m%s!244MaEtH*KS<JnUfi*Rz7N2j@(HxT-ms` z{gk$;)e>fGE!`9!L_&YU1ztJMi#WmRRBp=i0GDZPXwoMSV+%)M{A*J~=yx&q(G+dL z{9VY9DY-->**||T&{PW&EwAtz#V+Dsl40$!-gi(M>)ZP&<Rp$9TCp}c^b*@1j<TBa z%i^>{KTn>E8#Ggg3{J_^LE+QbE&kUXI2+j~P&y<5-POD6UV}WAc=|`~=}dw~zvV-` z(Xr5RigLro*BiEF$&)glr$Bq$)HBP+Dp2`Xe)QaXY68X5m-%1pv)E(adXjUz0hWC7 z<UdoDLVrz^)vU)RY#WjY$7wj@oJ|L>w0r{0T~)HVUJ?ug7fdykdGcUdTfpm~r9Y8m z`D>48MJd*ERm8<I7{JP)|2NIg-@vV^t%%rr9;(Mz8&Y1m;gs1Q|J5;Vn7-qe_i4BV z=H%I#HHV%NXyWW|nWjxaZfB(N4aGRv?nz>`vR!~hvA2R9g#9>Qy<6~L>r?EB56aJp zB!PhuI_WUqzdQD8d)JFQRyaSuoV!MH4C<(ijtZO@!rb}Sq;Ir>U@^LuaLPg!R=-%$ zE=6SFIF(9aU%wYNaFShCevyQ;CkLZy!Y@J3)brruSurrDsdU%ya4arACJ~OQ2!XEL z)}*}J*95YRXPgANW$a;!p%2X%#LklfBrH{PFuRJ=x#pjtujBB@r!0M3y}16Fb1NDa zt`WRdt3P0s^a_z=^)qxHu*r=){tOq4eNT%g`a|zS4%f&iTUdMIePC?E7#6t1*6N>| z;XnjWw8=skt|<(?W=PM1>!a?E1V=^1%J*qxW>se?{5On#T#}Is}>)E_}sadW(@W zi8rCAW|8v5Vlr;>&k2m03}eHC*3(udmk1;o@`Q$cG#K4%Z8Gbig;Vv!mZGQD&?}I6 z=d+9kOwl)^l<^#_+$xxws@s`&%*-AFrVn@GszXJzG{<pN+xLd`iBmAU<ma)d2RO@i zuIL-{j$MtBPArTTgC5!QE<=9WL@EjUN9Ef67<=e$amK|V#Oe@T9{;y6UnI!?oA)^+ zT)S&}ZQC6ymzb_)21H`t<M$8Km)=0#?Df}f8;782xBaIaHz}A>UpuIzaCgVgzpf)K zXpSxGBUiW^%pi3(=uuUfG4|By9^#*C#U28sPLXFmd}moIzH2iF9m`a@!x!SPwKaO- zxTqI&%df<}j%>#%X7%MjRwi7K)l6Mt-SNY9hlQm-sN>{>ZPTqrLs-32o-0=115M^n zt;03wu;!rF;pYLVILVpJ&#Uqari$8h_n&+U?RsuMeMl>?|4TTtLzOJL|9QmdN4*T? zyjzcD?t4S)-$aIQ!A3Y5s)~KR-*8Ha`JMd4I?O42z3g&%6jM}qjxitS!r2>RD_`~q z!PxS19{V^gSeKp4_B8awzUEbZ<Jbo<@kO`RJ#RM-t$ggCcFu>yxdn*~L1WmS6i#C7 zSi|ACO&vpNXKWrc8hQcyVf~?4r}2h1ft(p7oA_5?-9bmG_3bovQrvnHw`jKGuXuO= z<#fR2k`)TEsgLl7MojI;CI`;51Tft;a)zxK>kigO89VE>dRb^|1dN1d6ZaPCV_tk@ zlw`w4taaG3=2tw0y*(PGUP(6Csqkze^Dqti`z>1v(OiNW3io1Lt!bEx*=j%YqzmVd zo&KTt$O>!f6i0^7+r!+1mZQ9fE%g4Pe$VU63maF74xIZZV03!grk89Q`f9kYM)r5& z(8+Owl#27v{-&AUzf%p?{HNl2MHH~g?(px_>}{+s3{#zITf<fAY;Van5A020`$c{u z4~NJoSZ<jSVP@YGvI|!du!v8^@yUH!Xe!*Jb6A)Tram*b4F}4?;0Z<ZeJ597p(yCL z04F;P9J|6udPERc$GI8|m?$Av;mVocfr?NoWjN1%$r<}Km}d`*1>lIwr;P)YB{=rp z;6SN@6gExW%U!m~!zRkPuiR<_Fx~3O!qC_84>@*n!v|(N1>?V`;YmmgcaDEc$#4Ci z>GlczRtuG|k3_~(`Q62@gb7soGFPfUZxNX(`~quLpF?NFTEy$_yF`j3X%V%Qo>)6} z{ARukAI@GI(dhIxg~b?mG1KWz0_h8(->P&CFqlOC@=N~)I?`QnIu-B>nk|-+i`@-j z{*vOU$VfT@$>+#RKk7?}BznQOD}xLn5m^kK*hmPh>}`=`!c{OM+C8(shYVNNE)jqC zDG>LQL`8d4ED%}Nf9pK9NFuN@s~?$Rvw-KbM#l4A-wDiImG(BzMTq1ZdlS62QwWrG z4TE~iY{cE!!W=b1H*n)g>4yvwAtDuzq5GuoPWx-U_dYQFC6Jc2vuBVo6Y12yCMn*1 ziJJqT{lfwe5J-~>*bJ{w6S-bY?{%UUAuwHS8N93^fos1%KiMm&4+GCrrw+xRA(HH! zzrU`vm%ui6G(BN>96J=l#qLE&VC{wV){(&&0`ra7zP_OYM4HpqPi&^8h}4$noqs|q zOjDxEcddF{l`~`<I9Q3(*Fqk6DNe!Km(A2vOHmxN>gLN?&4Gn`*m7Rflt{-c`};x9 z!JX;U@JCKJiAb7u<T&F|GnhX2ZG|D;oJhiv#SwRk1_y758OPAaz_x2pzQy?uFvEY_ zR@qVx=GkZMzAh%>z?^^{+cXK0WGnx`L$5#r8RMRV@m0SG<f}GKyq6kaeyA$NS@jIe zZoZ`Pe*TxR|E(Ch*2fUpU$x%3qs)p^eXPnf3`K<9dzP22q}E`p&?}xg_!5yMLsnwy zFguY_C*_?5xd(xSR*lundTGae6}+`}Ho%6x)&WIbCD8Y>E=C~W7J-wgPhiz(H_irG zTl!!5Kwu73e{pl~CnB|6lzdav0&cl|AbX2UIHW+bJs@?GK$@^D6soZZi%yTj8yyN@ zgTH?E)3<iQe)p{C@joxH)->!%o1#8};#eYs{;mj^xwol%Jm?gT$QbK{5%v-(Yzz{+ zLMot5<1(9{_!sD<3*{eM=7sT@sNallA~52OXms4%FCwd2VFAZE2iR_#lp4rLBGUbK zHjkjxBXYj|9e(GI49p8!KRy=52@}fSp9|tEoI&Q~YqsG;8mqjb-+eoJxO7S2z!)VC z@_5^*c&ihrmP!<gAH5~8zSG+pTYo}eKACf{`^`N9Gb2l|k$N0%d1_kH9w~$2de=dt zTkg1_o4h)cagazxy|qA{T!5YO&Sb;Uc{p$9dc63I41xTG8qA$^BQjFfdpggbh82fP z8cZr9xHb2q(nB+t$VBhjsXXHVYnRo&NWTw*`Ih(J`J*1f=31BV!N4`xGT9k)mI;xB zKICuxa1oJ)T-}}j+s^%zlhn>VL4oxGKac8OS%6jg)+ym9Pl(hy*Pe&8M#I(;L-W^y z1tMFee05z>A}%iZk+uqy0g>s@-?!v}1nPk^%4ctk5NJPYqg1#y%&X;pcqEt&^Cb+C z_t=?;+@B_=uT>L?<b&E4&eAVnk@jwS*LW9k-y2uQSdEjomVd%*Id?yibl(XETe}Yg zru3h=dD>*qml-yYEEETG8FLNP#XIhW_WbB3=woMWKBut|0sqLKPn@|)N1#1(rd#e1 zFJUjkfa^+6EzFXS*?vE-2Fp1ro40N1aW?I+rsB;F;_iFfA`)s(A?1RK_8zNW#NEN% zA=3wF2^5XZ=^5r*uxh`z?78<6f#UX==&9hB7_RZ+OXWKP%+?=EKCpNQ7irwh==zUB zy@;n_L1hb(nx7%klXDfutXOwnnj6O6$R<Kr^(UB#*%K)I@-z;b)b|l-m2s8Pa6^!N z1s5*(6Xma&!YtP!6<w4wbgt5tu}RkxSv}ZAIWB(3B~BIFh2};gWl#uz#k2`5dU*8G z$SdQtBZCfUECqqJmA>-ow=|qsTIAOYDS^$EG}-i%YOthIz++PQ3udRe7j9KPCXi=* z<8BB(Ngz|b@7ulFMqqj@Xn%=w50PHaxzfoXhQR1@@ms@@leiwIT^JK1L!{FG9xu7? zBh-&SrA~ED#3r`p=A$bTILmQdh2{A#7*#ueNa-*YfwXJ*x|ZiYTy4M5Z+=Y>`lzG2 zpXu)=uzi{eZKdUcuC9$4DGeKFYl~e~5S1aaK#;9gfG>1Esr;4RDoZ3MWOI=Ox8S@! z+2gyvDhVtD`V9f%L;^eaD^YP-VZ!e7Nh!>?4#MKFn(jk|Hk`2V_7wj37B+o8-uz=M zh)WBjDKXuF#9j7BJtf|}B`{<diOFX$z?2yWYr?(-!tQS>Mw;Tm1d{7#JR<X&U}65N zvspzM&PF|}Nl82eGv(iz6rKBFTsqll<ESJ0@V=V3(nSJW!r>y84{HeI!98L*;<ChD z8LtAk`Bbo9Vfl8*rQ-xfy_7_Mx7`F*?jfyc9Whu>3Lu(AO~5)!h`>!9Is!F)bb<I= zb^=!y{k7{a9zgNNVA2-V&OG+`8?|^z3wlpvT)6YdAKM`>OZ?MIB75@>&-YizF>_o` zUo_ztZlpZvUN;-tx!)QTFCQI*xs@?bgTkB89BFvpQ~M)MR4I2fB)lTh=iR5(rt^cv z69xwzcI#o|q(f4U_W^7~49^=mhxPeeZA)IiaC_*<&nl)M0tFe(3ty2EB8B9Hrd?(@ zku_ppYf5Mo&L;46>tA*xQtI(ar8Nd%`pxn0SH>!dtdHs6w`NlAgf-)O%+At8ved(F z@BdiAO3K%}cO}CKWMzhqyzlPAc2M{^Z#hbs=|0>_zDt+D@+MS5?1~Z$?BUN<A27tR zEsY;bC0$UFDSso<_b#l?*l+enDd3QSqeaF$N$i<?x{$XeL8Q$$k_)Cjk4q&|_XO-b ziDX-Ug+A_bge5Q5yJ0Qwi6s2Cu9@~f3CzN)T6|1-I2ZOT(xmDOfijPaD}ebTE__V2 zjnAAXkhgGYBouDI=GTF)KM6_LcU!27TZN0Tr-Im&?nWfiNqX5NuQw4%bamTW&)$Xk zxo)A%J(;kgZu!?cg_*#qTdbve^%%|^j=MZU%Z^JCyUuRheusk(_I`QzcOQW)H8w+5 zs1oK31HKw=c;if!qAB683RZU9w)O7ehpo^%G>Kjj1iHYm7U_<7=(~R*<ipXoL>Bdz z@w+cI5$WY7wO4jGz$W+g9?QEXM1~TZ_(X9(B1K`_+an})L>9@DyDpPlg_VHIPq>V> zU{>iCn_kc~E*|I4HAq<?lAp4p|9XyzKygP^*f{<y&h}m!&|6T4m0DIl6RrdzO~8~& z-`igV&WKlbG!GbXT+fMose~0aO-gf|X8mC$IaTs%k3Z&D(GkrSZHT)P<;dpi_Yj$a zewe?DmB;m>IBRA8GuU%gES$%{k3iu}eLZOPAc6MV8JiP9%>;^hhL9g$dSLbVfyqea zPF!z)*_6Gz61G1T#@Bf$!qPPclFS311eU8LS>fl&aJg4w?;n$gkig^AVkzx}F)wJp zUoBg}Y*G&Ow?TC<{H@9?-`<<ZZtOfe^^$`~-%2E1<Xt5)rW~`^wiO|eXa;Ht22>Gt zZ{Cr*M{7VJDUf}Y^wf$#vhZwAeFO=CIjSYV!+ef#P~>yC%XM#D5<2#jtWuLe?G@E1 zclb5V@Rb<Ui5B3ZvJC$`*Imq$dO*%Y(?X<i5DnH+qQGWZuG8-weDPz#*}M?-P9ptT zhr;d0;;`KKXy}j7cOuQZ>Z^H%yK$g|m*vKzV?+kJj*0xCE}WTWzHb%sg1~%Sv2}ue z2FK*Qsw@61684CzMUI@-B#=dYd6HnxKqRLFgO|mLL~>>W-q(FRxbdv*SbR+wfsXm& z*K?1!VMD>vsBtL=$Aj9=U&sz7FxQYMFp-rLXi|U4-0Pbol4Y%rj3)IES#&>f`15n& zs*rN48rx-<bST(%ljR{WRo-6TdXPgr@Lol$!h0SY3S!hmU+NIa$GJ3?@5n;uf<Niu zbvE2IwES6oFpxmSS|99XyGCG2_`7bHq7IWseqZ1Zj3rR#n|;0SaR9om`GLW>C@eAc zjhLbVVOQU9QFJ;NxOtnINM<?V&>QKu><<Eoq^!qdYPb7vtvp!doc=yots8U>!6^cD zPRt{4pM(|K4eoU7H6j)Lr3-Dc{X6rNBeeZt368e2cgatA5NO9&7EO;dVO1pYMY-mC z0(oZjn=)%z*k--*@Ss2khG?4%7u)6HsJX7!htyywzpBQvuSO24wNHxM+x~*J6ZeG# z{T^Z`uN>EfnJ8Q$NWCP~M?)Rcdi^ykL+lx`bEuB~j>CK+RRbc&p~+`OP4k{KjyNtq z9xG_WF@|uqvP@=3&HJeGyfg}CN#j~kgbPQx9jAD9{Gi#VF}Z4cbFg*urM{CJInMtO zy!%y34VMm6QOPGqz?Q}1v+lu{cJ$!Zad#sarnGM}?+;7C(bzGnMwuumsP3{J&Ao%$ zjAw_=%jV)>L1u-b4I8$~9Xk8)9xwC-UFbC`@`VBF`x)AqLa<@(lhz>M3Tp>*ocRS} zaXOP?JD%egtYi*+);m*-L$h_J>brFa)K4fPt+MDK|8GOtC~-G5?3KLU|5FSnCr&Es zniye;!@b90gN3+gLYWkFy$7}*WJ$V3G+^5ows2<8_#MBLg6yyN3^si-%8b3V8^*W! z1j=+TLh$Mfj?Bm7xYTdLsp;N?YsUw2Lm7`@d+lfKo02<z7n2wr4<|zP2{$jQn-?I3 z#!2`?<4r7hq@|t2g)qj;oM96&1e5Gx+Q+{s!Zz8y`$8p0vGLWN`3ChAj4^LKE@N>J zdihd9T>^f<(%M;?Eh8!T+oncF>9>jz4naDmVi#d4e2>tO8z(lAkDs?0F(i<^ziq4g z<_T1$)Cn8Ec@D!V9&JYgmvMRK@P)BOUaYy~V`ZpdhchoUJwJ_Dqo-(@v{FzAq#sQ1 zes^3A=Y>_vGxhIb{ro8n*RKlDZ*+X4WY2yW4GJxj&x^!v7SqY(>MpFR+uNTpy@V|y zE58T(9kK4__GiEHEtn2Cob)nGddKeGl!z3u!6g|(FZzR7IC5S2;j4(35F=*tIaBW- z4n1Iu_*Hg}NYdh|eocN4zVPx$@4u6dlMUZ$Itxx?JEQEwFy&eJ`SkgGaTOi*ov;`C z!5xmp78iajQ##?y{F><=!73Q!8!^@R%!$25(ld;YgRy#e*XZ}hMX+oh!u)e~duM&4 zQP^J~gEPhU7jjyTLz&>CBeKp#FtSfJtacv}d&We(eomV~YP_P?Zgn!~JYALGGWQOb zf3efo86U;LAS!$l8-!!-{$wq113G#&UdQeCCy)+W4%Isp!vOO~SILGTm?_HIo}Q$D zf@T+m@RHroWxbTgqp1e%WuI;4PNx#dxt(8S4pk6Hbkf<lpK@T+U(4U#EDvEi=)llB zzW|(eK1)<FD}hlhw|)1bE<z=Dsw_i0D|W|}s+k0BV*lQx!sb99ED+36UA?Q0L-zN% z<)(IHSDoqCPS#}X{}g@y4fQP;|Hygwv62kL7MHH1npHz^>Pvxp@kgLWnDSZ52R#BA z!OqdN<{FWtpW#Zf1T!>q%=U-;Y=*Wa%G;Z5Q!uc=D4PAC0)M^xU2se{8`r|R0@%U| zp!)H3Pp|Pj=*lMJNU!XIV2`CZ$qRfi=|?ZuIGc#b!I0oN@E50Q?@BH>b7Swxh`KeY zqZpLnK1K2FJg%|Sbwu0OLuG<TMxB>CX4QP<VcKlL>Ae?HZ9I5zKABe3+4=@P?2A9w z_eBH7NSIYls^-EvulvV?yBJ~aR>fhLS6(o8B`>K>=rm5rvp9K5+CxKUB>8>SAzV3l zjgT8~0)|HGM#o-WhtS{VzA=KKSQlKk&3fw?E`0MyIhy$ZMiV<KBKY4x%f0ht_i}#V z?D#XA3A!zu_{<~XAV~^+mL5YDhPz;-cu6+o*k0VsS%~<czz8G3r#Py+__2SQ<7k{4 zANE<?E+~+`gRAa~4O7RC<Lq|N(Y-H!;$&$T!vnsnFjgR{IjNKi{hZ9dpE`y>-pfhx zvrH^lC932k=H-P|GnA%8irp~J5)v}SbO73f0@vr9)FJSEi+!+uBUDzKcb|&*2s6RK zYBR*WFzwVf-}IscW<-B~e?8lbOQ{boTQ4f(xT||8BlkSE3~mMx0;Vumb%x^**BO}k zD!)~C?GDb>#Kyeb>w!HaXG5$l$8p5^L5sKOHjK3&4v`xAjWut@3FnSlz?4R-c8YE{ zPAGTzDq5{V9{B|0uD4}7<LfFo*ZT;z_OSP}==Q;+y)&f&$qA@XmzLwMRl`|PM!r+$ zg<-B?yNc1f3~J`&%+^O)afx>hbSUI;L0Vjbo$@S<ZDkG~Qsjmynde%nqvo)vT_R%B zGK~|z|H|at=YiF{><2YJq@maCgF=az2lmZxoDToPzO$ZWoR1^M!q}UQgA`xuU@5-D zsqb4pO!CV1kZ29yP_f}OnOO`>8(7F5+vkkMhFirlRjSzUAhXQs`wjatc;&p4+hHYJ z#XscF5Y%{S3{&dlW9&KPq?ww6deMN77i8pc{NA!r?7^$p7+626<Z}&LRhUEt1i4{k ze0xjsWEI3lI8?HrDUPTs1#g9qqRZ!P?R5Dk&~xiUBJ22mSm+=-|1x3(7fDKg(~1b= z(y-I(W2drV2HsiZXR%@jedWWic^Xi|y{rGxT0Hj2Q>K+`Z)5LFpLAyaB&PjD7d7u* zSSC!Xd1mE9kEl$>?TlWS_MTzCvG*3vzjj0OXH?kqdcBeSLJ%Yunq(iz6vwi<)!Fiv zeX#I2+U&!WAI#WayQuS_3Yye2I-WEK;ov=m4ws6Z=WO2rM&|e{xcqYeUH<QLFd1H; zDBQsST`pObUI*QA*5K-~r-7v~o5f0LM#+uK_iCwAzcpjM%SgF-b|y?@UOlMrX$jin zUG|%E1VikLBKf~`0kF>A;vj=zIH>xdZ8QEhw8q_0OS|d}wTq$icJ+^8>Bd%tS3?V| zopG486F&-_kExgr)y?4a%e5}yOle$lc@*C@aU7@2s<MhJD{)!RCi+U10*pr2?K#kX z5^EF>`nF!%gt`LH3dys_VPNje(O9u_u-=gudn>mBXPpnLNJ(D8C2e+t(76$8HEJ~? zJ>QCxaji-P(ac0riky+7mkVI&&U;n4tG}@3>ZSe4d%a<6ygRmEpABZJTvoqWT4R3U zDZWI3GB98pP`v3J4|Nx1Ma#t#A&<K1H{<?7>_4kYcA4@a^w8gEQ)jykgN0kizIrO- zdbU@vS1=upT^<vqw!RM4WTHOo%K11$8xk<?NsX>7kNDYN@?%azobg?beAsMyn&<Vd z9A^(+SNE}{!l_-M#U-B2uzr-!_RMQHSYpnpR$i{dWwE^;7s-pT|KxP(dWAczxJ4D- zc(NB~Sy~?~&6GlMVN7J3@&?2g7~V<Qr-jo?L#OE5&)`~hpSR?r6zq~nDt!970hiy# z4!S5$V`I<zfM?Hwh!k?a>78TfG3)9ed6eNY_8lM<+FU*jgWp&8<T`QVklN}`bA^w% zFyPS<-hB+)Xb-xE-8lkniWa%nYF0S@)K;Q%!~(|+GngdZO97wne^8vah07T?zeky{ z!bTp+fYkkFY#vEWKSCXbVZRqXCS16J(_I$K-E3SiLCvAtMtBAji4xj-(*$AARaD~O zm&dR*vM3c~N(ueCe0z1u?XYLIk0i;jpGbki8sU;gFgLu^={aTsBQHYkL>|$^S^Wj6 zVLfu}edEHQJ7A2J!v}U1NGoC!jf20g2p5d$Y}aw})x%V#QX|(NJLu!^vTeQY3#07T zy5yH{z=+H92><WCI7WN3Kh~rG9@k0FG+wm9){x1i!?*vy()p$*d?md|%srM}BJ7T% z^p@snlPfT*Aa!)4LlNhn*E3KIu0Zlgo1E02Ef_ppdMr>!AC@qvjjpQ$hfJowj~YM3 z>HEIF<$~YihE;~0?BQ3~Ql0s@)#N*F-Z$ep<spTArHzx1?M+}bd(iso%O33SFJSYc zal|>Z4cfyFDcEkseN^I26wIg}p?sJ77mH&Kzhbo2#qf(=j^9envDRPHDJ+K+x4T1U z8J8~r@yGtEkku;a9n;H2Ra0zP@f@~wl)>8A$sc(vmvLF5Eiy_a633fNsJYy4@5E^e zN!8syp-)ld#xJ9t>s6ObwGyZS4P&<N8SUsn)-?B{>^3e=I(FsMRpQ*1`J+R@JL4~X zSe+ikjROiHjmh8c!CZ}0fDg|HZ2H`*mOXtLW~7{MGrSDN{^WogF)XUM7@L^5#&{I^ zqi;M~ytM*-Mcab5SFCYNf6zutZ3ssB-7XQBePFeqGRG?5CC)uFAd*(-;ozCh=Fm?e z@KK|0>6Ex44vg5Oeqd6=vCUhXq*_QMl_Swmxjl)we!s3=I=+O<jbAFwgR*d<@vT=_ z+;f~aNNF*9yc;Hbe^zJZN8!AZSx3^vSsZ-yLHTf}C@wGPlea2bK##K1<pdEK9FVx{ zU6F7c>vZcK8h0hZ$j0}gJ~CFE`1($jPSz2c66bF$edopnYl<T8t7l=`>NT&f|6#1_ zEf<OaOPqf0^K3HkFZN7TD19TT#3_NXIxX{c%=K-rDC0E8p@xL_*VSo=ls!Htz2wuN z=7-o(8YNfkHu5hprKtsb6YRto*PVE5+|6w^eW#zCyB-I+L+?eWmt)z<5V~R^&%?Hh zKr;WKz^E%97Im_;M#U9y&f&@Vm-o2fFVzgA^AjqFOf*RR`}!*MOg@de<M0D#G=Erg z_xnI&#qn<Wm;TtT^w}$*lN)>dJC}8z^57gz#RY*2ZMb<{t-3}r3i~<E+m-rALQTcB zG!*85iJzY}xNcWt?eA>PyMg4m?5t-$v->KJY)?KJO%}vyU&AD^<$E|dnMCsZ<0woy zC&?}L8Q}Eo4V7!P;jmyV!1*=V99NUxI?Ab&;PCe|cf`~0VafKnAn%twP{EVGJ>V?` zje;|*u4n29q)Y*<bT27!o>_!Khh7yH3XW!p>FmYMVj0<+X9lt1&Pe}~1vO0WJ^Yj2 zE*pAfkEOKnDr1LT=)e*+H{^Y5Vwb(IhP~x4Iae-=5$P29t807rK|gP%qLY#bfo!r{ zCp}po+vPtLWNMV*%;eIWG%HuEUJO2SJl`0$WY%u#+n<B^Bfp0tzYajXx$<=}x(OUR z&1%2L@*plW=jZ>86vSb{2N_=kbD_v!BDV9fHVl0jv7NsWiwlmvexlxXxMDF@Z5bU5 zV-`J6RBN?i>lE#;xQ~0WR4g%Z%%cWpaGo%6BNAE!hHJuZ+<;z_pPrB6f_C&Yx7q1# z2Wzhr#P8=&Lf2ZLZrgVnY@|s3b7QFrM!(~}h-7XkJsJ~Ao+t$is=Uh!qwJ7&j7|OP zco1$Jo4=a2=z=o^5tZ=@*P!jwt+b&R^RU@zEu-;H4o5BX4cD2iprJ(Al-xNVJ9Q4G z*L~S(pD*+b&jCGH+UPvfdiE3KB;0T4jTgb5bKkdQ-0gAUSy1j4$r-3KxjXpG^cl`p zTx(_9eGz(lIfqrh-NeOHcO~}K=tA2={?A)!J}`gy)^Bp2M63>BquV`44dXG_)&(ZN z;&zCVvAfa-9GXtGYwwanyU%xg_g0qSLZP&N!+a7pF$XZcj$y;T_zD6Y=YAM`Pstl< z{Q)*<6mE|n>4ZM6wvSp-e6aY_!=BUmJ(0FkaNUBr9I8Iw+*{)eFjlR~X;b|a#?Fj9 zbt(=;;v&!~KH7(UQytmVQg2|CB;+a2^9<;zDm+2s6$kyU%g!G5uQ2OPY)t4XId=c7 z=DTuyFE$<f>bg&c7s~6$&T%>x!RSik>5*khBAH9IOnJlr_F775+|RxNON>G(c4sdV zX#eC2pH8fRh2tOp^7T!@vP)pBh}&Tt^!uDrw8sbsv&9_rzu$%Kz(2pN9zDmJ@ofvi zGnKfpZ|I0dk`N{}wDGA;R6|2(%O@qaoptc{bAf|GmbkiW#Jg@@2bNc&m4@a|;Y@?g z=iPtoVRk{*VD`&2_D-c<dEt_X^^+O#i64hx=oif~?a#&Fds;$Qn13$}h2&d!h5KT` z*-+z7*H$74vsBLyQ*#2@tEw}XRSw`p$$n4D7*T8rS@?A%SQA%TT*(Zx`>|k)?y?M@ zC4r<ODCUGuCr%Arc->4z!~r438S*$UY-aL~6b?VPv#x8s3Q2khGs+|VmnRmm)8J%$ zi0T1YILj+3yQzo)shW)fj@20VbD#Tw`aYOAHy`)Tu;M?`?PS~k;~e|HpELaTbo_tF z$o?Bo`VahnFDLs4Iobb2{?F?FGmL+%^N;mO{|WB@tl$5mHvg~c{<E?CvvL13{C{lg zpB?{Qpa07z5|rlpW=2nT80LT9hu}X>lHT}FM&~~-yJxEZ<jH^N^}p0-5ZBZ<H?%Ml ze_~{0_#Zkav)U2+?(nu!v*Ho^V}sKq)ZO@z%sVjpX|DWHjza!t11s@+lWZ$L#~-Pe z>Ql)3qGjplKIC*^-&Vo;-=)%@N*)R4FGjqh<Y$@T$=;J$4eKeq;#Nj~4I~a6VS3SG zd|Ua*A-?Uxw@TOa#G))e2D&|Cneh9(n0hgNmDpdbCZ3~XQ@(#gqU3?c<d_U}OxUmo zERa6DHuHchNUJvVG@H))Q7g95yDKicB0ZHJop5)29XNF9{2@vPRgM!<MD>uuvk8{O z9MhJHj^GasbL1Mqso!Fw3AEC-%!$kIIRexQ)>353*rO|!<6V;Lb(~H2SC2%gUaEL! zXr*FMF4LhkzbiWGwJ@8bb~|&KnxJH!-_JCma5%JWmRv%uCVe_);_uYcTZbEH&LNvb zGM_rtqK0A=dyB6_Z;MOLZsn>2NBwO^Zmxe@>28$!sHFcY?Y^subzhCcfjXMM)HjW? zQsm!9u`L@G+@dB^G94{frV~p)?m-L_rIvQiTc>s%Pi-F3Os7(|4|z^x$t>aeV{<%h zp_<~`>j>^QQJrNu%c87;V@!-M?OJ2v&W>$-KfO};YN08aH<&<fwqjD_A4_he-|1j! z>z3uJNm<)t_Un*+mEKWW|EA{p4_y%qCGqAoyE!hsNs`F-k*_<u%T4N+THyMTW8x8M zL8^99rFIHKGt+gAvA$v^D~mPx+Kmg#O?RDX_(wvIZ|skaOSHLP#4XifH@j>XGLXUD zm<Wbb4|GIc*|X@W36ZaGdAG0mU1Kk4Q)ft<&Y&u(J#VEII4Bpy<C|;Wx5gr*OzZ7* z@GeKNyao3NQ+dtcsbHoRho>UtRj%@notT@iKZshFp4_CmKezZdee9-3hm<EXdF?B9 zyUC*`ujCPD#u_FsO5Tfzr>n4b{8=jh{E4YW)2#3l0lWu8_|ns5@O|?%F<|RgvGo;W zg?HI6oAy%pO@EqNHS`%jFdM77Rolq<C6`AuM#{l4aGUlf1&i6AuRke9bEEU@AF1}J zJZ)!l-jka^^ZlB%74@PO+nb02<F{2&&90AlbgVPg9EWPsl<AM~Qxj_p`h4GPPWg$R z`NCQ@8KY!-_Vxx)lQ4*kXCwsl3To^QA;z{_Tae`8mck_iy|FUCUCs{-E{am{tS+Vx z&bX)Pewc82y??OgRJJD%DW&P#<m;@;nH&c`bX~4_ak2W8RGfW@@5|85=9P82i@_C@ zCvU!`dL%<7bH3tJz|m&ERcoz*U!Gro?>X1~IwW64M3wJkPs}r$7CPCmcdu>wJ_aVA zdus0`Fpzg^LSOEa$ZoyQrQ^mVlzckXA0KlLy*(YnQ)9QfJH6BRmd_)GnsN!RZ`|$O zhA(&&%x}bnP~POCsX2Gu)mSfaWsLrUm!{1r-JGzkDXsB@kNFu7I_C#?;giz${Tawr zqC=si5nGWy>vu=MLgd!TFP7nVdUfgH+i7`0Ub4ef7JD@3FHk&K<R4OEemE4C@<eE@ z^{xCAZ)knL(Gf2_{oZ-wPpoc&pMF<T#BN+@O0qkzR{kh-{?fzwJ-gp{f60-H8$0GE zKvSlw;phD9lMNZ2Bya7Xo&y{XBjcIIie9nDX~OTB`tk;~9pR^Nkzg^S3{UG5DZibc z<J~kqN(`rL=P>a7k(I+vQue3jL79yx6@>iM%+){tp*qujmw8gL+SSl)(-G~cwLf}3 z2F;INd^xoITkk`~#6_x{b7HQyZsZ|-%eU$IBZ+27PnzG-a{J#`<a5+5b~-I1A*D1! z&v=xY{U&upcmv1#YlHQ<gt?2|N@CG`FBw1V6<8NzGvtpGzBkzWn_K6t;SlcIziAY~ z>q`-CNNQ43C$=D(P`T+l>r{DnXqMOF$Y~MvRB@y8F5l{R7aqD~7SHgNYm79WXVTZk z_VL+)7`j&{s!vew&(+;+<RkE5p#8kO?e&|7)xNw|rd>It6Y<wN;j-*nixDG8rYL`K zi2fi~wx&`1LRC(ASx(<Nf<B0=>^$cW6SbAg+zSPrsyxHxgR}?OY4=j?7ury{%y^dM zrj%ddbW*?tJ)x@2zF52Oiz+U4TT2HGBQ&+M3>?#{_WU-dNj)|3=ek^#rG)VrfvmnO z)bY*lLV7iv=#Or~6Y+HF$(Oc?*)){N7kMT=tNR`?_DbstW9A^gpKDZU^nzzx_)Jh7 zduW-*>AXq*G~MxYuXd@-iT6|K#jAWTf0L_yhUDFnWEf9!*~sdx$XYvtM^PU#e@ZaT zyN76o*&62G5LEpheD$sJtuuMcQhG99JO*Ma`Hp=Y<0E~SmNT<0zA%19@k-J~RTsMl zOW9^bjieyk{Ak6Rkwz)0!)DG>Uv8w7TG(Aoc*-0n#P?Y7NW_gB>mx<1F68a4LlrOY zc>gsmSNP5KQNl^EeBlmb_oIuW18j7Av*;bZn^_%lB;BK9$7nr%L!5<bQIuBe0nfv- z>V6Z7#~)>Q)+^>-gw57n5Ib(qtDY2{<l9#v-k}y8)SdcH!H8m0G9)~+sw~V{kL!r0 zNc_aD!|jqr<EabJ%5(QHbmYI|z3B2@akN%|okf7HDmnbaiLXcR{%Q3hLeLxD9xHjH zsnzMpX6z<VjGyJ-$6D#_5GxqyJUU%)bamS}Rv<%L;(1lu7R@hpmzl)uGn)^?T2_Qi zuM)$w2<Fr}Ri<4wu2Scs;+Ajm2`_fj*i%OF&QrO4ykZkVf8dGdw}jqz?1d3mo-*IW z(DDJ3E#dRh3W8&!tb|Fv!g?v6gZ;F>Z>QanKPdPiFf+T!>ot?yvyY#CEcL!yn*4L{ z>aPglMV_gKsl&|ySL9T_jh=Ql3auXMy?AY=_d-S?^X<51iwB{;H|2Gu>Du40=QoH% z5%Y=CYGctE96iydn_|Z#OGKS-DgP$;oeiP*s^@OzM?QDXfhP!EQ_|PX9+CMS+%=@q zn^Y~V6;eGdBmREFQ?0=Eb%J7d@39=B5P@ufn*SsX%NyfKdv)<a>ae%NCKZe4V~Xk; zt`USu=u<s6&-ENV70OkVtNEw-%qzO+v<%LsjVnLsYGW|Q`}M5nyaUG{(^u@;9mHMT zb2TKp;?B1~fz<Bq@_dS}seaY3(Zz2beUdmg>mq&aqR+g_4N><vzK`WC(UEN3zlGHY zJ<aq3ycynq+rLiu<}ROVU#rv~Xt(WnDP(ov_VRf4EqPwHwyCQ>BxTuy_KC(xjz8V| zX+UJesWqVdO85;XBcO<QbHn`V7goNgilES4s+ZDcMVhvjSM;uGE#12v_~~Ln)lOh) z`S28t2mhA5vfZ8`uc3JcZpIH%`xOtRZ@jYg;zPFdJvo8*=eno9Rh@V5_c+=T-Ei^O zxF_@XC*Nrnp0VW%^qv{FbXN3ZAp6HHs}zj>eakAQ9lnG>L-N0b{)PK5`rm<w|E+ZT zC*}WL`p<g*4EO&j-ao_re~b5zxc{Pm(SN1?^6u#0_D7`qZ^yscoPT`JKylB)M(00m s56t|p>CwM%|AqU%1MdG{`~5H6f8qXrgZn?TJtSNI2e2Gl*uqx;0L*yv5C8xG literal 0 HcmV?d00001 diff --git a/pkg/inst/testdata/TODO.csv b/pkg/inst/testdata/TODO.csv new file mode 100644 index 0000000..d679966 --- /dev/null +++ b/pkg/inst/testdata/TODO.csv @@ -0,0 +1 @@ +ou alors data_test.RData, possible aussi diff --git a/pkg/man/valse-package.Rd b/pkg/man/valse-package.Rd new file mode 100644 index 0000000..534375b --- /dev/null +++ b/pkg/man/valse-package.Rd @@ -0,0 +1,37 @@ +\name{valse-package} +\alias{valse-package} +\alias{valse} +\docType{package} + +\title{ + \packageTitle{valse} +} + +\description{ + \packageDescription{valse} +} + +\details{ + The package devtools should be useful in development stage, since we rely on testthat for + unit tests, and roxygen2 for documentation. knitr is used to generate the package vignette. + Concerning the other suggested packages: + \itemize{ + \item{parallel (generally) permits to run the bootstrap method faster.} + } + + The three main functions are ... +} + +\author{ + \packageAuthor{valse} + + Maintainer: \packageMaintainer{valse} +} + +%\references{ +% TODO: Literature or other references for background information +%} + +%\examples{ +% TODO: simple examples of the most important functions +%} diff --git a/pkg/src/Makevars b/pkg/src/Makevars new file mode 100644 index 0000000..50b7fb6 --- /dev/null +++ b/pkg/src/Makevars @@ -0,0 +1,11 @@ +#Debug flags +PKG_CFLAGS=-g -I./sources + +#Prod flags: +#PKG_CFLAGS=-O2 -I./sources + +PKG_LIBS=-lm -lgsl -lcblas + +SOURCES = $(wildcard adapters/*.c sources/*.c) + +OBJECTS = $(SOURCES:.c=.o) diff --git a/pkg/src/adapters/a.EMGLLF.c b/pkg/src/adapters/a.EMGLLF.c new file mode 100644 index 0000000..9b004c2 --- /dev/null +++ b/pkg/src/adapters/a.EMGLLF.c @@ -0,0 +1,91 @@ +#include <R.h> +#include <Rdefines.h> +#include "EMGLLF.h" + +// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper) +SEXP EMGLLF( + SEXP phiInit_, + SEXP rhoInit_, + SEXP piInit_, + SEXP gamInit_, + SEXP mini_, + SEXP maxi_, + SEXP gamma_, + SEXP lambda_, + SEXP X_, + SEXP Y_, + SEXP eps_ +) { + // Get matrices dimensions + int n = INTEGER(getAttrib(X_, R_DimSymbol))[0]; + SEXP dim = getAttrib(phiInit_, R_DimSymbol); + int p = INTEGER(dim)[0]; + int m = INTEGER(dim)[1]; + int k = INTEGER(dim)[2]; + + //////////// + // INPUTS // + //////////// + + // get scalar parameters + int mini = INTEGER_VALUE(mini_); + int maxi = INTEGER_VALUE(maxi_); + double gamma = NUMERIC_VALUE(gamma_); + double lambda = NUMERIC_VALUE(lambda_); + double eps = NUMERIC_VALUE(eps_); + + // Get pointers from SEXP arrays ; WARNING: by columns ! + double* phiInit = REAL(phiInit_); + double* rhoInit = REAL(rhoInit_); + double* piInit = REAL(piInit_); + double* gamInit = REAL(gamInit_); + double* X = REAL(X_); + double* Y = REAL(Y_); + + ///////////// + // OUTPUTS // + ///////////// + + SEXP phi, rho, pi, llh, S, affec, dimPhiS, dimRho; + PROTECT(dimPhiS = allocVector(INTSXP, 3)); + int* pDimPhiS = INTEGER(dimPhiS); + pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k; + PROTECT(dimRho = allocVector(INTSXP, 3)); + int* pDimRho = INTEGER(dimRho); + pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k; + PROTECT(phi = allocArray(REALSXP, dimPhiS)); + PROTECT(rho = allocArray(REALSXP, dimRho)); + PROTECT(pi = allocVector(REALSXP, k)); + PROTECT(llh = allocVector(REALSXP, 1)); + PROTECT(S = allocArray(REALSXP, dimPhiS)); + PROTECT(affec = allocVector(INTSXP, n)); + double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLlh=REAL(llh), *pS=REAL(S); + int *pAffec=INTEGER(affec); + + //////////////////// + // Call to EMGLLF // + //////////////////// + + EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps, + pPhi,pRho,pPi,pLlh,pS,pAffec, + n,p,m,k); + + // Build list from OUT params and return it + SEXP listParams, listNames; + int nouts = 6; + PROTECT(listParams = allocVector(VECSXP, nouts)); + char* lnames[6] = {"phi", "rho", "pi", "llh", "S", "affec"}; //lists labels + PROTECT(listNames = allocVector(STRSXP,nouts)); + for (int i=0; i<nouts; i++) + SET_STRING_ELT(listNames,i,mkChar(lnames[i])); + setAttrib(listParams, R_NamesSymbol, listNames); + SET_VECTOR_ELT(listParams, 0, phi); + SET_VECTOR_ELT(listParams, 1, rho); + SET_VECTOR_ELT(listParams, 2, pi); + SET_VECTOR_ELT(listParams, 3, llh); + SET_VECTOR_ELT(listParams, 4, S); + SET_VECTOR_ELT(listParams, 5, affec); + + UNPROTECT(10); + return listParams; +} diff --git a/pkg/src/adapters/a.EMGrank.c b/pkg/src/adapters/a.EMGrank.c new file mode 100644 index 0000000..8da760d --- /dev/null +++ b/pkg/src/adapters/a.EMGrank.c @@ -0,0 +1,73 @@ +#include <R.h> +#include <Rdefines.h> +#include "EMGrank.h" + +// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper) +SEXP EMGrank( + SEXP Pi_, + SEXP Rho_, + SEXP mini_, + SEXP maxi_, + SEXP X_, + SEXP Y_, + SEXP eps_, + SEXP rank_ +) { + // Get matrices dimensions + SEXP dimX = getAttrib(X_, R_DimSymbol); + int n = INTEGER(dimX)[0]; + int p = INTEGER(dimX)[1]; + SEXP dimRho = getAttrib(Rho_, R_DimSymbol); + int m = INTEGER(dimRho)[0]; + int k = INTEGER(dimRho)[2]; + + //////////// + // INPUTS // + //////////// + + // get scalar parameters + int mini = INTEGER_VALUE(mini_); + int maxi = INTEGER_VALUE(maxi_); + double eps = NUMERIC_VALUE(eps_); + + // Get pointers from SEXP arrays ; WARNING: by columns ! + double* Pi = REAL(Pi_); + double* Rho = REAL(Rho_); + double* X = REAL(X_); + double* Y = REAL(Y_); + int* rank = INTEGER(rank_); + + ///////////// + // OUTPUTS // + ///////////// + + SEXP phi, LLF, dimPhi; + PROTECT(dimPhi = allocVector(INTSXP, 3)); + int* pDimPhi = INTEGER(dimPhi); + pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k; + PROTECT(phi = allocArray(REALSXP, dimPhi)); + PROTECT(LLF = allocVector(REALSXP, 1)); + double *pPhi=REAL(phi), *pLLF=REAL(LLF); + + ///////////////////// + // Call to EMGrank // + ///////////////////// + + EMGrank_core(Pi, Rho, mini, maxi, X, Y, eps, rank, + pPhi,pLLF, + n,p,m,k); + + // Build list from OUT params and return it + SEXP listParams, listNames; + PROTECT(listParams = allocVector(VECSXP, 2)); + char* lnames[2] = {"phi", "LLF"}; //lists labels + PROTECT(listNames = allocVector(STRSXP,2)); + for (int i=0; i<2; i++) + SET_STRING_ELT(listNames,i,mkChar(lnames[i])); + setAttrib(listParams, R_NamesSymbol, listNames); + SET_VECTOR_ELT(listParams, 0, phi); + SET_VECTOR_ELT(listParams, 1, LLF); + + UNPROTECT(5); + return listParams; +} diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/sources/EMGLLF.c new file mode 100644 index 0000000..e8b3b84 --- /dev/null +++ b/pkg/src/sources/EMGLLF.c @@ -0,0 +1,420 @@ +#include "utils.h" +#include <stdlib.h> +#include <math.h> +#include <gsl/gsl_linalg.h> + +// TODO: don't recompute indexes ai(...) and mi(...) when possible +void EMGLLF_core( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + int mini, // nombre minimal d'itérations dans l'algorithme EM + int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // puissance des proportions dans la pénalisation pour un Lasso adaptatif + Real lambda, // valeur du paramètre de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real eps, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* rho, // parametre de variance renormalisé, calculé par l'EM + Real* pi, // parametre des proportions renormalisé, calculé par l'EM + Real* llh, // (derniere) log vraisemblance associée à cet échantillon, + // pour les valeurs estimées des paramètres + Real* S, + int* affec, + // additional size parameters + int n, // nombre d'echantillons + int p, // nombre de covariables + int m, // taille de Y (multivarié) + int k) // nombre de composantes dans le mélange +{ + //Initialize outputs + copyArray(phiInit, phi, p*m*k); + copyArray(rhoInit, rho, m*m*k); + copyArray(piInit, pi, k); + //S is already allocated, and doesn't need to be 'zeroed' + + //Other local variables: same as in R + Real* gam = (Real*)malloc(n*k*sizeof(Real)); + Real* logGam = (Real*)malloc(k*sizeof(Real)); + copyArray(gamInit, gam, n*k); + Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real)); + Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real)); + Real* b = (Real*)malloc(k*sizeof(Real)); + Real* X2 = (Real*)malloc(n*p*k*sizeof(Real)); + Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real)); + *llh = -INFINITY; + Real* pi2 = (Real*)malloc(k*sizeof(Real)); + // Additional (not at this place, in R file) + Real* gam2 = (Real*)malloc(k*sizeof(Real)); + Real* sqNorm2 = (Real*)malloc(k*sizeof(Real)); + Real* detRho = (Real*)malloc(k*sizeof(Real)); + gsl_matrix* matrix = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + const Real gaussConstM = pow(2.*M_PI,m/2.); + Real* Phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* Rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* Pi = (Real*)malloc(k*sizeof(Real)); + + for (int ite=1; ite<=maxi; ite++) + { + copyArray(phi, Phi, p*m*k); + copyArray(rho, Rho, m*m*k); + copyArray(pi, Pi, k); + + // Calculs associés a Y et X + for (int r=0; r<k; r++) + { + for (int mm=0; mm<m; mm++) + { + //Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] + for (int u=0; u<n; u++) + Y2[ai(u,mm,r,n,m,k)] = sqrt(gam[mi(u,r,n,k)]) * Y[mi(u,mm,n,m)]; + } + for (int i=0; i<n; i++) + { + //X2[i,,r] = sqrt(gam[i,r]) * X[i,] + for (int u=0; u<p; u++) + X2[ai(i,u,r,n,p,k)] = sqrt(gam[mi(i,r,n,k)]) * X[mi(i,u,n,p)]; + } + for (int mm=0; mm<m; mm++) + { + //ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) + for (int u=0; u<p; u++) + { + Real dotProduct = 0.; + for (int v=0; v<n; v++) + dotProduct += X2[ai(v,u,r,n,p,k)] * Y2[ai(v,mm,r,n,m,k)]; + ps2[ai(u,mm,r,p,m,k)] = dotProduct; + } + } + for (int j=0; j<p; j++) + { + for (int s=0; s<p; s++) + { + //Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r]) + Real dotProduct = 0.; + for (int u=0; u<n; u++) + dotProduct += X2[ai(u,j,r,n,p,k)] * X2[ai(u,s,r,n,p,k)]; + Gram2[ai(j,s,r,p,p,k)] = dotProduct; + } + } + } + + ///////////// + // Etape M // + ///////////// + + // Pour pi + for (int r=0; r<k; r++) + { + //b[r] = sum(abs(phi[,,r])) + Real sumAbsPhi = 0.; + for (int u=0; u<p; u++) + for (int v=0; v<m; v++) + sumAbsPhi += fabs(phi[ai(u,v,r,p,m,k)]); + b[r] = sumAbsPhi; + } + //gam2 = colSums(gam) + for (int u=0; u<k; u++) + { + Real sumOnColumn = 0.; + for (int v=0; v<n; v++) + sumOnColumn += gam[mi(v,u,n,k)]; + gam2[u] = sumOnColumn; + } + //a = sum(gam %*% log(pi)) + Real a = 0.; + for (int u=0; u<n; u++) + { + Real dotProduct = 0.; + for (int v=0; v<k; v++) + dotProduct += gam[mi(u,v,n,k)] * log(pi[v]); + a += dotProduct; + } + + //tant que les proportions sont negatives + int kk = 0, + pi2AllPositive = 0; + Real invN = 1./n; + while (!pi2AllPositive) + { + //pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi) + Real pow_01_kk = pow(0.1,kk); + for (int r=0; r<k; r++) + pi2[r] = pi[r] + pow_01_kk * (invN*gam2[r] - pi[r]); + //pi2AllPositive = all(pi2 >= 0) + pi2AllPositive = 1; + for (int r=0; r<k; r++) + { + if (pi2[r] < 0) + { + pi2AllPositive = 0; + break; + } + } + kk++; + } + + //sum(pi^gamma * b) + Real piPowGammaDotB = 0.; + for (int v=0; v<k; v++) + piPowGammaDotB += pow(pi[v],gamma) * b[v]; + //sum(pi2^gamma * b) + Real pi2PowGammaDotB = 0.; + for (int v=0; v<k; v++) + pi2PowGammaDotB += pow(pi2[v],gamma) * b[v]; + //sum(gam2 * log(pi2)) + Real gam2DotLogPi2 = 0.; + for (int v=0; v<k; v++) + gam2DotLogPi2 += gam2[v] * log(pi2[v]); + + //t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante + while (-invN*a + lambda*piPowGammaDotB < -invN*gam2DotLogPi2 + lambda*pi2PowGammaDotB + && kk<1000) + { + Real pow_01_kk = pow(0.1,kk); + //pi2 = pi + 0.1^kk * (1/n*gam2 - pi) + for (int v=0; v<k; v++) + pi2[v] = pi[v] + pow_01_kk * (invN*gam2[v] - pi[v]); + //pi2 was updated, so we recompute pi2PowGammaDotB and gam2DotLogPi2 + pi2PowGammaDotB = 0.; + for (int v=0; v<k; v++) + pi2PowGammaDotB += pow(pi2[v],gamma) * b[v]; + gam2DotLogPi2 = 0.; + for (int v=0; v<k; v++) + gam2DotLogPi2 += gam2[v] * log(pi2[v]); + kk++; + } + Real t = pow(0.1,kk); + //sum(pi + t*(pi2-pi)) + Real sumPiPlusTbyDiff = 0.; + for (int v=0; v<k; v++) + sumPiPlusTbyDiff += (pi[v] + t*(pi2[v] - pi[v])); + //pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) + for (int v=0; v<k; v++) + pi[v] = (pi[v] + t*(pi2[v] - pi[v])) / sumPiPlusTbyDiff; + + //Pour phi et rho + for (int r=0; r<k; r++) + { + for (int mm=0; mm<m; mm++) + { + Real ps = 0., + nY2 = 0.; + // Compute ps, and nY2 = sum(Y2[,mm,r]^2) + for (int i=0; i<n; i++) + { + //< X2[i,,r] , phi[,mm,r] > + Real dotProduct = 0.; + for (int u=0; u<p; u++) + dotProduct += X2[ai(i,u,r,n,p,k)] * phi[ai(u,mm,r,p,m,k)]; + //ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r]) + ps += Y2[ai(i,mm,r,n,m,k)] * dotProduct; + nY2 += Y2[ai(i,mm,r,n,m,k)] * Y2[ai(i,mm,r,n,m,k)]; + } + //rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2) + rho[ai(mm,mm,r,m,m,k)] = (ps + sqrt(ps*ps + 4*nY2 * gam2[r])) / (2*nY2); + } + } + + for (int r=0; r<k; r++) + { + for (int j=0; j<p; j++) + { + for (int mm=0; mm<m; mm++) + { + //sum(phi[-j,mm,r] * Gram2[j,-j,r]) + Real phiDotGram2 = 0.; + for (int u=0; u<p; u++) + { + if (u != j) + phiDotGram2 += phi[ai(u,mm,r,p,m,k)] * Gram2[ai(j,u,r,p,p,k)]; + } + //S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r]) + S[ai(j,mm,r,p,m,k)] = -rho[ai(mm,mm,r,m,m,k)] * ps2[ai(j,mm,r,p,m,k)] + + phiDotGram2; + Real pirPowGamma = pow(pi[r],gamma); + if (fabs(S[ai(j,mm,r,p,m,k)]) <= n*lambda*pirPowGamma) + phi[ai(j,mm,r,p,m,k)] = 0.; + else if (S[ai(j,mm,r,p,m,k)] > n*lambda*pirPowGamma) + { + phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)]) + / Gram2[ai(j,j,r,p,p,k)]; + } + else + { + phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)]) + / Gram2[ai(j,j,r,p,p,k)]; + } + } + } + } + + ///////////// + // Etape E // + ///////////// + + // Precompute det(rho[,,r]) for r in 1...k + int signum; + for (int r=0; r<k; r++) + { + for (int u=0; u<m; u++) + { + for (int v=0; v<m; v++) + matrix->data[u*m+v] = rho[ai(u,v,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + detRho[r] = gsl_linalg_LU_det(matrix, signum); + } + + Real sumLogLLH = 0.; + for (int i=0; i<n; i++) + { + for (int r=0; r<k; r++) + { + //compute Y[i,]%*%rho[,,r] + for (int u=0; u<m; u++) + { + YiRhoR[u] = 0.; + for (int v=0; v<m; v++) + YiRhoR[u] += Y[mi(i,v,n,m)] * rho[ai(v,u,r,m,m,k)]; + } + + //compute X[i,]%*%phi[,,r] + for (int u=0; u<m; u++) + { + XiPhiR[u] = 0.; + for (int v=0; v<p; v++) + XiPhiR[u] += X[mi(i,v,n,p)] * phi[ai(v,u,r,p,m,k)]; + } + + //compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2 + sqNorm2[r] = 0.; + for (int u=0; u<m; u++) + sqNorm2[r] += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]); + } + + // Update gam[,]; use log to avoid numerical problems + Real maxLogGam = -INFINITY; + for (int r=0; r<k; r++) + { + logGam[r] = log(pi[r]) - .5 * sqNorm2[r] + log(detRho[r]); + if (maxLogGam < logGam[r]) + maxLogGam = logGam[r]; + } + Real norm_fact = 0.; + for (int r=0; r<k; r++) + { + logGam[r] = logGam[r] - maxLogGam; //adjust without changing proportions + gam[mi(i,r,n,k)] = exp(logGam[r]); //gam[i, ] <- exp(logGam) + norm_fact += gam[mi(i,r,n,k)]; //norm_fact <- sum(gam[i, ]) + } + // gam[i, ] <- gam[i, ] / norm_fact + for (int r=0; r<k; r++) + gam[mi(i,r,n,k)] /= norm_fact; + + sumLogLLH += log(norm_fact) - log(gaussConstM); + } + + //sumPen = sum(pi^gamma * b) + Real sumPen = 0.; + for (int r=0; r<k; r++) + sumPen += pow(pi[r],gamma) * b[r]; + Real last_llh = *llh; + //llh = -sumLogLLH/n #+ lambda*sumPen + *llh = -invN * sumLogLLH; //+ lambda * sumPen; + Real dist = ( ite==1 ? *llh : (*llh - last_llh) / (1. + fabs(*llh)) ); + + //Dist1 = max( abs(phi-Phi) / (1+abs(phi)) ) + Real Dist1 = 0.; + for (int u=0; u<p; u++) + { + for (int v=0; v<m; v++) + { + for (int w=0; w<k; w++) + { + Real tmpDist = fabs(phi[ai(u,v,w,p,m,k)]-Phi[ai(u,v,w,p,m,k)]) + / (1.+fabs(phi[ai(u,v,w,p,m,k)])); + if (tmpDist > Dist1) + Dist1 = tmpDist; + } + } + } + //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) + Real Dist2 = 0.; + for (int u=0; u<m; u++) + { + for (int v=0; v<m; v++) + { + for (int w=0; w<k; w++) + { + Real tmpDist = fabs(rho[ai(u,v,w,m,m,k)]-Rho[ai(u,v,w,m,m,k)]) + / (1.+fabs(rho[ai(u,v,w,m,m,k)])); + if (tmpDist > Dist2) + Dist2 = tmpDist; + } + } + } + //Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi))) + Real Dist3 = 0.; + for (int u=0; u<n; u++) + { + for (int v=0; v<k; v++) + { + Real tmpDist = fabs(pi[v]-Pi[v]) / (1.+fabs(pi[v])); + if (tmpDist > Dist3) + Dist3 = tmpDist; + } + } + //dist2=max([max(Dist1),max(Dist2),max(Dist3)]); + Real dist2 = Dist1; + if (Dist2 > dist2) + dist2 = Dist2; + if (Dist3 > dist2) + dist2 = Dist3; + + if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) + break; + } + + //affec = apply(gam, 1, which.max) + for (int i=0; i<n; i++) + { + Real rowMax = 0.; + affec[i] = 0; + for (int j=0; j<k; j++) + { + if (gam[mi(i,j,n,k)] > rowMax) + { + affec[i] = j+1; //R indices start at 1 + rowMax = gam[mi(i,j,n,k)]; + } + } + } + + //free memory + free(b); + free(gam); + free(logGam); + free(Phi); + free(Rho); + free(Pi); + free(Gram2); + free(ps2); + free(detRho); + gsl_matrix_free(matrix); + gsl_permutation_free(permutation); + free(XiPhiR); + free(YiRhoR); + free(gam2); + free(pi2); + free(X2); + free(Y2); + free(sqNorm2); +} diff --git a/pkg/src/sources/EMGLLF.h b/pkg/src/sources/EMGLLF.h new file mode 100644 index 0000000..e15cb87 --- /dev/null +++ b/pkg/src/sources/EMGLLF.h @@ -0,0 +1,32 @@ +#ifndef valse_EMGLLF_H +#define valse_EMGLLF_H + +#include "utils.h" + +void EMGLLF_core( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + int mini, + int maxi, + Real gamma, + Real lambda, + const Real* X, + const Real* Y, + Real tau, + // OUT parameters + Real* phi, + Real* rho, + Real* pi, + Real* LLF, + Real* S, + int* affec, + // additional size parameters + int n, + int p, + int m, + int k); + +#endif diff --git a/pkg/src/sources/EMGrank.c b/pkg/src/sources/EMGrank.c new file mode 100644 index 0000000..3a9bf94 --- /dev/null +++ b/pkg/src/sources/EMGrank.c @@ -0,0 +1,307 @@ +#include <stdlib.h> +#include <gsl/gsl_linalg.h> +#include "utils.h" + +// Compute pseudo-inverse of a square matrix +static Real* pinv(const Real* matrix, int dim) +{ + gsl_matrix* U = gsl_matrix_alloc(dim,dim); + gsl_matrix* V = gsl_matrix_alloc(dim,dim); + gsl_vector* S = gsl_vector_alloc(dim); + gsl_vector* work = gsl_vector_alloc(dim); + Real EPS = 1e-10; //threshold for singular value "== 0" + + //copy matrix into U + copyArray(matrix, U->data, dim*dim); + + //U,S,V = SVD of matrix + gsl_linalg_SV_decomp(U, V, S, work); + gsl_vector_free(work); + + // Obtain pseudo-inverse by V*S^{-1}*t(U) + Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); + for (int i=0; i<dim; i++) + { + for (int ii=0; ii<dim; ii++) + { + Real dotProduct = 0.0; + for (int j=0; j<dim; j++) + dotProduct += V->data[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; + inverse[i*dim+ii] = dotProduct; + } + } + + gsl_matrix_free(U); + gsl_matrix_free(V); + gsl_vector_free(S); + return inverse; +} + +// TODO: comment EMGrank purpose +void EMGrank_core( + // IN parameters + const Real* Pi, // parametre de proportion + const Real* Rho, // parametre initial de variance renormalisé + int mini, // nombre minimal d'itérations dans l'algorithme EM + int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const int* rank, // vecteur des rangs possibles + // OUT parameters + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + // additional size parameters + int n, // taille de l'echantillon + int p, // nombre de covariables + int m, // taille de Y (multivarié) + int k) // nombre de composantes +{ + // Allocations, initializations + Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); + Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); + int signum; + Real invN = 1.0/n; + int deltaPhiBufferSize = 20; + Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); + int ite = 0; + Real sumDeltaPhi = 0.0; + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real* Xr = (Real*)malloc(n*p*sizeof(Real)); + Real* Yr = (Real*)malloc(n*m*sizeof(Real)); + Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); + Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); + gsl_matrix* matrixM = gsl_matrix_alloc(p, m); + gsl_matrix* matrixE = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + gsl_matrix* V = gsl_matrix_alloc(m,m); + gsl_vector* S = gsl_vector_alloc(m); + gsl_vector* work = gsl_vector_alloc(m); + + //Initialize class memberships (all elements in class 0; TODO: randomize ?) + int* Z = (int*)calloc(n, sizeof(int)); + + //Initialize phi to zero, because some M loops might exit before phi affectation + zeroArray(phi, p*m*k); + + while (ite<mini || (ite<maxi && sumDeltaPhi>tau)) + { + ///////////// + // Etape M // + ///////////// + + //M step: Mise à jour de Beta (et donc phi) + for (int r=0; r<k; r++) + { + //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:) + int cardClustR=0; + for (int i=0; i<n; i++) + { + if (Z[i] == r) + { + for (int j=0; j<p; j++) + Xr[mi(cardClustR,j,n,p)] = X[mi(i,j,n,p)]; + for (int j=0; j<m; j++) + Yr[mi(cardClustR,j,n,m)] = Y[mi(i,j,n,m)]; + cardClustR++; + } + } + if (cardClustR == 0) + continue; + + //Compute tXrXr = t(Xr) * Xr + for (int j=0; j<p; j++) + { + for (int jj=0; jj<p; jj++) + { + Real dotProduct = 0.0; + for (int u=0; u<cardClustR; u++) + dotProduct += Xr[mi(u,j,n,p)] * Xr[mi(u,jj,n,p)]; + tXrXr[mi(j,jj,p,p)] = dotProduct; + } + } + + //Get pseudo inverse = (t(Xr)*Xr)^{-1} + Real* invTXrXr = pinv(tXrXr, p); + + // Compute tXrYr = t(Xr) * Yr + for (int j=0; j<p; j++) + { + for (int jj=0; jj<m; jj++) + { + Real dotProduct = 0.0; + for (int u=0; u<cardClustR; u++) + dotProduct += Xr[mi(u,j,n,p)] * Yr[mi(u,jj,n,m)]; + tXrYr[mi(j,jj,p,m)] = dotProduct; + } + } + + //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr + for (int j=0; j<p; j++) + { + for (int jj=0; jj<m; jj++) + { + Real dotProduct = 0.0; + for (int u=0; u<p; u++) + dotProduct += invTXrXr[mi(j,u,p,p)] * tXrYr[mi(u,jj,p,m)]; + matrixM->data[j*m+jj] = dotProduct; + } + } + free(invTXrXr); + + //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + gsl_linalg_SV_decomp(matrixM, V, S, work); + + //Set m-rank(r) singular values to zero, and recompose + //best rank(r) approximation of the initial product + for (int j=rank[r]; j<m; j++) + S->data[j] = 0.0; + + //[intermediate step] Compute hatBetaR = U * S * t(V) + double* U = matrixM->data; //GSL require double precision + for (int j=0; j<p; j++) + { + for (int jj=0; jj<m; jj++) + { + Real dotProduct = 0.0; + for (int u=0; u<m; u++) + dotProduct += U[j*m+u] * S->data[u] * V->data[jj*m+u]; + hatBetaR[mi(j,jj,p,m)] = dotProduct; + } + } + + //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) + for (int j=0; j<p; j++) + { + for (int jj=0; jj<m; jj++) + { + Real dotProduct=0.0; + for (int u=0; u<m; u++) + dotProduct += hatBetaR[mi(j,u,p,m)] * Rho[ai(u,jj,r,m,m,k)]; + phi[ai(j,jj,r,p,m,k)] = dotProduct; + } + } + } + + ///////////// + // Etape E // + ///////////// + + Real sumLogLLF2 = 0.0; + for (int i=0; i<n; i++) + { + Real sumLLF1 = 0.0; + Real maxLogGamIR = -INFINITY; + for (int r=0; r<k; r++) + { + //Compute + //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)... + //*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) ); + //split in several sub-steps + + //compute det(Rho(:,:,r)) [TODO: avoid re-computations] + for (int j=0; j<m; j++) + { + for (int jj=0; jj<m; jj++) + matrixE->data[j*m+jj] = Rho[ai(j,jj,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrixE, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrixE, signum); + + //compute Y(i,:)*Rho(:,:,r) + for (int j=0; j<m; j++) + { + YiRhoR[j] = 0.0; + for (int u=0; u<m; u++) + YiRhoR[j] += Y[mi(i,u,n,m)] * Rho[ai(u,j,r,m,m,k)]; + } + + //compute X(i,:)*phi(:,:,r) + for (int j=0; j<m; j++) + { + XiPhiR[j] = 0.0; + for (int u=0; u<p; u++) + XiPhiR[j] += X[mi(i,u,n,p)] * phi[ai(u,j,r,p,m,k)]; + } + + //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) > + Real dotProduct = 0.0; + for (int u=0; u<m; u++) + dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]); + Real logGamIR = log(Pi[r]) + log(detRhoR) - 0.5*dotProduct; + + //Z(i) = index of max (gam(i,:)) + if (logGamIR > maxLogGamIR) + { + Z[i] = r; + maxLogGamIR = logGamIR; + } + sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); + } + + sumLogLLF2 += log(sumLLF1); + } + + // Assign output variable LLF + *LLF = -invN * sumLogLLF2; + + //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi)))); + Real newDeltaPhi = 0.0; + for (int j=0; j<p; j++) + { + for (int jj=0; jj<m; jj++) + { + for (int r=0; r<k; r++) + { + Real tmpDist = fabs(phi[ai(j,jj,r,p,m,k)]-Phi[ai(j,jj,r,p,m,k)]) + / (1.0+fabs(phi[ai(j,jj,r,p,m,k)])); + if (tmpDist > newDeltaPhi) + newDeltaPhi = tmpDist; + } + } + } + + //update distance parameter to check algorithm convergence (delta(phi, Phi)) + //TODO: deltaPhi should be a linked list for perf. + if (ite < deltaPhiBufferSize) + deltaPhi[ite] = newDeltaPhi; + else + { + sumDeltaPhi -= deltaPhi[0]; + for (int u=0; u<deltaPhiBufferSize-1; u++) + deltaPhi[u] = deltaPhi[u+1]; + deltaPhi[deltaPhiBufferSize-1] = newDeltaPhi; + } + sumDeltaPhi += newDeltaPhi; + + // update other local variables + for (int j=0; j<m; j++) + { + for (int jj=0; jj<p; jj++) + { + for (int r=0; r<k; r++) + Phi[ai(jj,j,r,p,m,k)] = phi[ai(jj,j,r,p,m,k)]; + } + } + ite++; + } + + //free memory + free(hatBetaR); + free(deltaPhi); + free(Phi); + gsl_matrix_free(matrixE); + gsl_matrix_free(matrixM); + gsl_permutation_free(permutation); + gsl_vector_free(work); + gsl_matrix_free(V); + gsl_vector_free(S); + free(XiPhiR); + free(YiRhoR); + free(Xr); + free(Yr); + free(tXrXr); + free(tXrYr); + free(Z); +} diff --git a/pkg/src/sources/EMGrank.h b/pkg/src/sources/EMGrank.h new file mode 100644 index 0000000..b7367d8 --- /dev/null +++ b/pkg/src/sources/EMGrank.h @@ -0,0 +1,25 @@ +#ifndef valse_EMGrank_H +#define valse_EMGrank_H + +#include "utils.h" + +void EMGrank_core( + // IN parameters + const Real* Pi, + const Real* Rho, + int mini, + int maxi, + const Real* X, + const Real* Y, + Real tau, + const int* rank, + // OUT parameters + Real* phi, + Real* LLF, + // additional size parameters + int n, + int p, + int m, + int k); + +#endif diff --git a/pkg/src/sources/utils.h b/pkg/src/sources/utils.h new file mode 100644 index 0000000..be3a72f --- /dev/null +++ b/pkg/src/sources/utils.h @@ -0,0 +1,48 @@ +#ifndef valse_utils_H +#define valse_utils_H + +//#include <stdint.h> + +/******** + * Types + *******/ + +typedef double Real; +//typedef uint32_t UInt; +//typedef int32_t Int; + +/******************************* + * Matrix and arrays indexation + *******************************/ + +// Matrix Index ; TODO? ncol unused +#define mi(i,j,nrow,ncol)\ + j*nrow + i + +// Array Index ; TODO? d3 unused +#define ai(i,j,k,d1,d2,d3)\ + k*d1*d2 + j*d1 + i + +// Array4 Index ; TODO? ... +#define ai4(i,j,k,m,d1,d2,d3,d4)\ + m*d1*d2*d3 + k*d1*d2 + j*d1 + i + +/************************* + * Array copy & "zeroing" + ************************/ + +// Fill an array with zeros +#define zeroArray(array, size)\ +{\ + for (int u=0; u<size; u++)\ + array[u] = 0;\ +} + +// Copy an 1D array +#define copyArray(array, copy, size)\ +{\ + for (int u=0; u<size; u++)\ + copy[u] = array[u];\ +} + +#endif diff --git a/pkg/tests/testthat.R b/pkg/tests/testthat.R new file mode 100644 index 0000000..88e5631 --- /dev/null +++ b/pkg/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(valse) #ou load_all() + +test_check("valse") diff --git a/pkg/tests/testthat/helper-context1.R b/pkg/tests/testthat/helper-context1.R new file mode 100644 index 0000000..b40f358 --- /dev/null +++ b/pkg/tests/testthat/helper-context1.R @@ -0,0 +1,5 @@ +# Potential helpers for context 1 +help <- function() +{ + #... +} diff --git a/pkg/tests/testthat/test-context1.R b/pkg/tests/testthat/test-context1.R new file mode 100644 index 0000000..17c633f --- /dev/null +++ b/pkg/tests/testthat/test-context1.R @@ -0,0 +1,11 @@ +context("Context1") + +test_that("function 1...", +{ + #expect_lte( ..., ...) +}) + +test_that("function 2...", +{ + #expect_equal(..., ...) +}) diff --git a/pkg/vignettes/.gitignore b/pkg/vignettes/.gitignore new file mode 100644 index 0000000..e6493d4 --- /dev/null +++ b/pkg/vignettes/.gitignore @@ -0,0 +1,14 @@ +#ignore jupyter generated file (ipynb, HTML) +*.html +*.ipynb + +#and various (pdf)LaTeX files, in case of +*.tex +*.pdf +*.aux +*.dvi +*.log +*.out +*.toc +*.synctex.gz +/figure/ diff --git a/reports/bazar_Emilie/dessinBeta.m b/reports/bazar_Emilie/dessinBeta.m new file mode 100644 index 0000000..78c3b91 --- /dev/null +++ b/reports/bazar_Emilie/dessinBeta.m @@ -0,0 +1,97 @@ +niveauColores + +for L=ind + for r=1:max(b(:,L)) + betaLassoMLE(:,:,r)=inv(rhoLassoMLE(:,:,r,L)) * phiLassoMLE(:,:,r,L); + end +end +dessinRho = zeros(max(K),size(rhoLassoMLE,1),max(ind)); +cpt=1; +gr = gray(64); + + for L=ind + k=size(find(piLassoMLE(:,L)~=0),1) + figure(1) + %for r=1:k + r=1 + subplot(1,k+1,r) + cpt=cpt+1; + colormap(gr(end:-1:1,:)); + imagesc(abs(betaLassoMLE(:,:,r))) + set(gca,'xtick',[],'ytick',[]) + set(gca, 'FontSize', 30) + title(['$\hat{\underline{\beta}_1}$'],'FontSize', 45,'Interpreter','latex') + h=colorbar; + set(gca, 'FontSize', 30) + r=2 + subplot(1,k+1,r) + cpt=cpt+1; + colormap(gr(end:-1:1,:)); + imagesc(abs(betaLassoMLE(:,:,r))) + set(gca,'xtick',[],'ytick',[]) + set(gca, 'FontSize', 30) + title(['$\hat{\underline{\beta}_2}$'],'FontSize', 45,'Interpreter','latex') + h=colorbar; + set(gca, 'FontSize', 30) + + %end + subplot(1,k+1,k+1) + gr = gray(64); + colormap(gr(end:-1:1,:)); + imagesc(abs(betaLassoMLE(:,:,1)-betaLassoMLE(:,:,2))) + set(gca,'xtick',[],'ytick',[]) + title('$|\hat{\underline{\beta}_1}-\hat{\underline{\beta}}_2|$','FontSize',45,'Interpreter','latex') + h=colorbar; + set(gca, 'FontSize', 30) + figure(L+k+1) + for r=1:k + for z=1:size(rhoLassoMLE,1) + dessinRho(r,z,L) = inv(rhoLassoMLE(z,z,r,L)^2); + end + end + + %for r=1:k + r=1 + subplot(k,1,r) +% if r==1 +% colormap(cmapRouge(end:-1:1,:)); +% else if r==2 +% colormap(cmapBleu(end:-1:1,:)); +% else colormap(cmapVert(end:-1:1,:)); +% end +% end + imagesc(dessinRho(r,:,L)) + set(gca,'xtick',[],'ytick',[]) + xlabh = get(gca,'XLabel'); + set(xlabh,'Position',get(xlabh,'Position') + [0 .15 0]) + title('$\hat{\Sigma}_1$','FontSize', 55,'Interpreter','latex') + xlabel('d_4[1] d_4[2] d_4[3] d_3[1] d_3[2] d_3[3] d_3[4] d_3[5] d_3[6]','FontSize', 30) + set(gca, 'FontSize', 45) + set(gca,'Position',[0.01 0.1+0.5*(2-r) 0.9 0.2]) + + r=2 + subplot(k,1,r) +% if r==1 +% colormap(cmapRouge(end:-1:1,:)); +% else if r==2 +% colormap(cmapBleu(end:-1:1,:)); +% else colormap(cmapVert(end:-1:1,:)); +% end +% end + imagesc(dessinRho(r,:,L)) + set(gca,'xtick',[],'ytick',[]) + xlabh = get(gca,'XLabel'); + set(xlabh,'Position',get(xlabh,'Position') + [0 .15 0]) + title('$\hat{\Sigma}_2$','FontSize', 55,'Interpreter','latex') + xlabel('d_4[1] d_4[2] d_4[3] d_3[1] d_3[2] d_3[3] d_3[4] d_3[5] d_3[6]','FontSize', 30) + set(gca, 'FontSize', 45) + set(gca,'Position',[0.01 0.1+0.5*(2-r) 0.9 0.2]) + %end + %h=colorbar; + gr = gray(64); + colormap(gr(end:-1:1,:)); + axes('Position', [0.14 0.1 0.8 0.8], 'Visible', 'off'); +% c=colorbar; +% set(gca, 'FontSize', 40) +% caxis([0 max(max(dessinRho(:,:,L)))]) +end \ No newline at end of file diff --git a/reports/bazar_Emilie/niveauColores.m b/reports/bazar_Emilie/niveauColores.m new file mode 100644 index 0000000..41a46ff --- /dev/null +++ b/reports/bazar_Emilie/niveauColores.m @@ -0,0 +1,18 @@ +%niveau de rouge +cmapRouge = ones(100,3); +for i=1:100 + cmapRouge(i,2) = i/100; + cmapRouge(i,3) = i/100; +end +%niveau de vert +cmapVert = ones(100,3); +for i=1:100 + cmapVert(i,1) = i/100; + cmapVert(i,3) = i/100; +end +%niveau de bleu +cmapBleu = ones(100,3); +for i=1:100 + cmapBleu(i,1) = i/100; + cmapBleu(i,2) = i/100; +end \ No newline at end of file diff --git a/reports/bazar_Emilie/ondelettes.m b/reports/bazar_Emilie/ondelettes.m new file mode 100644 index 0000000..b54635f --- /dev/null +++ b/reports/bazar_Emilie/ondelettes.m @@ -0,0 +1,83 @@ +%Dessin ondelettes +%script +figure(1) +courbe = X2(:,1)-mean(X2(:,1)); +[C,L] = wavedec(courbe, 4,'haar'); + +subplot(6,1,1) +plot(1:7,courbe(1:7),'b','LineWidth',2) +axis([0 48 -1.6 1.6]) +hold on +plot(8:48,courbe(8:48),'b','LineWidth',2) +%plot(waverec([C(1:12)',zeros(1,36)],L,'haar'),'b--','LineWidth',2) +%courbe2 = X2(:,1); +%[C2,L2] = wavedec(courbe2,4,'haar'); +%plot(waverec([zeros(1,3),C2(4:12)',zeros(1,36)],L2,'haar'),'b-.','LineWidth',2) +hold off +ylabel('z','FontSize', 30) +set(gca, 'FontSize', 20) +subplot(6,1,2) +A4=zeros(1,48); +Coeff = zeros(5,48); +for r=1:16 + A4(r)=C(1)/power(2,5/2); + A4(r+16)=C(2)/power(2,5/2); + A4(r+32) = C(3)/power(2,5/2); +end +Coeff(5,8)=C(1); +Coeff(5,24) = C(2); +Coeff(5,40) = C(3); +stairs(A4,'b','LineWidth',2) +axis([0 48 -0.5 0.5]) +ylabel('A_4','FontSize', 30) +set(gca, 'FontSize', 20) +subplot(6,1,3) +D4=zeros(1,48); +for r=1:8 + D4(r) = C(4)/power(2,4/2); + D4(r+8) = -C(4)/power(2,4/2); + D4(r+16) = C(5)/power(2,4/2); + D4(r+24) = -C(5)/power(2,4/2); + D4(r+32) = C(6)/power(2,4/2); + D4(r+40) = -C(6)/power(2,4/2); +end +Coeff(4,8) = C(4); +Coeff(4,24) = C(5); +Coeff(4,40) = C(6); +stairs(D4,'b','LineWidth',2) +axis([0 48 -0.9 0.9]) +ylabel('D_4','FontSize', 30) + set(gca, 'FontSize', 20) +subplot(6,1,4) +D3=zeros(1,48); +for k=1:12 + for r=1:4 + D3(r+4*(k-1)) = (-1)^(k+1) *C(7+floor((k-1)/2))/power(2,3/2); + end +end +stairs(D3,'b','LineWidth',2) +ylabel('D_3','FontSize', 30) +axis([0 48 -0.5 0.5]) + set(gca, 'FontSize', 20) +subplot(6,1,5) +D2=zeros(1,48); +for k=1:24 + for r=1:2 + D2(r+2*(k-1)) = (-1)^(k+1) *C(13+floor((k-1)/2))/power(2,2/2); + end +end +stairs(D2,'b','LineWidth',2) +ylabel('D_2','FontSize', 30) +axis([0 48 -0.8 0.8]) + set(gca, 'FontSize', 20) +subplot(6,1,6) +D1=zeros(1,48); +for k=1:48 + for r=1 + D1(r+1*(k-1)) = (-1)^(k+1) *C(25+floor((k-1)/2))/power(2,1/2); + end +end +plot(D1,'b','LineWidth',2); +axis([0 48 -0.9 0.9]) +ylabel('D_1','FontSize', 30) +set(gca, 'FontSize', 20) diff --git a/reports/bazar_Emilie/ondelettes2.m b/reports/bazar_Emilie/ondelettes2.m new file mode 100644 index 0000000..9d4768a --- /dev/null +++ b/reports/bazar_Emilie/ondelettes2.m @@ -0,0 +1,67 @@ +ondelettes +niveauColores +figure(2) +subplot(6,1,1) +plot(1:7,courbe(1:7),'b','LineWidth',2) +hold on +plot(8:48,courbe(8:48),'b','LineWidth',2) +set(gca,'ytick',[]) +ylabel('z','FontSize', 30) +set(gca, 'FontSize', 20) +axis([0 48 -1.6 1.6]) +set(B, 'Position', [.91 .11 .03 .8150]) +subplot(6,1,2:6) +colormap([cmapVert([1:1:end],:);cmapBleu(end:-1:1,:)]); + %colormap(gr(end:-1:1,:)); +indice = [3,3,6,12,24]; +indice2=[0,cumsum(indice)]; +beta = zeros(5,48); +for j=1:indice(1) + for l=1:2^4 + beta(1,(j-1)*2^4+l) = C(indice2(1)+j); + end +end +for r=2:5 + for j=1:indice(r) + for l=1:2^(5-(r-1)) + beta(r,(j-1)*2^(5-(r-1))+l) = C(indice2(r)+j); + end + end +end + + +imagesc(beta) +ylabel('d_1 d_2 d_3 d_4 a_4','FontSize', 30) +set(gca, 'FontSize', 20) +%gr = gray(64); + + + +set(gca,'xtick',[],'ytick',[]) +set(gca, 'FontSize', 20) + +% axes('Position', [0.05 0.05 0.9 0.9], 'Visible', 'off'); +% c=colorbar ('FontSize',18); +B=colorbar; +set(B, 'Position', [.91 .11 .03 .8150]) +%set(gca,'Position',[.1 0.1 0.9 0.9]) +set(gca, 'FontSize', 15) +% figure(3) +% subplot(6,1,1) +% plot(courbe,'LineWidth',2) +% axis([0 48 -0.7 0.7]) +% subplot(6,1,2) +% plot([8,24,40],C(1:3),'+','LineWidth',2) +% axis([0 48 -1.6 1.6]) +% subplot(6,1,3) +% plot([8:16:48],C(4:6),'+','LineWidth',2) +% axis([0 48 -0.6 0.6]) +% subplot(6,1,4) +% plot([4:8:48],C(7:12),'+','LineWidth',2) +% axis([0 48 -0.4 0.4]) +% subplot(6,1,5) +% plot([2:4:48],C(13:24),'+','LineWidth',2) +% axis([0 48 -0.2 0.2]) +% subplot(6,1,6) +% plot([1:2:48],C(25:48),'+','LineWidth',2) +% axis([0 48 -0.2 0.2]) \ No newline at end of file diff --git a/reports/bazar_Emilie/pretraitement.m b/reports/bazar_Emilie/pretraitement.m new file mode 100644 index 0000000..b0cf534 --- /dev/null +++ b/reports/bazar_Emilie/pretraitement.m @@ -0,0 +1,22 @@ +%Dessins prétraitements +script + +XC=X2(:,1)-mean(X2(:,1)); + +[C1,L1] = wavedec(X2(:,1),4,'haar'); +xProj1=C(4:12); + +[C2,L2]=wavedec(XC',4,'haar'); +xProj2=C(1:12); + +Xrecon1 = waverec([zeros(1,3),xProj1',zeros(1,36)],L1,'haar'); +Xrecon2 = waverec([xProj2',zeros(1,36)],L2,'haar'); + +plot(X2(:,1),'LineWidth',2) +hold on +plot(Xrecon1,'r-o','LineWidth',2) +plot(Xrecon2,'g-s','LineWidth',2) +xlabel('Instant of the day','FontSize', 30) +ylabel(['Load consumption and its reconstructions' sprintf('\n') ' after projecting and preprocessing'],'FontSize', 30) +set(gca, 'FontSize', 20, 'fontName','Times'); +legend('Original signal','Reconstruction after projection and preprocessing 1','Reconstruction after projection and preprocessing 2','Location','NorthWest') \ No newline at end of file diff --git a/reports/bazar_Emilie/recomp.m b/reports/bazar_Emilie/recomp.m new file mode 100644 index 0000000..b60ed53 --- /dev/null +++ b/reports/bazar_Emilie/recomp.m @@ -0,0 +1,29 @@ +ychap=zeros(n,m,k,94); +%Ychap=zeros(n,m,l,94); + +for LL=1:10 + for i=1:n + for r=1:2 + ychap(i,:,r,LL)=x(i,:)*(phitrue(:,:,r,LL)*inv(rhotrue(:,:,r,LL)))'; + Ychap(i,:,r,LL)=waverec(ychap(i,:,r,LL)',L,'sym4'); + end + end +end +for i=1:n + for r=1:2 + for LL=1:10 + RMSE(i,LL,r)=sqrt(sum((donneesC(78:96,i)-Ychap(i,:,r,LL)').^2)); + end + end +end +LL=88; +plot(Ychap(89,:,2,LL),'r') +hold on +plot(donnees(49:96,89)/100,'b') +hold off + + +for LL=1:10 + Z(LL,:)=principeMAP(Y,X,phiLassoMLE(:,:,:,LL),rhoLassoMLE(:,:,:,LL),piLassoMLE(:,LL),3.14); + sum((Z(1:100)==1)+(Z(101:200)==2)) +end \ No newline at end of file diff --git a/reports/bazar_Emilie/reconstruction.m b/reports/bazar_Emilie/reconstruction.m new file mode 100644 index 0000000..ea6c3e6 --- /dev/null +++ b/reports/bazar_Emilie/reconstruction.m @@ -0,0 +1,7 @@ +beta = betaLassoMLE(:,:,1:2,ind); +for i=1:338 + for r=1:2 + YChap(:,i,r) = X(i,:) * beta(:,:,r); + yChap(:,i,r) = waverec([zeros(1,3),YChap(:,i,r)',zeros(1,36)],L,'haar'); + end +end diff --git a/reports/bazar_Emilie/resultatSousEns.m b/reports/bazar_Emilie/resultatSousEns.m new file mode 100644 index 0000000..f8dca24 --- /dev/null +++ b/reports/bazar_Emilie/resultatSousEns.m @@ -0,0 +1,21 @@ +ind=indiceLassoMLE([2,3,4,160,165]) +cpt=0; +for L2=ind + cpt = cpt+1 +% figure(L2) +% hold on +% for r=1:max(b(:,L2)) +% subplot(2,ceil(max(b(:,L2))/2),r) +% plot(X2Final(:,b(:,L2)==r),c(r)) +% end +% hold off + figure(1) + subplot(2,3,cpt) + hold on + for r=1:max(b(:,L2)) + plot([mean(X2Final(:,b(:,L2)==r),2);mean(Y2Final(:,(b(:,L2)==r)),2)],c(r)) + title('signal moyen de chaque classe, pour chaque modele') + end + hold off + +end \ No newline at end of file diff --git a/reports/bazar_Emilie/sautDeDimension.m b/reports/bazar_Emilie/sautDeDimension.m new file mode 100644 index 0000000..6dec1f8 --- /dev/null +++ b/reports/bazar_Emilie/sautDeDimension.m @@ -0,0 +1,65 @@ +%Saut de dimension +figure(1) +vec=[0,10:100:1210]; + +for r=vec + tableauBis(:,:,r+1) = tableauLassoMLE; + [aaaa,bbbb]= min(tableauLassoMLE(:,4)+r*tableauLassoMLE(:,2)); + tableauBis(:,4,r+1) = tableauLassoMLE(:,4)+r*tableauLassoMLE(:,2); + reponse(r+1)=tableauLassoMLE(bbbb,3); +end + +[x1,y1] = stairs(vec,reponse(vec+1)) +plot(x1,y1,'LineWidth',2) +xlabel('$\kappa$','Interpreter','LaTex','FontSize', 45) +ylabel('Model dimension','FontSize', 45) +set(gca, 'FontSize', 40, 'fontName','Times'); +[Cx1,IA,IC] = unique(y1,'stable') +hold on +plot([x1(IA(2))-0.001,x1(IA(2))],[0,Cx1(2)],'--','LineWidth',2) +plot([2*x1(IA(2))-0.001,2*x1(IA(2))],[0,Cx1(3)],'--','LineWidth',2) +%plot([0,2*x1(IA(2))],[Cx1(3),Cx1(3)],'--','LineWidth',2) +set(gca,'Box','off') +set(gca,'YTick',unique([0:800:700])) +set(gca,'XTick',unique([0:1300:1200])) +text(x1(IA(2))-20,-20,'$\hat{\kappa}$','Interpreter','LaTex','FontSize',45) +text(2*x1(IA(2))-20,-20,'$2 \hat{\kappa}$','Interpreter','LaTex','FontSize',45) +%text(-80,Cx1(3),'$\hat{m}$','Interpreter','LaTex','FontSize',45) +axis([0 1200 0 650]) +hold off + +%LLF pénalisé + +% figure(2) +% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+0*tableauLassoMLE(:,2),'.','markersize', 20) +% xlabel('Model dimension','FontSize', 30) +% ylabel('Log-likelihood','Interpreter','LaTex','FontSize', 30) +% set(gca, 'FontSize', 20, 'fontName','Times'); +figure(3) +ind = find(tableauLassoMLE(:,4) >4000); +tableauLassoMLE(ind,:)=[]; +plot(tableauLassoMLE(2:end,3),tableauLassoMLE(2:end,4)+1100*tableauLassoMLE(2:end,2),'.','markersize', 30) +indiceInt = find(tableauLassoMLE(:,4)+1100*tableauLassoMLE(:,2)<-4401); +indiceBis = find(tableauLassoMLE(:,3)==53); +hold on +plot(tableauLassoMLE(indiceInt,3),tableauLassoMLE(indiceInt,4)+1100*tableauLassoMLE(indiceInt,2),'sr','markersize', 30,'linewidth',5) +plot(tableauLassoMLE(indiceBis,3),tableauLassoMLE(indiceBis,4)+1100*tableauLassoMLE(indiceBis,2),'dg','markersize', 30,'linewidth',5) +xlabel('Model dimension','Interpreter','LaTex','FontSize', 45) +ylabel('Penalized log-likelihood for $2 \hat{\kappa}$','Interpreter','LaTex','FontSize', 45) +set(gca, 'FontSize', 30, 'fontName','Times'); +set(gcf,'Units','normal') +set(gca,'Position',[.1 .1 .88 .85]) +hold off +% figure(4) +% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+3000*tableauLassoMLE(:,2),'.','markersize', 20) +% xlabel('Model dimension','FontSize', 30) +% ylabel('Penalized log-likelihood for $\kappa$ = 3000','Interpreter','LaTex','FontSize', 30) +% set(gca, 'FontSize', 20, 'fontName','Times'); +% +% figure(5) +% n2 = find(piLassoMLE(3,:)~=0,1)-1; +% n3 = find(piLassoMLE(4,:)~=0,1)-1; +% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+0*tableauLassoMLE(:,2),'.','markersize', 20) +% xlabel('Model dimension','FontSize', 30) +% ylabel('Log-likelihood','Interpreter','LaTex','FontSize', 30) +% set(gca, 'FontSize', 20, 'fontName','Times'); \ No newline at end of file diff --git a/reports/bazar_Emilie/script.m b/reports/bazar_Emilie/script.m new file mode 100644 index 0000000..33e24c1 --- /dev/null +++ b/reports/bazar_Emilie/script.m @@ -0,0 +1,30 @@ +%clear all +load donneesSelec.mat +%donnees=[ind100,res100]; +donnees = BB; +Res=mean(donnees,2); +for i=1:349 + X2(:,i)=Res(48*(i-1)+1:48*i); +end +for i=1:348 + signal(:,i)=[X2(:,i);X2(:,i+1)]; +end + +[p1,n1]=size(X2); +for i=1:n1 + for j=1:p1 + XC(i,j)=X2(j,i)-mean(X2(:,i)); + end + + [C,L]=wavedec(XC(i,:)',4,'haar'); + xProj(i,:)=C(1:12); +end + +X=xProj(1:end-2,:); +Y=xProj(2:end-1,:); +for i=1:n1 + Xrecon(i,:)=waverec([xProj(i,:),zeros(1,36)],L,'haar'); +end +for i=1:348 + signal3(:,i)=[Xrecon(i,:),Xrecon(i+1,:)]; +end diff --git a/reports/bazar_Emilie/scriptEssai.m b/reports/bazar_Emilie/scriptEssai.m new file mode 100644 index 0000000..3638977 --- /dev/null +++ b/reports/bazar_Emilie/scriptEssai.m @@ -0,0 +1,28 @@ +%clear all +load donneesSelec.mat +donnees=BB; + +Res=mean(donnees,2); +%Ind=mean(donnees(:,1:100),2); +for i=1:340 + X2(:,i)=Res(48*(i-1)+1:48*i); +end +for i=1:339 + signal(:,i)=[X2(:,i);X2(:,i+1)]; +end + +[p1,n1]=size(X2); +for i=1:n1 + [C,L]=wavedec(X2(:,i)',4,'haar'); + xProj(i,:)=C(4:12); +end + +X=xProj(1:end-2,:); +Y=xProj(2:end-1,:); +for i=1:n1 + Xrecon(i,:)=waverec([zeros(1,3),xProj(i,:),zeros(1,36)],L,'haar'); +end + +for i=1:339 + signal2(:,i)=[Xrecon(i,:),Xrecon(i+1,:)]; +end diff --git a/reports/bazar_Emilie/scriptSimules.m b/reports/bazar_Emilie/scriptSimules.m new file mode 100644 index 0000000..f068b5e --- /dev/null +++ b/reports/bazar_Emilie/scriptSimules.m @@ -0,0 +1,9 @@ +for i = 1:100 + courbeX(i,:) = data(((i-1)*96+1):((i-1)*96+48)); + courbeY(i,:) = data(((i-1)*96+49):((i-1)*96+96)); +end +plot((courbeX)') +figure(2) +plot((courbeY)') + +X2 = (courbeX)' \ No newline at end of file diff --git a/reports/bazar_Emilie/simulatedData.R b/reports/bazar_Emilie/simulatedData.R new file mode 100644 index 0000000..6031822 --- /dev/null +++ b/reports/bazar_Emilie/simulatedData.R @@ -0,0 +1,120 @@ +library("mclust") +#library("R.matlab", lib.loc="~/R/x86_64-pc-linux-gnu-library/3.3") +#redrawData = TRUE +#if (redrawData==TRUE){ + ########### + ## Model + ########### + K = 2 + p = 48 + T = seq(0,1.5,length.out = p) + T2 = seq(0,3, length.out = 2*p) + n = 100 + x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1 + plot(T,x1) + lines(T,x1) + + sigmaX = 0.12 + sigmaY = 0.12 + beta = list() + p1= 0.5 + beta[[1]] =diag(c(rep(p1,5),rep(1,5), rep(p1,5), rep(1, p-15))) + p2 = 1 + beta[[2]] = diag(c(rep(p2,5),rep(1,5), rep(p2,5), rep(1, p-15))) + ITE = 100 + ARI1 = ARI2 = ARI3 = rep(0,ITE) + XY = array(0, dim = c(ITE, 2*p,n)) + XYproj = array(0, dim=c(ITE, 96,n)) + + affec = list() + ########### + ## Iterations + ########### + for (ite in c(1:ITE)){ + ########### + ##Sample + ########### + x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n) + affec[[ite]] = sample(c(1,2), n, replace = TRUE) + y = x + xy = matrix(0,ncol=n, nrow= 2*p) + for (i in c(1:n)){ + y[,i] = x[,i] %*% beta[[affec[[ite]][i]]] + rnorm(p, 0, sigmaY) + xy[,i] = c(x[,i],y[,i]) + XY[ite,,i] = xy[,i] - mean(xy[,i]) + # Dx = dwt(x[,i], filter='haar')@W + # Dx = rev(unlist(Dx)) + # Dx = Dx[2:(1+3+6+12+24)] + # Ax = dwt(x[,i], filter='haar')@V + # Ax = rev(unlist(Ax)) + # Ax = Ax[2:(1+3)] + # Dy = dwt(y[,i], filter='haar')@W + # Dy = rev(unlist(Dy)) + # Dy = Dy[2:(1+3+6+12+24)] + # Ay = dwt(y[,i], filter='haar')@V + # Ay = rev(unlist(Ay)) + # Ay = Ay[2:(1+3)] + # XYproj[ite,,i] = c(Ax,Dx,Ay,Dy) + } + print(ite) + # + # + } + xy[c(7,55),] = NA + # write.table(XY,'data.csv', row.names=FALSE, col.names=FALSE) +matplot(T2,xy[,affec[[ite]]==1],type='l', col='red', lty = 1) +matplot(T2,xy[,affec[[ite]]==2],type='l', col='black', add=TRUE, lty= 1) +abline(v = 1.5) +text(0.75,0,'X', cex = 2 ) +text(0.75+1.5,0,'Y', cex = 2 ) +#proj = read.table('dataProj.csv') +#} + + +#matplot(T,x,type='l', col='black', xlab = '', ylab='', lwd=1.5,lty=1) +#matplot(T,y[,affec[[ite]]==1],type='l', col='red', xlab = '', ylab='', lwd=1.5,lty=1) +#matplot(T,y[,affec[[ite]]==2],type='l', col='black', add=TRUE,lwd=2,lty=1) +# proj2 = array(0,dim=c(ITE,2*p,n)) +# for (ite in c(1:ITE)){ +# for (i in c(1:n)){ +# A = proj[ite,(1+(i-1)*96):(i*96)] +# for (j in 1:96){ +# proj2[ite,j,i] = A[1,j] +# } +# } +# print(ite) +# } +########### +## Iterations +########### +Kmod2 = Kmod1 = rep(0,ITE) +Kmod3 = rep(0,ITE) +for (ite in c(1:ITE)){ + print(ite) + ########### + ## k-means 1 + ########### + mod1 = Mclust(t(XY[ite,,]),G = 1:2, mode='VII') + ARI1[ite] = adjustedRandIndex(mod1$classification, affec[[ite]]) + Kmod1[ite] = mod1$G + # ########### + # ## k-means 2 + # ########### + # #proj2 = + # mod2 = Mclust(t(XYproj[ite,,]),G = 1:8, mode='VII') + # ARI2[ite] = adjustedRandIndex(mod2$classification, affec[[ite]]) + # Kmod2[ite] = mod2$G + # ########### + # ## k-means 1 + # ########### + # #proj3 = + # mod3 = Mclust(t(XYproj[ite,c(4:12,52:60),]),G = 1:8, mode='VII') + # ARI3[ite] = adjustedRandIndex(mod3$classification, affec[[ite]]) + # Kmod3[ite] = mod3$G +} +ARI0 = rep(1,ITE) +par(cex.lab=1.5) +par(cex.axis=1.5) +boxplot(ARI0,ARI1, names = c('LassoMLE','K-means'), lwd=1.3) +table(Kmod1) +table(Kmod2) diff --git a/reports/essai16mars.R b/reports/essai16mars.R new file mode 100644 index 0000000..59bbb53 --- /dev/null +++ b/reports/essai16mars.R @@ -0,0 +1,31 @@ +p = 10 +q = 8 +k = 2 +D = 20 + +meanX = matrix(nrow=p,ncol=k) +meanX[,1] = rep(0,p) +meanX[,2] = rep(1,p) + +covX = array(dim=c(p,p,k)) +covX[,,1] = 0.1*diag(p) +covX[,,2] = 0.5*diag(p) + +covY = array(dim = c(q,q,k)) +covY[,,1] = 0.1*diag(q) +covY[,,2] = 0.2*diag(q) + +beta = array(dim = c(p,q,2)) +beta[,,2] = matrix(c(rep(2,(D)),rep(0, p*q-D))) +beta[,,1] = matrix(c(rep(1,D),rep(0, p*q-D))) + +n = 100 + +pi = c(0.4,0.6) + +data = generateXY(meanX,covX,covY, pi, beta, n) + +X = data$X +Y = data$Y + +res_valse = valse(X,Y) diff --git a/reports/essaiPlot.R b/reports/essaiPlot.R new file mode 100644 index 0000000..10b0e01 --- /dev/null +++ b/reports/essaiPlot.R @@ -0,0 +1,73 @@ +### Regression matrices +model = Res +K = dim(model$phi)[3] +valMax = max(abs(model$phi)) + +require(fields) + +if (K<4){ + par(mfrow = c(1,K)) +} else op = par(mfrow = c(2, (K+1)/2)) + +## Phi + +for (r in 1:K){ + image.plot(t(abs(model$phi[,,r])), + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) +} +par(mfrow = c(1,K),oma = c(0,0,3,0)) +mtext("Regression matrices in each cluster", side=3, line=4, font=2, cex=2, col='red') + +par(mfrow = c(1,2), oma=c(0,0,3,0)) +for (i in 1:4) + plot(runif(20), runif(20), + main=paste("random plot (",i,")",sep='')) +par(op) +mtext("Four plots", + side=3, line=4, font=2, cex=2, col='red') + +### Zoom onto two classes we want to compare +kSel = c(1,2) +par(mfrow = c(1,3)) + +for (r in kSel){ + image.plot(t(abs(model$phi[,,r])),xaxt="n",yaxt="n", + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) +} +image.plot(t(abs(model$phi[,,kSel[1]]-model$phi[,,kSel[2]])), + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) + +### Covariance matrices +par(mfrow = c(K, 1)) +for (r in 1:K){ + image.plot(matrix(diag(model$rho[,,r]), ncol= 1), + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) +} + +### proportions +Gam = matrix(0, ncol = K, nrow = n) +gam = Gam +for (i in 1:n){ + for (r in 1:K){ + sqNorm2 = sum( (Y[i,]%*%model$rho[,,r]-X[i,]%*%model$phi[,,r])^2 ) + Gam[i,r] = model$pi[r] * exp(-0.5*sqNorm2)* det(model$rho[,,r]) + } + gam[i,] = Gam[i,] / sum(Gam[i,]) +} +affec = apply(gam, 1,which.max) +gam2 = matrix(NA, ncol = K, nrow = n) +for (i in 1:n){ + gam2[i, affec[i]] = gam[i, affec[i]] +} +boxplot(gam2) + +### Mean in each cluster +XY = cbind(X,Y) +XY_class= list() +meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2]) +for (r in 1:K){ + XY_class[[r]] = XY[affec == r, ] + meanPerClass[,r] = apply(XY_class[[r]], 2, mean) +} + +matplot(meanPerClass, type='l') diff --git a/reports/simulData_17mars.R b/reports/simulData_17mars.R new file mode 100644 index 0000000..93a8f20 --- /dev/null +++ b/reports/simulData_17mars.R @@ -0,0 +1,56 @@ +simulData_17mars = function(ite){ + set.seed = 22021989+ite + + ########### + ## Modele + ########### + K = 2 + p = 48 + T = seq(0,1.5,length.out = p) + T2 = seq(0,3, length.out = 2*p) + n = 100 + x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1 + sigmaX = 0.12 + sigmaY = 0.12 + beta = list() + p1= 0.5 + beta[[1]] =diag(c(rep(p1,5),rep(1,5), rep(p1,5), rep(1, p-15))) + p2 = 2 + beta[[2]] = diag(c(rep(p2,5),rep(1,5), rep(p2,5), rep(1, p-15))) + ARI1 = ARI2 = ARI3 = 0 + + ########### + ## Data + Projection + ########### + require(wavelets) + XY = array(0, dim = c(2*p,n)) + XYproj = array(0, dim=c(96,n)) + x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n) + affec = sample(c(1,2), n, replace = TRUE) + y = x + xy = matrix(0,ncol=n, nrow= 2*p) + for (i in c(1:n)){ + y[,i] = x[,i] %*% beta[[affec[i]]] + rnorm(p, 0, sigmaY) + xy[,i] = c(x[,i],y[,i]) + XY[,i] = xy[,i] - mean(xy[,i]) + Dx = dwt(x[,i], filter='haar')@W + Dx = rev(unlist(Dx)) + Dx = Dx[2:(1+3+6+12+24)] + Ax = dwt(x[,i], filter='haar')@V + Ax = rev(unlist(Ax)) + Ax = Ax[2:(1+3)] + Dy = dwt(y[,i], filter='haar')@W + Dy = rev(unlist(Dy)) + Dy = Dy[2:(1+3+6+12+24)] + Ay = dwt(y[,i], filter='haar')@V + Ay = rev(unlist(Ay)) + Ay = Ay[2:(1+3)] + XYproj[,i] = c(Ax,Dx,Ay,Dy) + } + + res_valse = valse(t(x),t(y), kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 1000) + res_valse_proj = valse(t(XYproj[1:p,]),t(XYproj[(p+1):(2*p),]), kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 1000) + + save(res_valse,file=paste("Res_",ite, ".RData",sep="")) + save(res_valse_proj,file=paste("ResProj_",ite, ".RData",sep="")) +} diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..1a02474 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,4 @@ +test.* +!test.*.c +/data/ +vgcore.* diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..8b8697e --- /dev/null +++ b/test/Makefile @@ -0,0 +1,31 @@ +CC = gcc +CFLAGS = -g -std=gnu99 -Wno-implicit-function-declaration +LDFLAGS = -lm -lgsl -lcblas +TEST_LDFLAGS = -L. libvalse_core.so +LIB = libvalse_core.so +LIB_SRC = $(wildcard ../pkg/src/sources/*.c) +LIB_OBJ = $(LIB_SRC:.c=.o) +INCLUDES = -I../pkg/src/sources +TESTS = test.EMGLLF test.EMGrank + +all: $(LIB) $(TESTS) + +$(LIB): $(LIB_OBJ) + $(CC) -shared -o $@ $^ $(LDFLAGS) + +test.EMGLLF: $(LIB) test.EMGLLF.o test_utils.o + $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS) + +test.EMGrank: $(LIB) test.EMGrank.o test_utils.o + $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS) + +%.o: %.c + $(CC) -fPIC -o $@ -c $< $(CFLAGS) $(INCLUDES) + +clean: + rm -f *.o ../pkg/src/sources/*.o + +cclean: clean + rm -f *.so ../pkg/src/*.so $(TESTS) + +.PHONY: all clean cclean diff --git a/test/generateRunSaveTest_EMGLLF.R b/test/generateRunSaveTest_EMGLLF.R new file mode 100644 index 0000000..c46b77b --- /dev/null +++ b/test/generateRunSaveTest_EMGLLF.R @@ -0,0 +1,48 @@ +source("helper.R") +library(valse) + +generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1., lambda=0.5, eps=1e-6) +{ + testFolder = "./data/" + dir.create(testFolder, showWarnings=FALSE, mode="0755") + + params = basicInitParameters(n, p, m, k) + xy = generateXYdefault(n, p, m, k) + + #save inputs + write.table(as.double(params$phiInit), paste(testFolder,"phiInit",sep=""), + row.names=F, col.names=F) + write.table(as.double(params$rhoInit), paste(testFolder,"rhoInit",sep=""), + row.names=F, col.names=F) + write.table(as.double(params$piInit), paste(testFolder,"piInit",sep=""), + row.names=F, col.names=F) + write.table(as.double(params$gamInit), paste(testFolder,"gamInit",sep=""), + row.names=F, col.names=F) + write.table(as.integer(mini), paste(testFolder,"mini",sep=""), + row.names=F, col.names=F) + write.table(as.integer(maxi), paste(testFolder,"maxi",sep=""), + row.names=F, col.names=F) + write.table(as.double(gamma), paste(testFolder,"gamma",sep=""), + row.names=F, col.names=F) + write.table(as.double(lambda), paste(testFolder,"lambda",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$X), paste(testFolder,"X",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$Y), paste(testFolder,"Y",sep=""), + row.names=F, col.names=F) + write.table(as.double(eps), paste(testFolder,"eps",sep=""), + row.names=F, col.names=F) + write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""), + row.names=F, col.names=F) + + res = valse::EMGLLF(params$phiInit,params$rhoInit,params$piInit,params$gamInit,mini, + maxi,gamma,lambda,xy$X,xy$Y,eps,fast=FALSE) + + #save outputs + write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F) + write.table(as.double(res$rho),paste(testFolder,"rho",sep=""),row.names=F,col.names=F) + write.table(as.double(res$pi),paste(testFolder,"pi",sep=""),row.names=F,col.names=F) + write.table(as.double(res$llh),paste(testFolder,"llh",sep=""),row.names=F,col.names=F) + write.table(as.double(res$S),paste(testFolder,"S",sep=""),row.names=F,col.names=F) + write.table(as.integer(res$affec),paste(testFolder,"affec",sep=""),row.names=F,col.names=F) +} diff --git a/test/generateRunSaveTest_EMGrank.R b/test/generateRunSaveTest_EMGrank.R new file mode 100644 index 0000000..f6a3314 --- /dev/null +++ b/test/generateRunSaveTest_EMGrank.R @@ -0,0 +1,40 @@ +source("helper.R") +library(valse) + +generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0, rank = c(1,2,4)) +{ + eps = 1e-6 + Pi = rep(1.0/k, k) + Rho = array(dim=c(m,m,k)) + for(i in 1:k) + Rho[,,i] = diag(1,m) + xy = generateXYdefault(n, p, m, k) + + testFolder = "./data/" + dir.create(testFolder, showWarnings=FALSE, mode="0755") + #save inputs + write.table(as.double(Pi), paste(testFolder,"Pi",sep=""), + row.names=F, col.names=F) + write.table(as.double(Rho), paste(testFolder,"Rho",sep=""), + row.names=F, col.names=F) + write.table(as.integer(mini), paste(testFolder,"mini",sep=""), + row.names=F, col.names=F) + write.table(as.integer(maxi), paste(testFolder,"maxi",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$X), paste(testFolder,"X",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$Y), paste(testFolder,"Y",sep=""), + row.names=F, col.names=F) + write.table(as.double(eps), paste(testFolder,"eps",sep=""), + row.names=F, col.names=F) + write.table(as.integer(rank), paste(testFolder,"rank",sep=""), + row.names=F, col.names=F) + write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""), + row.names=F, col.names=F) + + res = valse::EMGrank(Pi,Rho,mini,maxi,xy$X,xy$Y,eps,rank,fast=FALSE) + + #save output + write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F) + write.table(as.double(res$LLF),paste(testFolder,"LLF",sep=""),row.names=F,col.names=F) +} diff --git a/test/helper.R b/test/helper.R new file mode 100644 index 0000000..8ec122b --- /dev/null +++ b/test/helper.R @@ -0,0 +1,58 @@ +#' Generate a sample of (X,Y) of size n with default values +#' +#' @param n sample size +#' @param p number of covariates +#' @param m size of the response +#' @param k number of clusters +#' +#' @return list with X and Y +#' +generateXYdefault = function(n, p, m, k) +{ + meanX = rep(0, p) + covX = diag(p) + covY = array(dim=c(m,m,k)) + for(r in 1:k) + covY[,,r] = diag(m) + Ï = rep(1./k,k) + #initialize beta to a random number of non-zero random value + β = array(0, dim=c(p,m,k)) + for (j in 1:p) + { + nonZeroCount = sample(1:m, 1) + β[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) + } + + sample_IO = generateXY(n, Ï, meanX, β, covX, covY) + return (list(X=sample_IO$X,Y=sample_IO$Y)) +} + +#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for +#' weights, identity for covariance matrices, and uniformly distributed for the +#' clustering) +#' +#' @param n sample size +#' @param p number of covariates +#' @param m size of the response +#' @param k number of clusters +#' +#' @return list with phiInit, rhoInit,piInit,gamInit +#' +basicInitParameters = function(n,p,m,k) +{ + phiInit = array(0, dim=c(p,m,k)) + + piInit = (1./k)*rep(1,k) + + rhoInit = array(dim=c(m,m,k)) + for (i in 1:k) + rhoInit[,,i] = diag(m) + + gamInit = 0.1 * matrix(1, nrow=n, ncol=k) + R = sample(1:k, n, replace=TRUE) + for (i in 1:n) + gamInit[i,R[i]] = 0.9 + gamInit = gamInit/sum(gamInit[1,]) + + return (list("phiInit"=phiInit, "rhoInit"=rhoInit, "piInit"=piInit, "gamInit"=gamInit)) +} diff --git a/test/run.sh b/test/run.sh new file mode 100755 index 0000000..7e92929 --- /dev/null +++ b/test/run.sh @@ -0,0 +1,37 @@ +#!/bin/sh +set -e + +#Testing procedure for EMGLLF (inside this folder): + +algo=$1 #EMGLLF or EMGrank, + #second arg indicate if rebuild or rebuild+clean requested + +if [ "$2" == 'c' ]; then + #0.1) Clean package + C testing code + find ../pkg/man/ -type f ! -name 'valse-package.Rd' -delete + rm -f ../pkg/NAMESPACE + # Erase object and library files + rm -f ../pkg/src/*.so + rm -f ../pkg/src/adapters/*.o + make cclean +fi + +if [ "$2" == 'r' ] || [ "$2" == 'c' ]; then + #0.2) Install current version of the package (WARNING: roxygen 2 v5.0.1) + # --> devtools::install_github('klutometis/roxygen@v5.0.1') + echo "setwd('../pkg');library(roxygen2);roxygenize('.')" | R --slave + R CMD INSTALL ../pkg +fi + +#1) Generate data using R versions of EMGLLF/EMGrank (slow, but trusted) +echo -e "source('generateRunSaveTest_$algo.R');\n \ + # I'm happy with default values - feel free to give args\n \ + generateRunSaveTest_$algo() " \ + | R --slave + +#2) Compile test C code into an executable named "test.$algo" +make test.$algo + +#3) Run it with valgrind! +#valgrind +./test.$algo diff --git a/test/script_data.R b/test/script_data.R new file mode 100644 index 0000000..da319da --- /dev/null +++ b/test/script_data.R @@ -0,0 +1,15 @@ +m=6 +p=6 + +covY = array(0,dim = c(m,m,2)) +covY[,,1] = diag(m) +covY[,,2] = diag(m) + +Beta = array(0, dim = c(p, m, 2)) +Beta[1:4,1:4,1] = 3*diag(4) +Beta[1:4,1:4,2] = -2*diag(4) + +Data = generateXY(200, c(0.5,0.5), rep(0,p), Beta, diag(p), covY) +# +Res = valse(Data$X,Data$Y, fast=TRUE, plot=FALSE, verbose = TRUE, kmax=3, size_coll_mod = 50, selecMod = "DDSE", mini = 50, maxi=100) +plot(Res$tableau[,3], -Res$tableau[,4]) diff --git a/test/test.EMGLLF.c b/test/test.EMGLLF.c new file mode 100644 index 0000000..68f73d9 --- /dev/null +++ b/test/test.EMGLLF.c @@ -0,0 +1,83 @@ +#include "EMGLLF.h" +#include "test_utils.h" +#include <stdlib.h> + +int main(int argc, char** argv) +{ + int* dimensions = readArray_int("dimensions"); + int n = dimensions[0]; + int p = dimensions[1]; + int m = dimensions[2]; + int k = dimensions[3]; + free(dimensions); + + //////////// + // INPUTS // + Real* phiInit = readArray_real("phiInit"); + Real* rhoInit = readArray_real("rhoInit"); + Real* piInit = readArray_real("piInit"); + Real* gamInit = readArray_real("gamInit"); + int mini = read_int("mini"); + int maxi = read_int("maxi"); + Real gamma = read_real("gamma"); + Real lambda = read_real("lambda"); + Real* X = readArray_real("X"); + Real* Y = readArray_real("Y"); + Real eps = read_real("eps"); + //////////// + + ///////////// + // OUTPUTS // + Real* phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* pi = (Real*)malloc(k*sizeof(Real)); + Real llh; + Real* S = (Real*)malloc(p*m*k*sizeof(Real)); + int* affec = (int*)malloc(n*sizeof(int)); + ///////////// + + //////////////////// + // Call to EMGLLF // + EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps, + phi,rho,pi,&llh,S,affec, + n,p,m,k); + //////////////////// + + free(phiInit); + free(rhoInit); + free(piInit); + free(gamInit); + free(X); + free(Y); + + // Compare to reference outputs + Real* ref_phi = readArray_real("phi"); + compareArray_real("phi", phi, ref_phi, p*m*k); + free(phi); + free(ref_phi); + + Real* ref_rho = readArray_real("rho"); + compareArray_real("rho", rho, ref_rho, m*m*k); + free(rho); + free(ref_rho); + + Real* ref_pi = readArray_real("pi"); + compareArray_real("pi", pi, ref_pi, k); + free(pi); + free(ref_pi); + + Real ref_llh = read_real("llh"); + compareArray_real("llh", &llh, &ref_llh, 1); + + Real* ref_S = readArray_real("S"); + compareArray_real("S", S, ref_S, p*m*k); + free(S); + free(ref_S); + + int* ref_affec = readArray_int("affec"); + compareArray_int("affec", affec, ref_affec, n); + free(affec); + free(ref_affec); + + return 0; +} diff --git a/test/test.EMGrank.c b/test/test.EMGrank.c new file mode 100644 index 0000000..71ce506 --- /dev/null +++ b/test/test.EMGrank.c @@ -0,0 +1,58 @@ +#include "EMGrank.h" +#include "test_utils.h" +#include <stdlib.h> + +int main(int argc, char** argv) +{ + int* dimensions = readArray_int("dimensions"); + int n = dimensions[0]; + int p = dimensions[1]; + int m = dimensions[2]; + int k = dimensions[3]; + free(dimensions); + + //////////// + // INPUTS // + Real* Rho = readArray_real("Rho"); + Real* Pi = readArray_real("Pi"); + int mini = read_int("mini"); + int maxi = read_int("maxi"); + Real* X = readArray_real("X"); + Real* Y = readArray_real("Y"); + Real eps = read_real("eps"); + int* rank = readArray_int("rank"); + //////////// + + ///////////// + // OUTPUTS // + Real* phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* LLF = (Real*)malloc(1*sizeof(Real)); + ///////////// + + ////////////////////////// + // Main call to EMGrank // + EMGrank_core(Pi,Rho,mini,maxi,X,Y,eps,rank, + phi,LLF, + n,p,m,k); + ////////////////////////// + + free(Rho); + free(Pi); + free(X); + free(Y); + free(rank); + + // Compare to reference outputs + Real* ref_phi = readArray_real("phi"); + compareArray_real("phi", phi, ref_phi, p*m*k); + free(phi); + free(ref_phi); + + // LLF + Real* ref_LLF = readArray_real("LLF"); + compareArray_real("LLF", LLF, ref_LLF, 1); + free(LLF); + free(ref_LLF); + + return 0; +} diff --git a/test/test_EMGLLF.R b/test/test_EMGLLF.R new file mode 100644 index 0000000..8ca7dea --- /dev/null +++ b/test/test_EMGLLF.R @@ -0,0 +1,39 @@ +library(valse) +testFolder = "data/" + +# NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove. + +#get inputs +npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep=""))) +n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4] +phiInit = array(as.double(as.matrix(read.table(paste(testFolder,"phiInit",sep="")))), dim=c(p,m,k)) +rhoInit = array(as.double(as.matrix(read.table(paste(testFolder,"rhoInit",sep="")))), dim=c(m,m,k)) +piInit = as.double(as.matrix(read.table(paste(testFolder,"piInit",sep="")))[,]) +gamInit = matrix(as.double(as.matrix(read.table(paste(testFolder,"gamInit",sep="")))), n,k) +mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1]) +maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1]) +gamma = as.double(as.matrix(read.table(paste(testFolder,"gamma",sep="")))[1]) +lambda = as.double(as.matrix(read.table(paste(testFolder,"lambda",sep="")))[1]) +X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p) +Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m) +eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1]) + +#get outputs +phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k)) +rho = array(as.double(as.matrix(read.table(paste(testFolder,"rho",sep="")))), dim=c(m,m,k)) +pi = as.double(as.matrix(read.table(paste(testFolder,"pi",sep="")))[,]) +llh = as.double(as.matrix(read.table(paste(testFolder,"llh",sep="")))[1]) +S = array(as.double(as.matrix(read.table(paste(testFolder,"S",sep="")))), dim=c(p,m,k)) +affec = as.double(as.matrix(read.table(paste(testFolder,"affec",sep="")))[,]) + +res = valse::EMGLLF( + phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,fast=TRUE) + +#compare outputs +nd=7 #number of digits +print( all(round(phi,nd) == round(res$phi,nd)) ) +print( all(round(rho,nd) == round(res$rho,nd)) ) +print( all(round(pi,nd) == round(res$pi,nd)) ) +print( all(round(llh,nd) == round(res$llh,nd)) ) +print( all(round(S,nd) == round(res$S,nd)) ) +print( all(affec == res$affec) ) diff --git a/test/test_EMGrank.R b/test/test_EMGrank.R new file mode 100644 index 0000000..b140e5e --- /dev/null +++ b/test/test_EMGrank.R @@ -0,0 +1,27 @@ +library(valse) +testFolder = "data/" + +# NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove. + +#get inputs +npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep=""))) +n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4] +Pi = as.double(as.matrix(read.table(paste(testFolder,"Pi",sep="")))[,]) +Rho = array(as.double(as.matrix(read.table(paste(testFolder,"Rho",sep="")))), dim=c(m,m,k)) +mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1]) +maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1]) +X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p) +Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m) +eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1]) +rank = as.double(as.matrix(read.table(paste(testFolder,"rank",sep="")))[,]) + +#get outputs +phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k)) +LLF = as.double(as.matrix(read.table(paste(testFolder,"LLF",sep="")))[1]) + +res = valse::EMGrank(Pi,Rho,mini,maxi,X,Y,eps,rank,fast=TRUE) + +#compare outputs +nd=7 #number of digits +print( all(round(phi,nd) == round(res$phi,nd)) ) +print( all(round(LLF,nd) == round(res$LLF,nd)) ) diff --git a/test/test_utils.c b/test/test_utils.c new file mode 100644 index 0000000..96972ee --- /dev/null +++ b/test/test_utils.c @@ -0,0 +1,103 @@ +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include "utils.h" + +// Check if array == refArray +void compareArray(const char* ID, const void* array, const void* refArray, int size, + int isinteger) +{ + Real EPS = 1e-5; //precision + printf("Checking %s\n",ID); + Real maxError = 0.0; + for (int i=0; i<size; i++) + { + Real error = isinteger + ? fabs(((int*)array)[i] - ((int*)refArray)[i]) + : fabs(((Real*)array)[i] - ((Real*)refArray)[i]); + if (error >= maxError) + maxError = error; + } + if (maxError >= EPS) + printf(" Inaccuracy: max(abs(error)) = %g >= %g\n",maxError,EPS); + else + printf(" OK\n"); +} + +void compareArray_real(const char* ID, const void* array, const void* refArray, int size) +{ + return compareArray(ID, array, refArray, size, 0); +} + +void compareArray_int(const char* ID, const void* array, const void* refArray, int size) +{ + return compareArray(ID, array, refArray, size, 1); +} + +// Read array by columns (as in MATLAB) and return by-rows encoding +void* readArray(const char* fileName, int isinteger) +{ + // need to prepend 'data/' (not really nice code...) + char* fullFileName = (char*)calloc(5+strlen(fileName)+1, sizeof(char)); + strcat(fullFileName, "data/"); + strcat(fullFileName, fileName); + + // first pass to know how many elements to allocate + char* command = (char*)calloc(12+strlen(fullFileName)+8+1, sizeof(char)); + strcat(command, "wc -l "); + strcat(command, fullFileName); + FILE *arraySize = popen(command, "r"); + free(command); + char* bufferNum = (char*)calloc(64, sizeof(char)); + fgets(bufferNum, sizeof(bufferNum), arraySize); + int n = atoi(bufferNum); + pclose(arraySize); + + // open file for reading + FILE* arrayFile = fopen(fullFileName, "r"); + free(fullFileName); + + // read all values, and convert them to by-rows matrices format + size_t elementSize = isinteger ? sizeof(int) : sizeof(Real); + void* array = malloc(n*elementSize); + for (int i=0; i<n; i++) + { + fgets(bufferNum, 64, arrayFile); + // transform buffer content into Real or int, and store it at appropriate location + if (isinteger) + ((int*)array)[i] = atoi(bufferNum); + else + ((Real*)array)[i] = atof(bufferNum); + } + fclose(arrayFile); + free(bufferNum); + + return array; +} + +int* readArray_int(const char* fileName) +{ + return (int*)readArray(fileName, 1); +} + +Real* readArray_real(const char* fileName) +{ + return (Real*)readArray(fileName, 0); +} + +int read_int(const char* fileName) +{ + int* array = readArray_int(fileName); + int res = array[0]; + free(array); + return res; +} + +Real read_real(const char* fileName) +{ + Real* array = readArray_real(fileName); + Real res = array[0]; + free(array); + return res; +} diff --git a/test/test_utils.h b/test/test_utils.h new file mode 100644 index 0000000..e0aec96 --- /dev/null +++ b/test/test_utils.h @@ -0,0 +1,17 @@ +// Check if array == refArray +void compareArray(const char* ID, const void* array, const void* refArray, int size, int isInteger); + +void compareArray_real(const char* ID, const void* array, const void* refArray, int size); + +void compareArray_int(const char* ID, const void* array, const void* refArray, int size); + +// Read array by columns (as in MATLAB) and return by-rows encoding +void* readArray(const char* fileName, int isInteger); + +int* readArray_int(const char* fileName); + +Real* readArray_real(const char* fileName); + +int read_int(const char* fileName); + +Real read_real(const char* fileName); -- 2.44.0