From 3453829ed3723a2b18ac478a6b4ef5d087a9d68d Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 22 Jan 2018 15:12:50 +0100 Subject: [PATCH] First commit --- .gitattributes | 5 + .gitfat | 2 + .gitignore | 24 ++ README.md | 18 ++ TODO | 15 + biblio/articleMethodeDevijverADAC.pdf | 1 + biblio/these_Emilie.pdf | 1 + hooks/pre-commit.bad | 42 +++ hooks/pre-push | 3 + pkg/.gitignore | 4 + pkg/DESCRIPTION | 43 +++ pkg/LICENSE | 23 ++ pkg/R/A_NAMESPACE.R | 16 + pkg/R/EMGLLF.R | 195 ++++++++++++ pkg/R/EMGrank.R | 120 +++++++ pkg/R/computeGridLambda.R | 37 +++ pkg/R/constructionModelesLassoMLE.R | 113 +++++++ pkg/R/constructionModelesLassoRank.R | 95 ++++++ pkg/R/generateXY.R | 39 +++ pkg/R/initSmallEM.R | 80 +++++ pkg/R/main.R | 152 +++++++++ pkg/R/plot_valse.R | 89 ++++++ pkg/R/selectVariables.R | 83 +++++ pkg/R/util.R | 7 + pkg/data/data.RData | Bin 0 -> 1010 bytes pkg/data/data2.RData | Bin 0 -> 18385 bytes pkg/inst/testdata/TODO.csv | 1 + pkg/man/valse-package.Rd | 37 +++ pkg/src/Makevars | 11 + pkg/src/adapters/a.EMGLLF.c | 91 ++++++ pkg/src/adapters/a.EMGrank.c | 73 +++++ pkg/src/sources/EMGLLF.c | 420 +++++++++++++++++++++++++ pkg/src/sources/EMGLLF.h | 32 ++ pkg/src/sources/EMGrank.c | 307 ++++++++++++++++++ pkg/src/sources/EMGrank.h | 25 ++ pkg/src/sources/utils.h | 48 +++ pkg/tests/testthat.R | 4 + pkg/tests/testthat/helper-context1.R | 5 + pkg/tests/testthat/test-context1.R | 11 + pkg/vignettes/.gitignore | 14 + reports/bazar_Emilie/dessinBeta.m | 97 ++++++ reports/bazar_Emilie/niveauColores.m | 18 ++ reports/bazar_Emilie/ondelettes.m | 83 +++++ reports/bazar_Emilie/ondelettes2.m | 67 ++++ reports/bazar_Emilie/pretraitement.m | 22 ++ reports/bazar_Emilie/recomp.m | 29 ++ reports/bazar_Emilie/reconstruction.m | 7 + reports/bazar_Emilie/resultatSousEns.m | 21 ++ reports/bazar_Emilie/sautDeDimension.m | 65 ++++ reports/bazar_Emilie/script.m | 30 ++ reports/bazar_Emilie/scriptEssai.m | 28 ++ reports/bazar_Emilie/scriptSimules.m | 9 + reports/bazar_Emilie/simulatedData.R | 120 +++++++ reports/essai16mars.R | 31 ++ reports/essaiPlot.R | 73 +++++ reports/simulData_17mars.R | 56 ++++ test/.gitignore | 4 + test/Makefile | 31 ++ test/generateRunSaveTest_EMGLLF.R | 48 +++ test/generateRunSaveTest_EMGrank.R | 40 +++ test/helper.R | 58 ++++ test/run.sh | 37 +++ test/script_data.R | 15 + test/test.EMGLLF.c | 83 +++++ test/test.EMGrank.c | 58 ++++ test/test_EMGLLF.R | 39 +++ test/test_EMGrank.R | 27 ++ test/test_utils.c | 103 ++++++ test/test_utils.h | 17 + 69 files changed, 3602 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitfat create mode 100644 .gitignore create mode 100644 README.md create mode 100644 TODO create mode 100644 biblio/articleMethodeDevijverADAC.pdf create mode 100644 biblio/these_Emilie.pdf create mode 100755 hooks/pre-commit.bad create mode 100755 hooks/pre-push create mode 100644 pkg/.gitignore create mode 100644 pkg/DESCRIPTION create mode 100644 pkg/LICENSE create mode 100644 pkg/R/A_NAMESPACE.R create mode 100644 pkg/R/EMGLLF.R create mode 100644 pkg/R/EMGrank.R create mode 100644 pkg/R/computeGridLambda.R create mode 100644 pkg/R/constructionModelesLassoMLE.R create mode 100644 pkg/R/constructionModelesLassoRank.R create mode 100644 pkg/R/generateXY.R create mode 100644 pkg/R/initSmallEM.R create mode 100644 pkg/R/main.R create mode 100644 pkg/R/plot_valse.R create mode 100644 pkg/R/selectVariables.R create mode 100644 pkg/R/util.R create mode 100644 pkg/data/data.RData create mode 100644 pkg/data/data2.RData create mode 100644 pkg/inst/testdata/TODO.csv create mode 100644 pkg/man/valse-package.Rd create mode 100644 pkg/src/Makevars create mode 100644 pkg/src/adapters/a.EMGLLF.c create mode 100644 pkg/src/adapters/a.EMGrank.c create mode 100644 pkg/src/sources/EMGLLF.c create mode 100644 pkg/src/sources/EMGLLF.h create mode 100644 pkg/src/sources/EMGrank.c create mode 100644 pkg/src/sources/EMGrank.h create mode 100644 pkg/src/sources/utils.h create mode 100644 pkg/tests/testthat.R create mode 100644 pkg/tests/testthat/helper-context1.R create mode 100644 pkg/tests/testthat/test-context1.R create mode 100644 pkg/vignettes/.gitignore create mode 100644 reports/bazar_Emilie/dessinBeta.m create mode 100644 reports/bazar_Emilie/niveauColores.m create mode 100644 reports/bazar_Emilie/ondelettes.m create mode 100644 reports/bazar_Emilie/ondelettes2.m create mode 100644 reports/bazar_Emilie/pretraitement.m create mode 100644 reports/bazar_Emilie/recomp.m create mode 100644 reports/bazar_Emilie/reconstruction.m create mode 100644 reports/bazar_Emilie/resultatSousEns.m create mode 100644 reports/bazar_Emilie/sautDeDimension.m create mode 100644 reports/bazar_Emilie/script.m create mode 100644 reports/bazar_Emilie/scriptEssai.m create mode 100644 reports/bazar_Emilie/scriptSimules.m create mode 100644 reports/bazar_Emilie/simulatedData.R create mode 100644 reports/essai16mars.R create mode 100644 reports/essaiPlot.R create mode 100644 reports/simulData_17mars.R create mode 100644 test/.gitignore create mode 100644 test/Makefile create mode 100644 test/generateRunSaveTest_EMGLLF.R create mode 100644 test/generateRunSaveTest_EMGrank.R create mode 100644 test/helper.R create mode 100755 test/run.sh create mode 100644 test/script_data.R create mode 100644 test/test.EMGLLF.c create mode 100644 test/test.EMGrank.c create mode 100644 test/test_EMGLLF.R create mode 100644 test/test_EMGrank.R create mode 100644 test/test_utils.c create mode 100644 test/test_utils.h diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..df69bb5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +*.pdf filter=fat +*.zip filter=fat +*.tar.xz filter=fat +*.data filter=fat +*.png filter=fat diff --git a/.gitfat b/.gitfat new file mode 100644 index 0000000..fc5cc54 --- /dev/null +++ b/.gitfat @@ -0,0 +1,2 @@ +[rsync] +remote = gitfat@auder.net:~/files/valse diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d45f42a --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +#ignore temporary files +*~ +*.swp +*.Rout + +#ignore R session files + RStudio files +.Rhistory +.RData +*.Rproj* +.Rprofile +.Rbuildignore + +#ignore R CMD build/check genrated files +/*.Rcheck/ +/*.tar.gz + +#ignore object files and executables +*.o +*.so +*.exe + +#misc +Rprof.out +*.zip diff --git a/README.md b/README.md new file mode 100644 index 0000000..cf08453 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# VAriable seLection with mixtureS of modEls + +This code is a re-writing from a similar project in MATLAB, still online [here](https://git.auder.net/?p=select.git;a=summary). + +Code co-authors: [Emilie Devijver](http://ama.liglab.fr/~devijver/), [Benjamin Gohehry](http://www.math.u-psud.fr/~goehry/). + +This code corresponds to applied parts of the PhD thesis of both co-authors. + +It uses git-fat to store binary objects. "git fat init && git fat pull" will get them. + +## Description + +TODO : see R package + +Trouver un jeu de données (+) intéressant (que les autres) ? +Ajouter toy datasets pour les tests (ou piocher dans MASS ?) + +ED : j'ai simulé un truc basique via 'generateXYdefault(10,5,6,2)' diff --git a/TODO b/TODO new file mode 100644 index 0000000..fed6eff --- /dev/null +++ b/TODO @@ -0,0 +1,15 @@ +n = 100; m = 70; p = 5 +X = matrix(runif(n*p, -10, 10), nrow=n) +Y = matrix(runif(n*m, -5, 15), nrow=n) + +V1 = valse::valse(X, Y, fast=FALSE) +Error in while (!pi2AllPositive) { : + missing value where TRUE/FALSE needed + +V2 = valse::valse(X, Y, fast=TRUE) +list() +Error in out[[ind_uniq[l]]] : + attempt to select less than one element in get1index + +==> Error identified: line 61 in initSmallEM.R, division by 0 +It occurs also for smallers values of n and m, e.g.: n = 20; m = 20; p = 3 diff --git a/biblio/articleMethodeDevijverADAC.pdf b/biblio/articleMethodeDevijverADAC.pdf new file mode 100644 index 0000000..9aad91e --- /dev/null +++ b/biblio/articleMethodeDevijverADAC.pdf @@ -0,0 +1 @@ +#$# git-fat 31158da74f93ba8b9e3883e8d4173e2da02255cf 1930037 diff --git a/biblio/these_Emilie.pdf b/biblio/these_Emilie.pdf new file mode 100644 index 0000000..1262e1d --- /dev/null +++ b/biblio/these_Emilie.pdf @@ -0,0 +1 @@ +#$# git-fat c89e92fb27bcb2c94bc0eddb2af5e724ff676b9a 2049044 diff --git a/hooks/pre-commit.bad b/hooks/pre-commit.bad new file mode 100755 index 0000000..bf5d735 --- /dev/null +++ b/hooks/pre-commit.bad @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Hook used to indent all source files before commiting +# + +# indent / format file by type +indent() { + # getting against as the current commit + if git rev-parse --verify HEAD >/dev/null 2>&1 + then + local against=HEAD + else + # Initial commit: diff against an empty tree object + local against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 + fi + + # loop on modified files + git diff --cached --name-only $against | while read file; + do + local ext=$(expr "$file" : ".*\(\..*\)") + case $ext in + .R|.r) + __indent_R; + ;; + esac + done +} + +# Indent the file with `indent' if this is a R file +__indent_R() { + if test ! -f $file + then + return; + fi + + echo "Indenting " $file + echo "library(formatR);formatR::tidy_source('$file',comment=TRUE,blank=TRUE, + arrow=TRUE,brace.newline=TRUE,indent=2,width.cutoff=80,file='$file')" | R --slave + git add "$file" +} + +indent diff --git a/hooks/pre-push b/hooks/pre-push new file mode 100755 index 0000000..ba815bd --- /dev/null +++ b/hooks/pre-push @@ -0,0 +1,3 @@ +#!/bin/sh + +git-fat push diff --git a/pkg/.gitignore b/pkg/.gitignore new file mode 100644 index 0000000..ddc8772 --- /dev/null +++ b/pkg/.gitignore @@ -0,0 +1,4 @@ +#ignore roxygen2 generated files +/NAMESPACE +/man/*.Rd +!/man/*-package.Rd diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION new file mode 100644 index 0000000..72723c0 --- /dev/null +++ b/pkg/DESCRIPTION @@ -0,0 +1,43 @@ +Package: valse +Title: Variable Selection With Mixture Of Models +Date: 2016-12-01 +Version: 0.1-0 +Description: Two methods are implemented to cluster data with finite mixture + regression models. Those procedures deal with high-dimensional covariates and + responses through a variable selection procedure based on the Lasso estimator. + A low-rank constraint could be added, computed for the Lasso-Rank procedure. + A collection of models is constructed, varying the level of sparsity and the + number of clusters, and a model is selected using a model selection criterion + (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model- + based clustering for high-dimensional data. Application to functional data' by + Emilie Devijver, published in Advances in Data Analysis and Clustering (2016). +Author: Benjamin Auder [aut,cre], + Emilie Devijver [aut], + Benjamin Goehry [aut] +Maintainer: Benjamin Auder +Depends: + R (>= 3.0.0) +Imports: + MASS, + parallel +Suggests: + capushe, + methods, + roxygen2, + testthat +URL: http://git.auder.net/?p=valse.git +License: MIT + file LICENSE +RoxygenNote: 6.0.1 +Collate: + 'plot_valse.R' + 'main.R' + 'selectVariables.R' + 'constructionModelesLassoRank.R' + 'constructionModelesLassoMLE.R' + 'computeGridLambda.R' + 'initSmallEM.R' + 'EMGrank.R' + 'EMGLLF.R' + 'generateXY.R' + 'A_NAMESPACE.R' + 'util.R' diff --git a/pkg/LICENSE b/pkg/LICENSE new file mode 100644 index 0000000..a212458 --- /dev/null +++ b/pkg/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) + 2014-2017, Benjamin Auder + 2014-2017, Emilie Devijver + 2016-2017, Benjamin Goehry + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R new file mode 100644 index 0000000..8e1783e --- /dev/null +++ b/pkg/R/A_NAMESPACE.R @@ -0,0 +1,16 @@ +#' @include generateXY.R +#' @include EMGLLF.R +#' @include EMGrank.R +#' @include initSmallEM.R +#' @include computeGridLambda.R +#' @include constructionModelesLassoMLE.R +#' @include constructionModelesLassoRank.R +#' @include selectVariables.R +#' @include main.R +#' @include plot_valse.R +#' +#' @useDynLib valse +#' +#' @importFrom parallel makeCluster parLapply stopCluster clusterExport +#' @importFrom MASS ginv +NULL diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R new file mode 100644 index 0000000..57638f9 --- /dev/null +++ b/pkg/R/EMGLLF.R @@ -0,0 +1,195 @@ +#' EMGLLF +#' +#' Description de EMGLLF +#' +#' @param phiInit an initialization for phi +#' @param rhoInit an initialization for rho +#' @param piInit an initialization for pi +#' @param gamInit initialization for the a posteriori probabilities +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param lambda regularization parameter in the Lasso estimation +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' +#' @return A list ... phi,rho,pi,LLF,S,affec: +#' phi : parametre de moyenne renormalisé, calculé par l'EM +#' rho : parametre de variance renormalisé, calculé par l'EM +#' pi : parametre des proportions renormalisé, calculé par l'EM +#' LLF : log vraisemblance associée à cet échantillon, pour les valeurs estimées des paramètres +#' S : ... +#' affec : ... +#' +#' @export +EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, fast) +{ + if (!fast) + { + # Function in R + return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps)) + } + + # Function in C + n <- nrow(X) #nombre d'echantillons + p <- ncol(X) #nombre de covariables + m <- ncol(Y) #taille de Y (multivarié) + k <- length(piInit) #nombre de composantes dans le mélange + .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k), + llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k, + PACKAGE = "valse") +} + +# R version - slow but easy to read +.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps) +{ + # Matrix dimensions + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + + # Adjustments required when p==1 or m==1 (var.sel. or output dim 1) + if (p==1 || m==1) + phiInit <- array(phiInit, dim=c(p,m,k)) + if (m==1) + rhoInit <- array(rhoInit, dim=c(m,m,k)) + + # Outputs + phi <- phiInit + rho <- rhoInit + pi <- piInit + llh <- -Inf + S <- array(0, dim = c(p, m, k)) + + # Algorithm variables + gam <- gamInit + Gram2 <- array(0, dim = c(p, p, k)) + ps2 <- array(0, dim = c(p, m, k)) + X2 <- array(0, dim = c(n, p, k)) + Y2 <- array(0, dim = c(n, m, k)) + + for (ite in 1:maxi) + { + # Remember last pi,rho,phi values for exit condition in the end of loop + Phi <- phi + Rho <- rho + Pi <- pi + + # Computations associated to X and Y + for (r in 1:k) + { + for (mm in 1:m) + Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm] + for (i in 1:n) + X2[i, , r] <- sqrt(gam[i, r]) * X[i, ] + for (mm in 1:m) + ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r]) + for (j in 1:p) + { + for (s in 1:p) + Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r]) + } + } + + ## M step + + # For pi + b <- sapply(1:k, function(r) sum(abs(phi[, , r]))) + gam2 <- colSums(gam) + a <- sum(gam %*% log(pi)) + + # While the proportions are nonpositive + kk <- 0 + pi2AllPositive <- FALSE + while (!pi2AllPositive) + { + pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi) + pi2AllPositive <- all(pi2 >= 0) + kk <- kk + 1 + } + + # t(m) is the largest value in the grid O.1^k such that it is nonincreasing + while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + # na.rm=TRUE to handle 0*log(0) + -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b)) + { + pi2 <- pi + 0.1^kk * (1/n * gam2 - pi) + kk <- kk + 1 + } + t <- 0.1^kk + pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi)) + + # For phi and rho + for (r in 1:k) + { + for (mm in 1:m) + { + ps <- 0 + for (i in 1:n) + ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r]) + nY2 <- sum(Y2[, mm, r]^2) + rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2) + } + } + + for (r in 1:k) + { + for (j in 1:p) + { + for (mm in 1:m) + { + S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + + sum(phi[-j, mm, r] * Gram2[j, -j, r]) + if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- 0 + } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r] + } else { + phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r] + } + } + } + } + + ## E step + + # Precompute det(rho[,,r]) for r in 1...k + detRho <- sapply(1:k, function(r) gdet(rho[, , r])) + sumLogLLH <- 0 + for (i in 1:n) + { + # Update gam[,]; use log to avoid numerical problems + logGam <- sapply(1:k, function(r) { + log(pi[r]) + log(detRho[r]) - 0.5 * + sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2) + }) + + logGam <- logGam - max(logGam) #adjust without changing proportions + gam[i, ] <- exp(logGam) + norm_fact <- sum(gam[i, ]) + gam[i, ] <- gam[i, ] / norm_fact + sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2)) + } + + sumPen <- sum(pi^gamma * b) + last_llh <- llh + llh <- -sumLogLLH/n #+ lambda * sumPen + dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh))) + Dist1 <- max((abs(phi - Phi))/(1 + abs(phi))) + Dist2 <- max((abs(rho - Rho))/(1 + abs(rho))) + Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi))) + dist2 <- max(Dist1, Dist2, Dist3) + + if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) + break + } + + affec = apply(gam, 1, which.max) + list(phi = phi, rho = rho, pi = pi, llh = llh, S = S, affec=affec) +} diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R new file mode 100644 index 0000000..4054e25 --- /dev/null +++ b/pkg/R/EMGrank.R @@ -0,0 +1,120 @@ +#' EMGrank +#' +#' Description de EMGrank +#' +#' @param Pi Parametre de proportion +#' @param Rho Parametre initial de variance renormalisé +#' @param mini Nombre minimal d'itérations dans l'algorithme EM +#' @param maxi Nombre maximal d'itérations dans l'algorithme EM +#' @param X Régresseurs +#' @param Y Réponse +#' @param eps Seuil pour accepter la convergence +#' @param rank Vecteur des rangs possibles +#' +#' @return A list ... +#' phi : parametre de moyenne renormalisé, calculé par l'EM +#' LLF : log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres +#' +#' @export +EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE) +{ + if (!fast) + { + # Function in R + return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, eps, rank)) + } + + # Function in C + n <- nrow(X) #nombre d'echantillons + p <- ncol(X) #nombre de covariables + m <- ncol(Y) #taille de Y (multivarié) + k <- length(Pi) #nombre de composantes dans le mélange + .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), phi = double(p * m * k), + LLF = double(1), n, p, m, k, PACKAGE = "valse") +} + +# helper to always have matrices as arg (TODO: put this elsewhere? improve?) --> +# Yes, we should use by-columns storage everywhere... [later!] +matricize <- function(X) +{ + if (!is.matrix(X)) + return(t(as.matrix(X))) + return(X) +} + +# R version - slow but easy to read +.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, eps, rank) +{ + # matrix dimensions + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(Pi) + + # init outputs + phi <- array(0, dim = c(p, m, k)) + Z <- rep(1, n) + LLF <- 0 + + # local variables + Phi <- array(0, dim = c(p, m, k)) + deltaPhi <- c() + sumDeltaPhi <- 0 + deltaPhiBufferSize <- 20 + + # main loop + ite <- 1 + while (ite <= mini || (ite <= maxi && sumDeltaPhi > eps)) + { + # M step: update for Beta ( and then phi) + for (r in 1:k) + { + Z_indice <- seq_len(n)[Z == r] #indices where Z == r + if (length(Z_indice) == 0) + next + # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% + crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ]))) + S <- s$d + # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation + # of the initial product + if (rank[r] < length(S)) + S[(rank[r] + 1):length(S)] <- 0 + phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r] + } + + # Step E and computation of the loglikelihood + sumLogLLF2 <- 0 + for (i in seq_len(n)) + { + sumLLF1 <- 0 + maxLogGamIR <- -Inf + for (r in seq_len(k)) + { + dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r]) + logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct + # Z[i] = index of max (gam[i,]) + if (logGamIR > maxLogGamIR) + { + Z[i] <- r + maxLogGamIR <- logGamIR + } + sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2) + } + sumLogLLF2 <- sumLogLLF2 + log(sumLLF1) + } + + LLF <- -1/n * sumLogLLF2 + + # update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain? + if (length(deltaPhi) > deltaPhiBufferSize) + deltaPhi <- deltaPhi[2:length(deltaPhi)] + sumDeltaPhi <- sum(abs(deltaPhi)) + + # update other local variables + Phi <- phi + ite <- ite + 1 + } + return(list(phi = phi, LLF = LLF)) +} diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R new file mode 100644 index 0000000..ac0788a --- /dev/null +++ b/pkg/R/computeGridLambda.R @@ -0,0 +1,37 @@ +#' computeGridLambda +#' +#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator +#' +#' @param phiInit value for phi +#' @param rhoInit for rho +#' @param piInit for pi +#' @param gamInit value for gamma +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param gamma power of weights in the penalty +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param eps threshold to stop EM algorithm +#' +#' @return the grid of regularization parameters +#' +#' @export +computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, + maxi, eps, fast) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + + list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, + X, Y, eps, fast) + + grid <- array(0, dim = c(p, m, k)) + for (j in 1:p) + { + for (mm in 1:m) + grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma) + } + sort(unique(grid)) +} diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R new file mode 100644 index 0000000..d2a16bc --- /dev/null +++ b/pkg/R/constructionModelesLassoMLE.R @@ -0,0 +1,113 @@ +#' constructionModelesLassoMLE +#' +#' Construct a collection of models with the Lasso-MLE procedure. +#' +#' @param phiInit an initialization for phi, get by initSmallEM.R +#' @param rhoInit an initialization for rho, get by initSmallEM.R +#' @param piInit an initialization for pi, get by initSmallEM.R +#' @param gamInit an initialization for gam, get by initSmallEM.R +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param S output of selectVariables.R +#' @param ncores Number of cores, by default = 3 +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with several models, defined by phi, rho, pi, llh +#' +#' @export +constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, + maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose) +{ + if (ncores > 1) + { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit", + "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", + "ncores", "fast", "verbose")) + } + + # Individual model computation + computeAtLambda <- function(lambda) + { + if (ncores > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Computations for lambda=", lambda)) + + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + k <- length(piInit) + sel.lambda <- S[[lambda]]$selected + # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix + col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars + if (length(col.sel) == 0) + return(NULL) + + # lambda == 0 because we compute the EMV: no penalization here + res <- EMGLLF(array(phiInit,dim=c(p,m,k))[col.sel, , ], rhoInit, piInit, gamInit, + mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast) + + # Eval dimension from the result + selected + phiLambda2 <- res$phi + rhoLambda <- res$rho + piLambda <- res$pi + phiLambda <- array(0, dim = c(p, m, k)) + for (j in seq_along(col.sel)) + phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ] + dimension <- length(unlist(sel.lambda)) + + ## Affectations + Gam <- matrix(0, ncol = length(piLambda), nrow = n) + for (i in 1:n) + { + for (r in 1:length(piLambda)) + { + sqNorm2 <- sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + Gam[i, r] <- piLambda[r] * exp(-0.5 * sqNorm2) * det(rhoLambda[, , r]) + } + } + Gam2 <- Gam/rowSums(Gam) + affec <- apply(Gam2, 1, which.max) + proba <- Gam2 + LLH <- c(sum(log(apply(Gam,1,sum))), (dimension + m + 1) * k - 1) + # ## Computation of the loglikelihood + # # Precompute det(rhoLambda[,,r]) for r in 1...k + # detRho <- sapply(1:k, function(r) gdet(rhoLambda[, , r])) + # sumLogLLH <- 0 + # for (i in 1:n) + # { + # # Update gam[,]; use log to avoid numerical problems + # logGam <- sapply(1:k, function(r) { + # log(piLambda[r]) + log(detRho[r]) - 0.5 * + # sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + # }) + # + # #logGam <- logGam - max(logGam) #adjust without changing proportions -> change the LLH + # gam <- exp(logGam) + # norm_fact <- sum(gam) + # sumLogLLH <- sumLogLLH + log(norm_fact) - m/2* log(2 * base::pi) + # } + #llhLambda <- c(-sumLogLLH/n, (dimension + m + 1) * k - 1) + list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = LLH, affec = affec, proba = proba) + } + + # For each lambda, computation of the parameters + out <- + if (ncores > 1) { + parLapply(cl, 1:length(S), computeAtLambda) + } else { + lapply(1:length(S), computeAtLambda) + } + + if (ncores > 1) + parallel::stopCluster(cl) + + out +} diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R new file mode 100644 index 0000000..dc88f67 --- /dev/null +++ b/pkg/R/constructionModelesLassoRank.R @@ -0,0 +1,95 @@ +#' constructionModelesLassoRank +#' +#' Construct a collection of models with the Lasso-Rank procedure. +#' +#' @param S output of selectVariables.R +#' @param k number of components +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1 +#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5 +#' @param ncores Number of cores, by default = 3 +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with several models, defined by phi, rho, pi, llh +#' +#' @export +constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, + ncores, fast, verbose) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + L <- length(S) + + # Possible interesting ranks + deltaRank <- rank.max - rank.min + 1 + Size <- deltaRank^k + RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1) + for (r in 1:k) + { + # On veut le tableau de toutes les combinaisons de rangs possibles, et des + # lambdas Dans la première colonne : on répète (rank.max-rank.min)^(k-1) chaque + # chiffre : ça remplit la colonne Dans la deuxieme : on répète + # (rank.max-rank.min)^(k-2) chaque chiffre, et on fait ça (rank.max-rank.min)^2 + # fois ... Dans la dernière, on répète chaque chiffre une fois, et on fait ça + # (rank.min-rank.max)^(k-1) fois. + RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1), + each = deltaRank^(k - r)), each = L) + } + RankLambda[, k + 1] <- rep(1:L, times = Size) + + if (ncores > 1) + { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl, envir = environment(), varlist = c("A1", "Size", + "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", + "verbose")) + } + + computeAtLambda <- function(index) + { + lambdaIndex <- RankLambda[index, k + 1] + rankIndex <- RankLambda[index, 1:k] + if (ncores > 1) + require("valse") #workers start with an empty environment + + # 'relevant' will be the set of relevant columns + selected <- S[[lambdaIndex]]$selected + relevant <- c() + for (j in 1:p) + { + if (length(selected[[j]]) > 0) + relevant <- c(relevant, j) + } + if (max(rankIndex) < length(relevant)) + { + phi <- array(0, dim = c(p, m, k)) + if (length(relevant) > 0) + { + res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, + X[, relevant], Y, eps, rankIndex, fast) + llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m))) + phi[relevant, , ] <- res$phi + } + list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho) + } + } + + # For each lambda in the grid we compute the estimators + out <- + if (ncores > 1) { + parLapply(cl, seq_len(length(S) * Size), computeAtLambda) + } else { + lapply(seq_len(length(S) * Size), computeAtLambda) + } + + if (ncores > 1) + parallel::stopCluster(cl) + + out +} diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R new file mode 100644 index 0000000..064b54b --- /dev/null +++ b/pkg/R/generateXY.R @@ -0,0 +1,39 @@ +#' generateXY +#' +#' Generate a sample of (X,Y) of size n +#' +#' @param n sample size +#' @param π proportion for each cluster +#' @param meanX matrix of group means for covariates (of size p) +#' @param covX covariance for covariates (of size p*p) +#' @param β regression matrix, of size p*m*k +#' @param covY covariance for the response vector (of size m*m*K) +#' +#' @return list with X and Y +#' +#' @export +generateXY <- function(n, π, meanX, β, covX, covY) +{ + p <- dim(covX)[1] + m <- dim(covY)[1] + k <- dim(covY)[3] + + X <- matrix(nrow = 0, ncol = p) + Y <- matrix(nrow = 0, ncol = m) + + # random generation of the size of each population in X~Y (unordered) + sizePop <- rmultinom(1, n, π) + class <- c() #map i in 1:n --> index of class in 1:k + + for (i in 1:k) + { + class <- c(class, rep(i, sizePop[i])) + newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX) + X <- rbind(X, newBlockX) + Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% + β[, , i], covY[, , i])))) + } + + shuffle <- sample(n) + list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle]) +} diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R new file mode 100644 index 0000000..01147d7 --- /dev/null +++ b/pkg/R/initSmallEM.R @@ -0,0 +1,80 @@ +#' initialization of the EM algorithm +#' +#' @param k number of components +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' +#' @return a list with phiInit, rhoInit, piInit, gamInit +#' @export +#' @importFrom methods new +#' @importFrom stats cutree dist hclust runif +initSmallEM <- function(k, X, Y, fast) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + nIte <- 20 + Zinit1 <- array(0, dim = c(n, nIte)) + betaInit1 <- array(0, dim = c(p, m, k, nIte)) + sigmaInit1 <- array(0, dim = c(m, m, k, nIte)) + phiInit1 <- array(0, dim = c(p, m, k, nIte)) + rhoInit1 <- array(0, dim = c(m, m, k, nIte)) + Gam <- matrix(0, n, k) + piInit1 <- matrix(0, nIte, k) + gamInit1 <- array(0, dim = c(n, k, nIte)) + LLFinit1 <- list() + + # require(MASS) #Moore-Penrose generalized inverse of matrix + for (repet in 1:nIte) + { + distance_clus <- dist(cbind(X, Y)) + tree_hier <- hclust(distance_clus) + Zinit1[, repet] <- cutree(tree_hier, k) + + for (r in 1:k) + { + Z <- Zinit1[, repet] + Z_indice <- seq_len(n)[Z == r] #renvoit les indices où Z==r + if (length(Z_indice) == 1) { + betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% + crossprod(t(X[Z_indice, ]), Y[Z_indice, ]) + } else { + betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% + crossprod(X[Z_indice, ], Y[Z_indice, ]) + } + sigmaInit1[, , r, repet] <- diag(m) + phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet] + rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet]) + piInit1[repet, r] <- mean(Z == r) + } + + for (i in 1:n) + { + for (r in 1:k) + { + dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] + - X[i, ] %*% phiInit1[, , r, repet]) + Gam[i, r] <- piInit1[repet, r] * + det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct) + } + sumGamI <- sum(Gam[i, ]) + # TODO: next line is a division by zero if dotProduct is big + gamInit1[i, , repet] <- Gam[i, ]/sumGamI + } + + miniInit <- 10 + maxiInit <- 11 + + init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ], + gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y, + eps = 1e-04, fast) + LLFinit1[[repet]] <- init_EMG$llh + } + b <- which.min(LLFinit1) + phiInit <- phiInit1[, , , b] + rhoInit <- rhoInit1[, , , b] + piInit <- piInit1[b, ] + gamInit <- gamInit1[, , b] + + return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit)) +} diff --git a/pkg/R/main.R b/pkg/R/main.R new file mode 100644 index 0000000..387d553 --- /dev/null +++ b/pkg/R/main.R @@ -0,0 +1,152 @@ +#' valse +#' +#' Main function +#' +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param procedure among 'LassoMLE' or 'LassoRank' +#' @param selecMod method to select a model among 'DDSE', 'DJump', 'BIC' or 'AIC' +#' @param gamma integer for the power in the penaly, by default = 1 +#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10 +#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100 +#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4 +#' @param kmin integer, minimum number of clusters, by default = 2 +#' @param kmax integer, maximum number of clusters, by default = 10 +#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1 +#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5 +#' @param ncores_outer Number of cores for the outer loop on k +#' @param ncores_inner Number of cores for the inner loop on lambda +#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param grid_lambda, a vector with regularization parameters if known, by default numeric(0) +#' @param size_coll_mod (Maximum) size of a collection of models +#' @param fast TRUE to use compiled C code, FALSE for R code only +#' @param verbose TRUE to show some execution traces +#' +#' @return a list with estimators of parameters +#' +#' @examples +#' #TODO: a few examples +#' @export +valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, + maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, + ncores_inner = 1, thresh = 1e-08, grid_lambda = numeric(0), size_coll_mod = 10, + fast = TRUE, verbose = FALSE, plot = TRUE) +{ + n <- nrow(X) + p <- ncol(X) + m <- ncol(Y) + + if (verbose) + print("main loop: over all k and all lambda") + + if (ncores_outer > 1) { + cl <- parallel::makeCluster(ncores_outer, outfile = "") + parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", + "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", + "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", + "size_coll_mod", "verbose", "p", "m")) + } + + # Compute models with k components + computeModels <- function(k) + { + if (ncores_outer > 1) + require("valse") #nodes start with an empty environment + + if (verbose) + print(paste("Parameters initialization for k =", k)) + # smallEM initializes parameters by k-means and regression model in each + # component, doing this 20 times, and keeping the values maximizing the + # likelihood after 10 iterations of the EM algorithm. + P <- initSmallEM(k, X, Y, fast) + if (length(grid_lambda) == 0) + { + grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, + X, Y, gamma, mini, maxi, eps, fast) + } + if (length(grid_lambda) > size_coll_mod) + grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)] + + if (verbose) + print("Compute relevant parameters") + # select variables according to each regularization parameter from the grid: + # S$selected corresponding to selected variables + S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, + gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast) + + if (procedure == "LassoMLE") { + if (verbose) + print("run the procedure Lasso-MLE") + # compute parameter estimations, with the Maximum Likelihood Estimator, + # restricted on selected variables. + models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, + P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose) + } else { + if (verbose) + print("run the procedure Lasso-Rank") + # compute parameter estimations, with the Low Rank Estimator, restricted on + # selected variables. + models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min, + rank.max, ncores_inner, fast, verbose) + } + # warning! Some models are NULL after running selectVariables + models <- models[sapply(models, function(cell) !is.null(cell))] + models + } + + # List (index k) of lists (index lambda) of models + models_list <- + if (ncores_outer > 1) { + parLapply(cl, kmin:kmax, computeModels) + } else { + lapply(kmin:kmax, computeModels) + } + if (ncores_outer > 1) + parallel::stopCluster(cl) + + if (!requireNamespace("capushe", quietly = TRUE)) + { + warning("'capushe' not available: returning all models") + return(models_list) + } + + # Get summary 'tableauRecap' from models + tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) + { + models <- models_list[[i]] + # For a collection of models (same k, several lambda): + LLH <- sapply(models, function(model) model$llh[1]) + k <- length(models[[1]]$pi) + sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[, + , 1] != 0) + 1) - 1) + data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, + complexity = sumPen, contrast = -LLH) + })) + tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] + + if (verbose == TRUE) + print(tableauRecap) + modSel <- capushe::capushe(tableauRecap, n) + indModSel <- if (selecMod == "DDSE") + { + as.numeric(modSel@DDSE@model) + } else if (selecMod == "Djump") + { + as.numeric(modSel@Djump@model) + } else if (selecMod == "BIC") + { + modSel@BIC_capushe$model + } else if (selecMod == "AIC") + { + modSel@AIC_capushe$model + } + + listMod <- as.integer(unlist(strsplit(as.character(indModSel), "[.]"))) + modelSel <- models_list[[listMod[1]]][[listMod[2]]] + modelSel$tableau <- tableauRecap + + if (plot) + print(plot_valse(X, Y, modelSel, n)) + + return(modelSel) +} diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R new file mode 100644 index 0000000..ec2302d --- /dev/null +++ b/pkg/R/plot_valse.R @@ -0,0 +1,89 @@ +#' Plot +#' +#' It is a function which plots relevant parameters +#' +#' @param X matrix of covariates (of size n*p) +#' @param Y matrix of responses (of size n*m) +#' @param model the model constructed by valse procedure +#' @param n sample size +#' @return several plots +#' +#' @examples TODO +#' +#' @export +#' +plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA) +{ + require("gridExtra") + require("ggplot2") + require("reshape2") + require("cowplot") + + K <- length(model$pi) + ## regression matrices + gReg <- list() + for (r in 1:K) + { + Melt <- melt(t((model$phi[, , r]))) + gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", + midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r)) + } + print(gReg) + + ## Differences between two clusters + if (comp) + { + if (is.na(k1) || is.na(k)) + print("k1 and k2 must be integers, representing the clusters you want to compare") + Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2])) + gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle(paste("Difference between regression matrices in cluster", + k1, "and", k2)) + print(gDiff) + } + + ### Covariance matrices + matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K) + for (r in 1:K) + matCov[, r] <- diag(model$rho[, , r]) + MeltCov <- melt(matCov) + gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle("Covariance matrices") + print(gCov) + + ### Proportions + gam2 <- matrix(NA, ncol = K, nrow = n) + for (i in 1:n) + gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i]) + + bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + + geom_boxplot() + + theme(legend.position = "none") + + background_grid(major = "xy", minor = "none") + print(bp) + + ### Mean in each cluster + XY <- cbind(X, Y) + XY_class <- list() + meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2]) + for (r in 1:K) + { + XY_class[[r]] <- XY[model$affec == r, ] + if (sum(model$affec == r) == 1) { + meanPerClass[, r] <- XY_class[[r]] + } else { + meanPerClass[, r] <- apply(XY_class[[r]], 2, mean) + } + } + data <- data.frame(mean = as.vector(meanPerClass), + cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K)) + g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster)) + print(g + geom_line(aes(linetype = cluster, color = cluster)) + + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster")) +} diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R new file mode 100644 index 0000000..bab45cc --- /dev/null +++ b/pkg/R/selectVariables.R @@ -0,0 +1,83 @@ +#' selectVariables +#' +#' It is a function which construct, for a given lambda, the sets of relevant variables. +#' +#' @param phiInit an initial estimator for phi (size: p*m*k) +#' @param rhoInit an initial estimator for rho (size: m*m*k) +#' @param piInit an initial estimator for pi (size : k) +#' @param gamInit an initial estimator for gamma +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param gamma power in the penalty +#' @param glambda grid of regularization parameters +#' @param X matrix of regressors +#' @param Y matrix of responses +#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param eps threshold to say that EM algorithm has converged +#' @param ncores Number or cores for parallel execution (1 to disable) +#' +#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi +#' +#' @examples TODO +#' +#' @export +#' +selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, + glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast) +{ + if (ncores > 1) { + cl <- parallel::makeCluster(ncores, outfile = "") + parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", + "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment()) + } + + # Computation for a fixed lambda + computeCoefs <- function(lambda) + { + params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, + X, Y, eps, fast) + + p <- ncol(X) + m <- ncol(Y) + + # selectedVariables: list where element j contains vector of selected variables + # in [1,m] + selectedVariables <- lapply(1:p, function(j) { + # from boolean matrix mxk of selected variables obtain the corresponding boolean + # m-vector, and finally return the corresponding indices + if (m>1) { + seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)] + } else { + if (any(params$phi[j, , ] > thresh)) + 1 + else + numeric(0) + } + }) + + list(selected = selectedVariables, Rho = params$rho, Pi = params$pi) + } + + # For each lambda in the grid, we compute the coefficients + out <- + if (ncores > 1) { + parLapply(cl, glambda, computeCoefs) + } else { + lapply(glambda, computeCoefs) + } + if (ncores > 1) + parallel::stopCluster(cl) + + print(out) + # Suppress models which are computed twice En fait, ca ca fait la comparaison de + # tous les parametres On veut juste supprimer ceux qui ont les memes variables + # sélectionnées + # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ] + selec <- lapply(out, function(model) model$selected) + ind_dup <- duplicated(selec) + ind_uniq <- which(!ind_dup) + out2 <- list() + for (l in 1:length(ind_uniq)) + out2[[l]] <- out[[ind_uniq[l]]] + out2 +} diff --git a/pkg/R/util.R b/pkg/R/util.R new file mode 100644 index 0000000..f8b01cc --- /dev/null +++ b/pkg/R/util.R @@ -0,0 +1,7 @@ +# ... +gdet <- function(M) +{ + if (is.matrix(M)) + return (det(M)) + return (M[1]) #numeric, double +} diff --git a/pkg/data/data.RData b/pkg/data/data.RData new file mode 100644 index 0000000000000000000000000000000000000000..a9f09e143adfa04f595a40b993efa688ebd05a4b GIT binary patch literal 1010 zcmVp&WZl>sV;MkEktY=rRsmk}pa$Ymf| zh%+!j5MfYaWQj0NjSwZ8tauaP}k|=7GfR?3xiC$wy8cy^2&Ku+%x#Sd9>t$ zLmDm^dp4WeYeFCEnoB`wICgw3@@~y4gPhV+@xcK*P%!AO`$5DRkTD)oXS7Ic^{*?? zW@d4z1MdXuj-ykhk3R&r1yFrc`xn~9sSvAlM@y(*Vfi)3U6 z6_Z?=CI9l)hCa+Y{C)q>;4LnlH&A!>UOY(J`%@WB*Kw+7R_I|=538Xe^IaxaV6LQE z${Gm5C2JFkVyl*<7&)30u6P0er*eaKb_bUdxM{YW--RQw)~7G!8eyO2PzAB0kEF)t z?XHy_!ZphSLNwX1fD4Q6{c8^!k<;Mak*tgWMAQeqci5;=}*!@7W0`L#=>CsH@lls&U?r! zc@iF6c@hNYlSY*_w~^;y75Te$4sIR|=P-7f!9+gWnOy2Zp&>Z7CWgV{3%AEc$_DH_ zI>bpVy9f&=2Rmjjlt7nvW^AOd048cSvZ`JygnrlCayPGi$Zr}>`8kaSB7KiJ-K~$X zKP>8Li?SOh8ynTuGYsIVqt0dNolu+}p1ZeL@fJ*RK52ejSBxS*_1**vLzoeFJ*W;i g2h$j?5gPyHEAZ5@~ literal 0 HcmV?d00001 diff --git a/pkg/data/data2.RData b/pkg/data/data2.RData new file mode 100644 index 0000000000000000000000000000000000000000..80003e3c090402d85e308401bb23159ebe1d53ca GIT binary patch literal 18385 zcmV(vK9w!7o5D=RJ-5M3~85y5TpT4>flw+^-(2AySBs3Udj4#=a}%qfT$rVfI8z-ra~% zB1xchqLg+qfo#aw;oL|PtlrVP(vazZfxnE>z8k!Q1sa;lxS#eg*2mVMqI4QZjXz&$ z=U5<+SF6AIny?qAF1q<2=_SRMapSSLsP{Nd^Vu_(`zA~~Uv8{9l?Y3(FE5@^|A`~{ zL(C#dNjMO5t(z;!yk5<;1I0 zutCP|XY;iM#;nxnGzG@6+1fjqucrg1pE)!fN`JD`?%oNV%hWhG)M(%sVFdj=jqw9w z!7ywawBTE2kALO!Xt#}pVa`?XTlryEEWcu=okqul?e3d{GULy1{9yZ@yciW|Y4vqa zmKem*^iulLv)V^2m^v&+WQ)ka82~-)A`af z(D?nb*GUz62xqo6C-*YM^j(F{Jyl)U+KaLd*X3ZVBQof(Jr|lK|LA1VC&6vai8fs| zQtT6nW%E+!z_EY}F3wtZVC(yM@seFH^oyk&u=j1n>2bkJw!RWrC;US?EJYYLfAS6$ z*@Z!>$)Q_n3kEn}@g*R3ngW}&9A}`40*1m;64f=%!CW5yletb(Y_Q%M3DkRzjeEFH zf2JD2!7JR~u6`DT7QPSe-&Q7ZrY}cxZTT)Vi(J-QKV5`#N^Cq$1ztoFyw_lw!GX4k zKW-ZaghM{*_;b&`0%*NtW}D1m zm0ersEqe6C^CgIOvRvNI;u3Jn>Y6nGq64c7Fe_>kH@A2NvT3n1J(Ykos z3-H*Go5~y&wk%x+)`MqpINpL~m*YKHYdm6+?P!aG(=OxnSsFOUSoZZ#`XShOF#q}& z-5F?lzCSy!<1O@4X>DAdUWe8WPhp*03LHDT_40gGDGW3kIlId}g}Jrm*HdP(P_rlF z!F`+C*vGN?_V9;aSaFk{g>sh$Haxybmc(F-bJ6jF)zKoj_WZHHG(j7N&gxt4K23!^ z)*VEPgg_Yckf{1Y9Rf2ql=sO|X=24Bt%EVc64bHF=kecrid*t2Pt(Ki!`#tm;a0m{ z*prof=bXnkBB{f1+0$BHm@iLrj`CI;u4r5TD(g{zNwX7%Q1=D$1Eixgx#*#xN`XM{DKzLT;im!Vf~{Ro#@ z437LQ=2z0b0bR2BF$GEGIGJ!slYW&JN5omfnmuQsC8+bbQ_?Zqc;ULNKU4x8MA7v# z-s(7S;2dMw+XaKPo*g##f523_!nOE2{y4BO_04rI3Tnyihc-T0Le||6-DeMU;Exra z6I*){phd^;o);|x7H7z5^raR+bd2FXGtV!udWurS!g2{4Z)I`$7q~!{74d4Oof=NA zFj~~wT4B4VyQJfv6ko!Fu$Mf;v-fSc_*FeaawBBwg$!262rU^RSS5ChT zb|sRZ5xC^e_ZgcmD}TPOk%nV~+_(0a?7=Qn$>hw^lJxONI)R<+4$ z+a>7iScuwi-s%55No7RN7PQ5RZ{m0ctoK!#-=O!FWi(Oc{e5PG1kW%4%DN^Wg?UxfV${70~-fUG@je z-@i{i^F~@&$5I?U|7K`g8nog4mNlFR3J0JMe&wHMVu$Fqctqp z<%l<|UR=#OJspA*$33o7ktjpq!-EeE2E}pPSn0v75_T928n)FG*aLH)q>hOOtYItr zu#&-YD$ch*j+Lg##MzF!JU+cXP#~@vV!cQQB~OIo6gXONPMRf$YK9sXZ%}KD1yw_O zPIu5piX{Yfg zb3{St)TJc&mMB8`MMVv4hR!idwz3jQQq-kQVi0GyFL#6)mEiajN?X^;EnFpC7ae5X zfT`g#BX{P;p~3jqXw8-$F8r=nepfGvi#e(TIV7tDiU3mo!6scCHr_A9lqL@I*>zdF zM*cuul=#*aZdPcCQE-qbz7E4`95?GZdazTn|BlZ#J4Qd+Jmy+y05%7-?f6;SaA|bc zne6Y~FkI2znX6fVGc>>SJ>AaXNZf0SJZ4##J7YoQqge*x-L>2$pIn$fyTmR*@PeKj z?#nW>uV918?qtP$6pkh8gfYBJf$8*oiO%*Om}5U`T5GM0MN|^Hsd+;%SWa#zLqd+- z)1^oM@QPuZ@xU8xOI4U@`dTw_?IDg_(B@hU7J??BM~-PCAvjrM?CtzV3|8%&+MA~6 zq1Mrz>S&WV3}2VA{m{@&pzA(rWUg9+`6ZjA&U*0#GEbGygjZS6nz^Unq?sRjnce%6 z$#&u7ZL7NT?b$HE>=`y*-U{gj#5`&3PdKgDO)6+fg-s>RE9)zixV+DI;f1>-E-jvY z!zU__4L)C`DBV9{ky=Olr1c1_z0JQo`s*n4$joW-O=QE*mx@|n9-bhQFh3)n|9Jvu zk!fnwH3b@1@7!j|JdO)ri+y&HRl(wV|6c#PqcHQnE9F+YF6L6zOk24(!eFd#7RNRp z3{%T#{W)|BMsz#kzE3_SP#F39+NOFyW#?ozOQ9;Ho`1pcLt6y?Ud*yOyzeWN%GmO< zR#w3zv!i3M7!^#_{|KQupNJc+y`guX-iDC{KUN9_UKpkjV{eOn39CM&cQefFaqv}L zjd!0i&LnG*YrVPzlNw_#=XG`w$i+-2o#;}DWI^OV1(^zAL8SA3|NVZ*eYP|-yLJd8 zE$o(3e}9Djrl(gQe6NMRJF8~b;(1_I%|GY&Yi$^aYH)g8QjSyf#cTJYcblk4hEly#ZQLBs+4YVoUD&H9&vx2K&g1?)3(?$L?4@27YaCpL#{Gy zUdxxD>M+$vuEasCWLnX_oZ$w&`$J>+lxDFmLS*xX%OZ?P?qlC4p$dt^K0cp3F5^;Z zR%V3kH(a9ZAg$I?0^;j#*Rhf|-01oArlR~ZWRUC~wih2G(y*&>*-+-;bj;ee)k!Cq zprw*9s6GcvKaC$|?K=UbwY@|)b61=qEI*iHD}xbnz7lumAOh)4a>Ih`K$vLva#XdB zfx!`T_c!jFFhPI$y3KQQ4E`izety#kW^DF9XS7nr*^vk0`v*i}@uB@ICL=3c+`jXn z;l4GrRSpQ66$|4cQ{ATv4|n{46H6Qo{+95Elr#I%U?J4$gl}H3XCP3V@aQ%ELBL-p z^YlVV#Gqk;`L3NKEmWZA&yVZRao~Pk=p8ph0_EEHNSkm64)AwRHeY)Ujg!9>KC-dH zIJH8F_Tg_t%CCyoLY1{}BiJ-Q-|8~Y8I#W3ny}bv&+cT58?7 zlminc?|(h+JBADNPPaDs?qkz{+-tLYj?k5D1-|hs&?*!gDAe^B`#et_VRUiBp55x{ zcKO#}CfwO_UrZ&Wlq<2nGbzCCqH~TOk!{dBPrm!FnLdmc$#I^~vx1oSGYxeoK42Q_ z(@*Sox?qHl!YEVuHl(z79Cx~+0X@z2y36;tfp~D2c;ihT*lH~%4?NWbU(}8<(ecYd zd(X=gPmUGh=<9g3sK=(TC7{pv3J7#K*5^*caM^^k(Z;fL6vU`}21;R8V?{TtFw~}$B8de`Y zeD9)n3~Fi3wvQzmU{l$bR#8(kC|^!|PHa(y;=1oY_x6=t&nA!}ur}*R2Q@>^Y#eQ5jHyQBAHf)(K zG`PW_34^x;EAY@xT)6+^w#)moM9PvY;sl{wBX$)#vV>*GMl=|{$=3~#1CE7+3ifa`a&hbQ!3}HrzY{#QhB2Eahdm!Ybusl3#%Rr}+=)c2nWn zO)Fcuk{y3Bkn_y}1__+M)Ny4jwF_5OKc*j{s(}_sPfOCzMOadl!pb)ziRF*(ObXIu z5=r>qEc7u6V`usFp9%UpoMxs;Q2Fx>=i+@oXIPwo5sJ63|iqFe!?E zJD~y11!ouSh*=o5H%;C(M-p0U6eLTvIk2Grud?U$7SY>@RVaw78MP@rA-c5(^!DQ1{Qc+|j1 zl;lFr~`#1b!`!J*|SSpX~59C5L$#03r@H%f_IKAe2$wC9mCD{9zYN#&w6zzqUv=jvuOPH(hs zu{G2|pL|E-`;HYH_!Ppc(H4&jPro0J^tg>Rk1VX7KbM06{R;JwiE(J;i+rEoWruzI zKVvvjL~&Tx{wY51!kV(Xw|;z$$Myr|S;K?-G5vnT{Ft@@3^6q|K6M|1*jDE7+BSVG z^41Vx3?adiOBV4Ars8<8m5j~9OHk(0VBmX6RY^5U`o6+ zRG?`RHtv18c{4X23t~J;=cl=FWZKW`p7|z@R4cx?!@-W5e)jJke>n#2u44-FOC2!e zzVx|#C=oYx7K#*p3POhF8TE+blAU;n_9_`k8w?M-Q2bUkfibPa$F6fJU^6+j$QR~x zOz*h#uB@I876b|_%#1=IrM~&qCmI&$A-S%W zD?qCw5BFCyVFIaeR-@CNSJ?ON#IumA(zq#=YyF4i0k+<@l9;hEB9J&9*{Y2>gPR-i z9&|zS*p$n*Aar&c=ga!XNN;vx+y0m)Qj&YHY!N_N#cv1gRXGuFv3;k#=V`~h8KK!T zN9^~dA*>Ph7HZhF4+@W)ujxpK;`U@Arg8q{* zRm&x7A8`)bA{fqX?wN$9iUPgCuKU0gw}yEZ^9q0op>y~WrP1S&aOQ3lDkib^D2AD z4W|jv!ug=mV4W(CwiceofE_1!o)pR6WLAq<q-xXf`~Wdp_WVNlof$p*Ju^y{pKj>e!BbQ$=yV>Z%kpBKtZig0dAxGr> z?e|fGE!`9!L_&YU1ztJMi#WmRRBp=i0GDZPXwoMSV+%)M{A*J~=yx&q(G+dL z{9VY9DY-->**||T&{PW&EwAtz#V+Dsl40$!-gi(M>)ZP&{KTn>E8#Ggg3{J_^LE+QbE&kUXI2+j~P&y<5-POD6UV}WAc=|`~=}dw~zvV-` z(Xr5RigLro*BiEF$&)glr$Bq$)HBP+Dp2`Xe)QaXY68X5m-%1pv)E(adXjUz0hWC7 zw0r{0T~)HVUJ?ug7fdykdGcUdTfpm~r9Y8m z`D>48MJd*ERm8`vR!~hvA2R9g#9>Qy<6~L>r?EB56aJp zB!PhuI_WUqzdQD8d)JFQRyaSuoV!MH4C<(ijtZO@!rb}Sq;Ir>U@^LuaLPg!R=-%$ zE=6SFIF(9aU%wYNaFShCevyQ;CkLZy!Y@J3)brruSurrDsdU%ya4arACJ~OQ2!XEL z)}*}J*95YRXPgANW$a;!p%2X%#LklfBrH{PFuRJ=x#pjtujBB@r!0M3y}16Fb1NDa zt`WRdt3P0s^a_z=^)qxHu*r=){tOq4eNT%g`a|zS4%f&iTUdMIePC?E7#6t1*6N>| z;XnjWw8=skt|<(?W=PM1>!a?E1V=^1%J*qxW>se?{5On#T#}Is}>)E_}sadW(@W zi8rCAW|8v5Vlr;>&k2m03}eHC*3(udmk1;o@`Q$cG#K4%Z8Gbig;Vv!mZGQD&?}I6 z=d+9kOwl)^l<^#_+$xxws@s`&%*-AFrVn@GszXJzG{UpuIzaCgVgzpf)K zXpSxGBUiW^%pi3(=uuUfG4|By9^#*C#U28sPLXFmd}moIzH2iF9m`a@!x!SPwKaO- zxTqI&%df<}j%>#%X7%MjRwi7K)l6Mt-SNY9hlQm-sN>{>ZPTqrLs-32o-0=115M^n zt;03wu;!rF;pYLVILVpJ&#Uqari$8h_n&+U?RsuMeMl>?|4TTtLzOJL|9QmdN4*T? zyjzcD?t4S)-$aIQ!A3Y5s)~KR-*8Ha`JMd4I?O42z3g&%6jM}qjxitS!r2>RD_`~q z!PxS19{V^gSeKp4_B8awzUEbZyxdn*~L1WmS6i#C7 zSi|ACO&vpNXKWrc8hQcyVf~?4r}2h1ft(p7oA_5?-9bmG_3bovQrvnHw`jKGuXuO= z<#fR2k`)TEsgLl7MojI;CI`;51Tft;a)zxK>kigO89VE>dRb^|1dN1d6ZaPCV_tk@ zlw`w4taaG3=2tw0y*(PGUP(6Csqkze^Dqti`z>1v(OiNW3io1Lt!bEx*=j%YqzmVd zo&KTt$O>!f6i0^7+r!+1mZQ9fE%g4Pe$VU63maF74xIZZV03!grk89Q`f9kYM)r5& z(8+Owl#27v{-&AUzf%p?{HNl2MHH~g?(px_>}{+s3{#zITflIwr;P)YB{=rp z;6SN@6gExW%U!m~!zRkPuiR<_Fx~3O!qC_84>@*n!v|(N1>?V`;YmmgcaDEc$#4Ci z>GlczRtuG|k3_~(`Q62@gb7soGFPfUZxNX(`~quLpF?NFTEy$_yF`j3X%V%Qo>)6} z{ARukAI@GI(dhIxg~b?mG1KWz0_h8(->P&CFqlOC@=N~)I?`QnIu-B>nk|-+i`@-j z{*vOU$VfT@$>+#RKk7?}BznQOD}xLn5m^kK*hmPh>}`=`!c{OM+C8(shYVNNE)jqC zDG>LQL`8d4ED%}Nf9pK9NFuN@s~?$Rvw-KbM#l4A-wDiImG(BzMTq1ZdlS62QwWrG z4TE~iY{cE!!W=b1H*n)g>4yvwAtDuzq5GuoPWx-U_dYQFC6Jc2vuBVo6Y12yCMn*1 ziJJqT{lfwe5J-~>*bJ{w6S-bY?{%UUAuwHS8N93^fos1%KiMm&4+GCrrw+xRA(HH! zzrU`vm%ui6G(BN>96J=l#qLE&VC{wV){(&&0`ra7zP_OYM4HpqPi&^8h}4$noqs|q zOjDxEcddF{l`~`gLN?&4Gn`*m7Rflt{-c`};x9 z!JX;U@JCKJiAb7uz?^^{+cXK0WGnx`L$5#r8RMRV@m0SGE=C~W7J-wgPhiz(H_irG zTl!!5Kwu73e{pl~CnB|6lzdav0&cl|AbX2UIHW+bJs@?GK$@^D6soZZi%yTj8yyN@ zgTH?E)3!%o1#8};#eYs{;mj^xwol%Jm?gT$QbK{5%v-(Yzz{+ zLMot5<1(9{_!sD<3*{eM=7sT@sNallA~52OXms4%FCwd2VFAZE2iR_#lp4rLBGUbK zHjkjxBXYj|9e(GI49p8!KRy=52@}fSp9|tEoI&Q~YqsG;8mqjb-+eoJxO7S2z!)VC z@_5^*c&ihrmP!VL4oxGKac8OS%6jg)+ym9Pl(hy*Pe&8M#I(;L-W^y z1tMFee05z>A}%iZk+uqy0g>s@-?!v}1nPk^%4ctk5NJPYqg1#y%&X;pcqEt&^Cb+C z_t=?;+@B_=uT>L?*qml-yYEEETG8FLNP#XIhW_WbB3=woMWKBut|0sqLKPn@|)N1#1(rd#e1 zFJUjkfa^+6EzFXS*?vE-2Fp1ro40N1aW?I+rsB;F;_iFfA`)s(A?1RK_8zNW#NEN% zA=3wF2^5XZ=^5r*uxh`z?78<6f#UX==&9hB7_RZ+OXWKP%+?=EKCpNQ7irwh==zUB zy@;n_L1hb(nx7%klXDfutXOwnnj6O6$R-ow=|qsTIAOYDS^$EG}-i%YOthIz++PQ3udRe7j9KPCXi=* z<8BB(Ngz|b@7ulFMqqj@Xn%=w50PHaxzfoXhQR1@@ms@@leiwIT^JK1L!{FG9xu7? zBh-&SrA~ED#3r`p=A$bTILmQdh2{A#7*#ueNa-*YfwXJ*x|ZiYTy4M5Z+=Y>`lzG2 zpXu)=uzi{eZKdUcuC9$4DGeKFYl~e~5S1aaK#;9gfG>1Esr;4RDoZ3MWOI=Ox8S@! z+2gyvDhVtD`V9f%L;^eaD^YP-VZ!e7Nh!>?4#MKFn(jk|Hk`2V_7wj37B+o8-uz=M zh)WBjDKXuF#9j7BJtf|}B`{Mw;Tm1d{7#JRy84{HeI!98L*;3Lu(AO~5)!h`>!9Is!F)bbOZ?MIB75@>&-YizF>_o` zUo_ztZlpZvUN;-tx!)QTFCQI*xs@?bgTkB89BFvpQ~M)MR4I2fB)lTh=iR5(rt^cv z69xwzcI#o|q(f4U_W^7~49^=mhxPeeZA)IiaC_*<&nl)M0tFe(3ty2EB8B9Hrd?(@ zku_ppYf5Mo&L;46>tA*xQtI(ar8Nd%`pxn0SH>!dtdHs6w`NlAgf-)O%+At8ved(F z@BdiAO3K%}cO}CKWMzhqyzlPAc2M{^Z#hbs=|0>_zDt+D@+MS5?1~Z$?BUNKjj1iHYm7U_<7=(~R*Bdz z@w+cI5$WY7wO4jGz$W+g9?QEXM1~TZ_(X9(B1K`_+an})L>9@DyDpPlg_VHIPq>V> zU{>iCn_kc~E*|I4HAqIBRA8GuU%gES$%{k3iu}eLZOPAc6MV8JiP9%>;^hhL9g$dSLbVfyqea zPF!z)*_6Gz61G1T#@Bf$!qPPclFS311eU8LS>fl&aJg4w?;n$gkig^AVkzx}F)wJp zUoBg}Y*G&Ow?TC<{H@9?-`<Gt zZ{Cr*M{7VJDUf}Y^wf$#vhZwAeFO=CIjSYV!+ef#P~>yC%XM#D5<2#jtWuLe?G@E1 zclb5V@RbZ^H%yK$g|m*vKzV?+kJj*0xCE}WTWzHb%sg1~%Sv2}ue z2FK*Qsw@61684CzMUI@-B#=dYd6HnxKqRLFgO|mLL~>>W-q(FRxbdv*SbR+wfsXm& z*K?1!VMD>vsBtL=$Aj9=U&sz7FxQYMFp-rLXi|U4-0Pbol4Y%rj3)IES#&>f`15n& zs*rN48rx-C@eAc zjhLbVVOQU9QFJ;NxOtnINMQKu><!6^cD zPRt{4pM(|K4eoU7H6j)Lr3-Dc{X6rNBeeZt368e2cgatA5NO9&7EO;dVO1pYMY-mC z0(oZjn=)%z*k--*@Ss2khG?4%7u)6HsJX7!htyywzpBQvuSO24wNHxM+x~*J6ZeG# z{T^Z`uN>EfnJ8Q$NWCP~M?)Rcdi^ykL+lx`bEuB~j>CK+RRbc&p~+`OP4k{KjyNtq z9xG_WF@|uqvP@=3&HJeGyfg}CN#j~kgbPQx9jAD9{Gi#VF}Z4cbFg*urM{CJInMtO zy!%y34VMm6QOPGqz?Q}1v+lu{cJ$!Zad#sarnGM}?+;7C(bzGnMwuumsP3{J&Ao%$ zjAw_=%jV)>L1u-b4I8$~9Xk8)9xwC-UFbC`@`VBF`x)AqLa<@(lhz>M3Tp>*ocRS} zaXOP?JD%egtYi*+);m*-L$h_J>brFa)K4fPt+MDK|8GOtC~-G5?3KLU|5FSnCr&Es zniye;!@b90gN3+gLYWkFy$7}*WJ$V3G+^5ows2<8_#MBLg6yyN3^si-%8b3V8^*W! z1j=+TLh$Mfj?Bm7xYTdLsp;N?YsUw2Lm7`@d+lfKo02J zdihd9T>^f<(%M;?Eh8!T+oncF>9>jz4naDmVi#d4e2>tO8z(lAkDs?0F(i<^ziq4g z<_T1$)Cn8Ec@D!V9&JYgmvMRK@P)BOUaYy~V`ZpdhchoUJwJ_Dqo-(@v{FzAq#sQ1 zes^3A=Y>_vGxhIb{ro8n*RKlDZ*+X4WY2yW4GJxj&x^!v7SqY(>MpFR+uNTpy@V|y zE58T(9kK4__GiEHEtn2Cob)nGddKeGl!z3u!6g|(FZzR7IC5S2;j4(35F=*tIaBW- z4n1Iu_*Hg}NYdh|eocN4zVPx$@4u6dlMUZ$Itxx?JEQEwFy&eJ`SkgGaTOi*ov;`C z!5xmp78iajQ##?y{F><=!73Q!8!^@R%!$25(ld;YgRy#e*XZ}hMX+oh!u)e~duM&4 zQP^J~gEPhU7jjyTLz&>CBeKp#FtSfJtacv}d&We(eomV~YP_P?Zgn!~JYALGGWQOb zf3efo86U;LAS!$l8-!!-{$wq113G#&UdQeCCy)+W4%Isp!vOO~SILGTm?_HIo}Q$D zf@T+m@RHroWxbTgqp1e%WuI;4PNx#dxt(8S4pk6HbkfPvxp@kgLWnDSZ52R#BA z!OqdN<{FWtpW#Zf1T!>q%=U-;Y=*Wa%G;Z5Q!uc=D4PAC0)M^xU2se{8`r|R0@%U| zp!)H3Pp|Pj=*lMJNU!XIV2`CZ$qRfi=|?ZuIGc#b!I0oN@E50Q?@BH>b7Swxh`KeY zqZpLnK1K2FJg%|Sbwu0OLuGCX4QPAe?HZ9I5zKABe3+4=@P?2A9w z_eBH7NSIYls^-EvulvV?yBJ~aR>fhLS6(o8B`>K>=rm5rvp9K5+CxKUB>8>SAzV3l zjgT8~0)|HGM#o-WhtS{VzA=KKSQlKk&3fw?E`0MyIhy$ZMiV-pfhx zvrH^lC932k=H-P|GnA%8irp~J5)v}SbO73f0@vr9)FJSEi+!+uBUDzKcb|&*2s6RK zYBR*WFzwVf-}IscW<-B~e?8lbOQ{boTQ4f(xT||8BlkSE3~mMx0;Vumb%x^**BO}k zD!)~C?GDb>#Kyeb>w!HaXG5$l$8p5^L5sKOHjK3&4v`xAjWut@3FnSlz?4R-c8YE{ zPAGTzDq5{V9{B|0uD4}7hbSUI;L0Vjbo$@S<2YJq@maCgF=az2lmZxoDToPzO$ZWoR1^M!q}UQgA`xuU@5-D zsqb4pO!CV1kZ29yP_f}OnOO`>8(7F5+vkkMhFirlRjSzUAhXQs`wjatc;&p4+hHYJ z#XscF5Y%{S3{&dlW9&KPq?ww6deMN77i8pc{NA!r?7^$p7+626K+`Z)5LFpLAyaB&PjD7d7u* zSSC!Xd1mE9kEl$>?TlWS_MTzCvG*3vzjj0OXH?kqdcBeSLJ%Yunq(iz6vwi<)!Fiv zeX#I2+U&!WAI#WayQuS_3Yye2I-WEK;ov=m4ws6Z=WO2rM&|e{xcqYeUHA;vj=zIH>xdZ8QEhw8q_0OS|d}wTq$icJ+^8>Bd%tS3?V| zopG486F&-_kExgr)y?4a%e5}yOle$lc@*C@aU7@2s`nF!%gt`LH3dys_VPNje(O9u_u-=gudn>mBXPpnLNJ(D8C2e+t(76$8HEJ~? zJ>QCxaji-P(ac0riky+7mkVI&&U;n4tG}@3>ZSe4d%a<6ygRmEpABZJTvoqWT4R3U zDZWI3GB98pP`v3J4|Nx1Ma#t#A&_4kYcA4@a^w8gEQ)jykgN0kizIrO- zdbU@vS1=upT^7kNDYN@?%azobg?beAsMyn&78TfG3)9ed6eNY_8lM<+FU*jgWp&8|$^S^Wj6 zVLfu}edEHQJ7A2J!v}U1NGoC!jf20g2p5d$Y}aw})x%V#QX|(NJLu!^vTeQY3#07T zy5yH{z=+H92>!AC@qvjjpQ$hfJowj~YM3 z>HEIF<$~YihE;~0?BQ3~Ql0s@)#N*F-Z$epZ4cfyFDcEkseN^I26wIg}p?sJ77mH&Kzhbo2#qf(=j^9envDRPHDJ+K+x4T1U z8J8~r@yGtEkku;a9n;H2Ra0zP@f@~wl)>8A$sc(vmvLF5Eiy_a633fNsJYy4@5E^e zN!8syp-)ld#xJ9t>s6ObwGyZS4P&^3e=I(FsMRpQ*1`J+R@JL4~X zSe+ikjROiHjmh8c!CZ}0fDg|HZ2H`*mOXtLW~7{MGrSDN{^WogF)XUM7@L^5#&{I^ zqi;M~ytM*-Mcab5SFCYNf6zutZ3ssB-7XQBePFeqGRG?5CC)uFAd*(-;ozCh=Fm?e z@KK|0>6Ex44vg5Oeqd6=vCUhXq*_QMl_Swmxjl)we!s3=I=+OvZcK8h0hZ$j0}gJ~CFE`1($jPSz2c66bF$edopnYlNT&f|6#1_ zEfdJC}8z^57gz#RY*2ZMb<{t-3}r3i~PyMg4m?5t-$v->KJY)?KJO%}vyU&AD^<$E|dnMCsZ<0woy zC&?}L8Q}Eo4V7!P;jmyV!1*=V99NUxI?Ab&;PCe|cf`~0VafKnAn%twP{EVGJ>V?` zje;|*u4n29q)Y*_!Khh7yH3XW!p>FmYMVj0<+X9lt1&Pe}~1vO0WJ^Yj2 zE*pAfkEOKnDr1LT=)e*+H{^Y5Vwb(IhP~x4Iae-=5$P29t807rK|gP%qLY#bfo!r{ zCp}po+vPtLWNMV*%;eIWG%HuEUJO2SJl`0$WY%u#+n86vSb{2N_=kbD_v!BDV9fHVl0jv7NsWiwlmvexlxXxMDF@Z5bU5 zV-`J6RBN?i>lE#;xQ~0WR4g%Z%%cWpaGo%6BNAE!hHJuZ+<;z_pPrB6f_C&Yx7q1# z2Wzhr#P8=&Lf2ZLZrgVnY@|s3b7QFrM!(~}h-7XkJsJ~Ao+t$is=Uh!qwJ7&j7|OP zco1$Jo4=a2=z=o^5tZ=@*P!jwt+b&R^RU@zEu-;H4o5BX4cD2iprJ(Al-xNVJ9Q4G z*L~S(pD*+b&jCGH+UPvfdiE3KB;0T4jTgb5bKkdQ-0gAUSy1j4$r-3KxjXpG^cl`p zTx(_9eGz(lIfqrh-NeOHcO~}K=tA2={?A)!J}`gy)^Bp2M63>BquV`44dXG_)&(ZN z;&zCVvAfa-9GXtGYwwanyU%xg_g0qSLZP&N!+a7pF$XZcj$y;T_zD6Y=YAM`Pstl< z{Q)*<6mE|n>4ZM6wvSp-e6aY_!=BUmJ(0FkaNUBr9I8Iw+*{)eFjlR~X;b|a#?Fj9 zbt(=;;v&!~KH7(UQytmVQg2|CB;+a2^9<;zDm+2s6$kyU%g!G5uQ2OPY)t4XId=c7 z=DTuyFE$bg&c7s~6$&T%>x!RSik>5*khBAH9IOnJlr_F775+|RxNON>G(c4sdV zX#eC2pH8fRh2tOp^7T!@vP)pBh}&Tt^!uDrw8sbsv&9_rzu$%Kz(2pN9zDmJ@ofvi zGnKfpZ|I0dk`N{}wDGA;R6|2(%O@qaoptc{bAf|GmbkiW#Jg@@2bNc&m4@a|;Y@?g z=iPtoVRk{*VD`&2_D-c|k)?y?M@ zC4r4z!~r438S*$UY-aL~6b?VPv#x8s3Q2khGs+|VmnRmm)8J%$ zi0T1YILj+3yQzo)shW)fj@20VbD#Tw`aYOAHy`)Tu;M?`?PS~k;~e|HpELaTbo_tF z$o?Bo`VahnFDLs4Iobb2{?F?FGmL+%^N;mO{|WB@tl$5mHvg~c{lHT}FM&~~-yJxEZQl)3qGjplKIC*^-&Vo;-=)%@N*)R4FGjqhuuvk8{O z9MhJHj^GasbL1Mqso!Fw3AEC-%!$kIIRexQ)>353*rO|!<6V;Lb(~H2SC2%gUaEL! zXr*FMF4LhkzbiWGwJ@8bb~|&KnxJH!-_JCma5%JWmRv%uCVe_);_uYcTZbEH&LNvb zGM_rtqK0A=dyB6_Z;MOLZsn>2NBwO^Zmxe@>28$!sHFcY?Y^subzhCcfjXMM)HjW? zQsm!9u`L@G+@dB^G94{frV~p)?m-L_rIvQiTc>s%Pi-F3Os7(|4|z^x$t>aeV{<%h zp_<~`>j>^QQJrNu%c87;V@!-M?OJ2v&W>$-KfO};YN08aH<&z3uJNm<)t_Un*+mEKWW|EA{p4_y%qCGqAoyE!hsNs`F-k*_XGLXUD zmUtRj%@notT@iKZshFp4_CmKezZdee9-3hmn4b{8=jh{E4YW)2#3l0lWu8_|ns5@O|?%F<|RgvGo;W zg?HI6oAy%pO@EqNHS`%jFdM77RolqX1~IwW64M3wJkPs}r$7CPCmcdu>wJ_aVA zdus0`Fpzg^LSOEa$ZoyQrQ^mVlzckXA0KlLy*(YnQ)9QfJH6BRmd_)GnsN!RZ`|$O zhA(&&%x}bnP~POCsX2Gu)mSfaWsLrUm!{1r-JGzkDXsB@kNFu7I_C#?;giz${Tawr zqC=si5nGWy>vu=MLgd!TFP7nVdUfgH+i7`0Ub4ef7JD@3FHk&Ka7k(I+vQue3jL79yx6@>iM%+){tp*qujmw8gL+SSl)(-G~cwLf}3 z2F;INd^xoITkk`~#6_x{b7HQyZsZ|-%eU$IBZ+27PnzG-a{J#`q`-CNNQ43C$=D(P`T+l>r{DnXqMOF$Y~MvRB@y8F5l{R7aqD~7SHgNYm79WXVTZk z_VL+)7`j&{s!vew&(+;+It6Y^$cW6SbAg+zSPrsyxHxgR}?OY4=j?7ury{%y^dM zrj%ddbW*?tJ)x@2zF52Oiz+U4TT2HGBQ&+M3>?#{_WU-dNj)|3=ek^#rG)VrfvmnO z)bY*lLV7iv=#Or~6Y+HF$(Oc?*)){N7kMT=tNR`?_DbstW9A^gpKDZU^nzzx_)Jh7 zduW-*>AXq*G~MxYuXd@-iT6|K#jAWTf0L_yhUDFnWEf9!*~sdx$XYvtM^PU#e@ZaT zyN76o*&62G5LEpheD$sJtuuMcQhG99JO*Ma`Hp=Y<0E~SmNT<0zA%19@k-J~RTsMl zOW9^bjieyk{Ak6Rkwz)0!)DG>Uv8w7TG(Aoc*-0n#P?Y7NW_gB>mx<1F68a4LlrOY zc>gsmSNP5KQNl^EeBlmb_oIuW18j7Av*;bZn^_%lB;BK9$7nr%L!5V6Z7#~)>Q)+^>-gw57n5Ib(qtDY2{Z>QanKPdPiFf+T!>ot?yvyY#CEcL!yn*4L{ z>aPglMV_gKsl&|ySL9T_jh=Ql3auXMy?AY=_d-S?^X<51iwB{;H|2Gu>Du40=QoH% z5%Y=CYGctE96iydn_|Z#OGKS-DgP$;oeiP*s^@OzM?QDXfhP!EQ_|PX9+CMS+%=@q zn^Y~V6;eGdBmREFQ?0=Eb%J7d@39=B5P@ufn*SsX%NyfKdv)ae%NCKZe4V~Xk; zt`USu=umq&aqR+g_4N>BxTuy_KC(xjz8V| zX+UJesWqVdO85;XBcOeakAQ9lnG>L-N0b{)PK5`rmCwM%|AqU%1MdG{`~5H6f8qXrgZn?TJtSNI2e2Gl*uqx;0L*yv5C8xG literal 0 HcmV?d00001 diff --git a/pkg/inst/testdata/TODO.csv b/pkg/inst/testdata/TODO.csv new file mode 100644 index 0000000..d679966 --- /dev/null +++ b/pkg/inst/testdata/TODO.csv @@ -0,0 +1 @@ +ou alors data_test.RData, possible aussi diff --git a/pkg/man/valse-package.Rd b/pkg/man/valse-package.Rd new file mode 100644 index 0000000..534375b --- /dev/null +++ b/pkg/man/valse-package.Rd @@ -0,0 +1,37 @@ +\name{valse-package} +\alias{valse-package} +\alias{valse} +\docType{package} + +\title{ + \packageTitle{valse} +} + +\description{ + \packageDescription{valse} +} + +\details{ + The package devtools should be useful in development stage, since we rely on testthat for + unit tests, and roxygen2 for documentation. knitr is used to generate the package vignette. + Concerning the other suggested packages: + \itemize{ + \item{parallel (generally) permits to run the bootstrap method faster.} + } + + The three main functions are ... +} + +\author{ + \packageAuthor{valse} + + Maintainer: \packageMaintainer{valse} +} + +%\references{ +% TODO: Literature or other references for background information +%} + +%\examples{ +% TODO: simple examples of the most important functions +%} diff --git a/pkg/src/Makevars b/pkg/src/Makevars new file mode 100644 index 0000000..50b7fb6 --- /dev/null +++ b/pkg/src/Makevars @@ -0,0 +1,11 @@ +#Debug flags +PKG_CFLAGS=-g -I./sources + +#Prod flags: +#PKG_CFLAGS=-O2 -I./sources + +PKG_LIBS=-lm -lgsl -lcblas + +SOURCES = $(wildcard adapters/*.c sources/*.c) + +OBJECTS = $(SOURCES:.c=.o) diff --git a/pkg/src/adapters/a.EMGLLF.c b/pkg/src/adapters/a.EMGLLF.c new file mode 100644 index 0000000..9b004c2 --- /dev/null +++ b/pkg/src/adapters/a.EMGLLF.c @@ -0,0 +1,91 @@ +#include +#include +#include "EMGLLF.h" + +// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper) +SEXP EMGLLF( + SEXP phiInit_, + SEXP rhoInit_, + SEXP piInit_, + SEXP gamInit_, + SEXP mini_, + SEXP maxi_, + SEXP gamma_, + SEXP lambda_, + SEXP X_, + SEXP Y_, + SEXP eps_ +) { + // Get matrices dimensions + int n = INTEGER(getAttrib(X_, R_DimSymbol))[0]; + SEXP dim = getAttrib(phiInit_, R_DimSymbol); + int p = INTEGER(dim)[0]; + int m = INTEGER(dim)[1]; + int k = INTEGER(dim)[2]; + + //////////// + // INPUTS // + //////////// + + // get scalar parameters + int mini = INTEGER_VALUE(mini_); + int maxi = INTEGER_VALUE(maxi_); + double gamma = NUMERIC_VALUE(gamma_); + double lambda = NUMERIC_VALUE(lambda_); + double eps = NUMERIC_VALUE(eps_); + + // Get pointers from SEXP arrays ; WARNING: by columns ! + double* phiInit = REAL(phiInit_); + double* rhoInit = REAL(rhoInit_); + double* piInit = REAL(piInit_); + double* gamInit = REAL(gamInit_); + double* X = REAL(X_); + double* Y = REAL(Y_); + + ///////////// + // OUTPUTS // + ///////////// + + SEXP phi, rho, pi, llh, S, affec, dimPhiS, dimRho; + PROTECT(dimPhiS = allocVector(INTSXP, 3)); + int* pDimPhiS = INTEGER(dimPhiS); + pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k; + PROTECT(dimRho = allocVector(INTSXP, 3)); + int* pDimRho = INTEGER(dimRho); + pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k; + PROTECT(phi = allocArray(REALSXP, dimPhiS)); + PROTECT(rho = allocArray(REALSXP, dimRho)); + PROTECT(pi = allocVector(REALSXP, k)); + PROTECT(llh = allocVector(REALSXP, 1)); + PROTECT(S = allocArray(REALSXP, dimPhiS)); + PROTECT(affec = allocVector(INTSXP, n)); + double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLlh=REAL(llh), *pS=REAL(S); + int *pAffec=INTEGER(affec); + + //////////////////// + // Call to EMGLLF // + //////////////////// + + EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps, + pPhi,pRho,pPi,pLlh,pS,pAffec, + n,p,m,k); + + // Build list from OUT params and return it + SEXP listParams, listNames; + int nouts = 6; + PROTECT(listParams = allocVector(VECSXP, nouts)); + char* lnames[6] = {"phi", "rho", "pi", "llh", "S", "affec"}; //lists labels + PROTECT(listNames = allocVector(STRSXP,nouts)); + for (int i=0; i +#include +#include "EMGrank.h" + +// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper) +SEXP EMGrank( + SEXP Pi_, + SEXP Rho_, + SEXP mini_, + SEXP maxi_, + SEXP X_, + SEXP Y_, + SEXP eps_, + SEXP rank_ +) { + // Get matrices dimensions + SEXP dimX = getAttrib(X_, R_DimSymbol); + int n = INTEGER(dimX)[0]; + int p = INTEGER(dimX)[1]; + SEXP dimRho = getAttrib(Rho_, R_DimSymbol); + int m = INTEGER(dimRho)[0]; + int k = INTEGER(dimRho)[2]; + + //////////// + // INPUTS // + //////////// + + // get scalar parameters + int mini = INTEGER_VALUE(mini_); + int maxi = INTEGER_VALUE(maxi_); + double eps = NUMERIC_VALUE(eps_); + + // Get pointers from SEXP arrays ; WARNING: by columns ! + double* Pi = REAL(Pi_); + double* Rho = REAL(Rho_); + double* X = REAL(X_); + double* Y = REAL(Y_); + int* rank = INTEGER(rank_); + + ///////////// + // OUTPUTS // + ///////////// + + SEXP phi, LLF, dimPhi; + PROTECT(dimPhi = allocVector(INTSXP, 3)); + int* pDimPhi = INTEGER(dimPhi); + pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k; + PROTECT(phi = allocArray(REALSXP, dimPhi)); + PROTECT(LLF = allocVector(REALSXP, 1)); + double *pPhi=REAL(phi), *pLLF=REAL(LLF); + + ///////////////////// + // Call to EMGrank // + ///////////////////// + + EMGrank_core(Pi, Rho, mini, maxi, X, Y, eps, rank, + pPhi,pLLF, + n,p,m,k); + + // Build list from OUT params and return it + SEXP listParams, listNames; + PROTECT(listParams = allocVector(VECSXP, 2)); + char* lnames[2] = {"phi", "LLF"}; //lists labels + PROTECT(listNames = allocVector(STRSXP,2)); + for (int i=0; i<2; i++) + SET_STRING_ELT(listNames,i,mkChar(lnames[i])); + setAttrib(listParams, R_NamesSymbol, listNames); + SET_VECTOR_ELT(listParams, 0, phi); + SET_VECTOR_ELT(listParams, 1, LLF); + + UNPROTECT(5); + return listParams; +} diff --git a/pkg/src/sources/EMGLLF.c b/pkg/src/sources/EMGLLF.c new file mode 100644 index 0000000..e8b3b84 --- /dev/null +++ b/pkg/src/sources/EMGLLF.c @@ -0,0 +1,420 @@ +#include "utils.h" +#include +#include +#include + +// TODO: don't recompute indexes ai(...) and mi(...) when possible +void EMGLLF_core( + // IN parameters + const Real* phiInit, // parametre initial de moyenne renormalisé + const Real* rhoInit, // parametre initial de variance renormalisé + const Real* piInit, // parametre initial des proportions + const Real* gamInit, // paramètre initial des probabilités a posteriori de chaque échantillon + int mini, // nombre minimal d'itérations dans l'algorithme EM + int maxi, // nombre maximal d'itérations dans l'algorithme EM + Real gamma, // puissance des proportions dans la pénalisation pour un Lasso adaptatif + Real lambda, // valeur du paramètre de régularisation du Lasso + const Real* X, // régresseurs + const Real* Y, // réponse + Real eps, // seuil pour accepter la convergence + // OUT parameters (all pointers, to be modified) + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* rho, // parametre de variance renormalisé, calculé par l'EM + Real* pi, // parametre des proportions renormalisé, calculé par l'EM + Real* llh, // (derniere) log vraisemblance associée à cet échantillon, + // pour les valeurs estimées des paramètres + Real* S, + int* affec, + // additional size parameters + int n, // nombre d'echantillons + int p, // nombre de covariables + int m, // taille de Y (multivarié) + int k) // nombre de composantes dans le mélange +{ + //Initialize outputs + copyArray(phiInit, phi, p*m*k); + copyArray(rhoInit, rho, m*m*k); + copyArray(piInit, pi, k); + //S is already allocated, and doesn't need to be 'zeroed' + + //Other local variables: same as in R + Real* gam = (Real*)malloc(n*k*sizeof(Real)); + Real* logGam = (Real*)malloc(k*sizeof(Real)); + copyArray(gamInit, gam, n*k); + Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real)); + Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real)); + Real* b = (Real*)malloc(k*sizeof(Real)); + Real* X2 = (Real*)malloc(n*p*k*sizeof(Real)); + Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real)); + *llh = -INFINITY; + Real* pi2 = (Real*)malloc(k*sizeof(Real)); + // Additional (not at this place, in R file) + Real* gam2 = (Real*)malloc(k*sizeof(Real)); + Real* sqNorm2 = (Real*)malloc(k*sizeof(Real)); + Real* detRho = (Real*)malloc(k*sizeof(Real)); + gsl_matrix* matrix = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + const Real gaussConstM = pow(2.*M_PI,m/2.); + Real* Phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* Rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* Pi = (Real*)malloc(k*sizeof(Real)); + + for (int ite=1; ite<=maxi; ite++) + { + copyArray(phi, Phi, p*m*k); + copyArray(rho, Rho, m*m*k); + copyArray(pi, Pi, k); + + // Calculs associés a Y et X + for (int r=0; r= 0) + pi2AllPositive = 1; + for (int r=0; r + Real dotProduct = 0.; + for (int u=0; u n*lambda*pirPowGamma) + { + phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)]) + / Gram2[ai(j,j,r,p,p,k)]; + } + else + { + phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)]) + / Gram2[ai(j,j,r,p,p,k)]; + } + } + } + } + + ///////////// + // Etape E // + ///////////// + + // Precompute det(rho[,,r]) for r in 1...k + int signum; + for (int r=0; rdata[u*m+v] = rho[ai(u,v,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrix, permutation, &signum); + detRho[r] = gsl_linalg_LU_det(matrix, signum); + } + + Real sumLogLLH = 0.; + for (int i=0; i Dist1) + Dist1 = tmpDist; + } + } + } + //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) + Real Dist2 = 0.; + for (int u=0; u Dist2) + Dist2 = tmpDist; + } + } + } + //Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi))) + Real Dist3 = 0.; + for (int u=0; u Dist3) + Dist3 = tmpDist; + } + } + //dist2=max([max(Dist1),max(Dist2),max(Dist3)]); + Real dist2 = Dist1; + if (Dist2 > dist2) + dist2 = Dist2; + if (Dist3 > dist2) + dist2 = Dist3; + + if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) + break; + } + + //affec = apply(gam, 1, which.max) + for (int i=0; i rowMax) + { + affec[i] = j+1; //R indices start at 1 + rowMax = gam[mi(i,j,n,k)]; + } + } + } + + //free memory + free(b); + free(gam); + free(logGam); + free(Phi); + free(Rho); + free(Pi); + free(Gram2); + free(ps2); + free(detRho); + gsl_matrix_free(matrix); + gsl_permutation_free(permutation); + free(XiPhiR); + free(YiRhoR); + free(gam2); + free(pi2); + free(X2); + free(Y2); + free(sqNorm2); +} diff --git a/pkg/src/sources/EMGLLF.h b/pkg/src/sources/EMGLLF.h new file mode 100644 index 0000000..e15cb87 --- /dev/null +++ b/pkg/src/sources/EMGLLF.h @@ -0,0 +1,32 @@ +#ifndef valse_EMGLLF_H +#define valse_EMGLLF_H + +#include "utils.h" + +void EMGLLF_core( + // IN parameters + const Real* phiInit, + const Real* rhoInit, + const Real* piInit, + const Real* gamInit, + int mini, + int maxi, + Real gamma, + Real lambda, + const Real* X, + const Real* Y, + Real tau, + // OUT parameters + Real* phi, + Real* rho, + Real* pi, + Real* LLF, + Real* S, + int* affec, + // additional size parameters + int n, + int p, + int m, + int k); + +#endif diff --git a/pkg/src/sources/EMGrank.c b/pkg/src/sources/EMGrank.c new file mode 100644 index 0000000..3a9bf94 --- /dev/null +++ b/pkg/src/sources/EMGrank.c @@ -0,0 +1,307 @@ +#include +#include +#include "utils.h" + +// Compute pseudo-inverse of a square matrix +static Real* pinv(const Real* matrix, int dim) +{ + gsl_matrix* U = gsl_matrix_alloc(dim,dim); + gsl_matrix* V = gsl_matrix_alloc(dim,dim); + gsl_vector* S = gsl_vector_alloc(dim); + gsl_vector* work = gsl_vector_alloc(dim); + Real EPS = 1e-10; //threshold for singular value "== 0" + + //copy matrix into U + copyArray(matrix, U->data, dim*dim); + + //U,S,V = SVD of matrix + gsl_linalg_SV_decomp(U, V, S, work); + gsl_vector_free(work); + + // Obtain pseudo-inverse by V*S^{-1}*t(U) + Real* inverse = (Real*)malloc(dim*dim*sizeof(Real)); + for (int i=0; idata[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j]; + inverse[i*dim+ii] = dotProduct; + } + } + + gsl_matrix_free(U); + gsl_matrix_free(V); + gsl_vector_free(S); + return inverse; +} + +// TODO: comment EMGrank purpose +void EMGrank_core( + // IN parameters + const Real* Pi, // parametre de proportion + const Real* Rho, // parametre initial de variance renormalisé + int mini, // nombre minimal d'itérations dans l'algorithme EM + int maxi, // nombre maximal d'itérations dans l'algorithme EM + const Real* X, // régresseurs + const Real* Y, // réponse + Real tau, // seuil pour accepter la convergence + const int* rank, // vecteur des rangs possibles + // OUT parameters + Real* phi, // parametre de moyenne renormalisé, calculé par l'EM + Real* LLF, // log vraisemblance associé à cet échantillon, pour les valeurs estimées des paramètres + // additional size parameters + int n, // taille de l'echantillon + int p, // nombre de covariables + int m, // taille de Y (multivarié) + int k) // nombre de composantes +{ + // Allocations, initializations + Real* Phi = (Real*)calloc(p*m*k,sizeof(Real)); + Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real)); + int signum; + Real invN = 1.0/n; + int deltaPhiBufferSize = 20; + Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real)); + int ite = 0; + Real sumDeltaPhi = 0.0; + Real* YiRhoR = (Real*)malloc(m*sizeof(Real)); + Real* XiPhiR = (Real*)malloc(m*sizeof(Real)); + Real* Xr = (Real*)malloc(n*p*sizeof(Real)); + Real* Yr = (Real*)malloc(n*m*sizeof(Real)); + Real* tXrXr = (Real*)malloc(p*p*sizeof(Real)); + Real* tXrYr = (Real*)malloc(p*m*sizeof(Real)); + gsl_matrix* matrixM = gsl_matrix_alloc(p, m); + gsl_matrix* matrixE = gsl_matrix_alloc(m, m); + gsl_permutation* permutation = gsl_permutation_alloc(m); + gsl_matrix* V = gsl_matrix_alloc(m,m); + gsl_vector* S = gsl_vector_alloc(m); + gsl_vector* work = gsl_vector_alloc(m); + + //Initialize class memberships (all elements in class 0; TODO: randomize ?) + int* Z = (int*)calloc(n, sizeof(int)); + + //Initialize phi to zero, because some M loops might exit before phi affectation + zeroArray(phi, p*m*k); + + while (itetau)) + { + ///////////// + // Etape M // + ///////////// + + //M step: Mise à jour de Beta (et donc phi) + for (int r=0; rdata[j*m+jj] = dotProduct; + } + } + free(invTXrXr); + + //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr + gsl_linalg_SV_decomp(matrixM, V, S, work); + + //Set m-rank(r) singular values to zero, and recompose + //best rank(r) approximation of the initial product + for (int j=rank[r]; jdata[j] = 0.0; + + //[intermediate step] Compute hatBetaR = U * S * t(V) + double* U = matrixM->data; //GSL require double precision + for (int j=0; jdata[u] * V->data[jj*m+u]; + hatBetaR[mi(j,jj,p,m)] = dotProduct; + } + } + + //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r) + for (int j=0; jdata[j*m+jj] = Rho[ai(j,jj,r,m,m,k)]; + } + gsl_linalg_LU_decomp(matrixE, permutation, &signum); + Real detRhoR = gsl_linalg_LU_det(matrixE, signum); + + //compute Y(i,:)*Rho(:,:,r) + for (int j=0; j + Real dotProduct = 0.0; + for (int u=0; u maxLogGamIR) + { + Z[i] = r; + maxLogGamIR = logGamIR; + } + sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0); + } + + sumLogLLF2 += log(sumLLF1); + } + + // Assign output variable LLF + *LLF = -invN * sumLogLLF2; + + //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi)))); + Real newDeltaPhi = 0.0; + for (int j=0; j newDeltaPhi) + newDeltaPhi = tmpDist; + } + } + } + + //update distance parameter to check algorithm convergence (delta(phi, Phi)) + //TODO: deltaPhi should be a linked list for perf. + if (ite < deltaPhiBufferSize) + deltaPhi[ite] = newDeltaPhi; + else + { + sumDeltaPhi -= deltaPhi[0]; + for (int u=0; u + +/******** + * Types + *******/ + +typedef double Real; +//typedef uint32_t UInt; +//typedef int32_t Int; + +/******************************* + * Matrix and arrays indexation + *******************************/ + +// Matrix Index ; TODO? ncol unused +#define mi(i,j,nrow,ncol)\ + j*nrow + i + +// Array Index ; TODO? d3 unused +#define ai(i,j,k,d1,d2,d3)\ + k*d1*d2 + j*d1 + i + +// Array4 Index ; TODO? ... +#define ai4(i,j,k,m,d1,d2,d3,d4)\ + m*d1*d2*d3 + k*d1*d2 + j*d1 + i + +/************************* + * Array copy & "zeroing" + ************************/ + +// Fill an array with zeros +#define zeroArray(array, size)\ +{\ + for (int u=0; u4000); +tableauLassoMLE(ind,:)=[]; +plot(tableauLassoMLE(2:end,3),tableauLassoMLE(2:end,4)+1100*tableauLassoMLE(2:end,2),'.','markersize', 30) +indiceInt = find(tableauLassoMLE(:,4)+1100*tableauLassoMLE(:,2)<-4401); +indiceBis = find(tableauLassoMLE(:,3)==53); +hold on +plot(tableauLassoMLE(indiceInt,3),tableauLassoMLE(indiceInt,4)+1100*tableauLassoMLE(indiceInt,2),'sr','markersize', 30,'linewidth',5) +plot(tableauLassoMLE(indiceBis,3),tableauLassoMLE(indiceBis,4)+1100*tableauLassoMLE(indiceBis,2),'dg','markersize', 30,'linewidth',5) +xlabel('Model dimension','Interpreter','LaTex','FontSize', 45) +ylabel('Penalized log-likelihood for $2 \hat{\kappa}$','Interpreter','LaTex','FontSize', 45) +set(gca, 'FontSize', 30, 'fontName','Times'); +set(gcf,'Units','normal') +set(gca,'Position',[.1 .1 .88 .85]) +hold off +% figure(4) +% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+3000*tableauLassoMLE(:,2),'.','markersize', 20) +% xlabel('Model dimension','FontSize', 30) +% ylabel('Penalized log-likelihood for $\kappa$ = 3000','Interpreter','LaTex','FontSize', 30) +% set(gca, 'FontSize', 20, 'fontName','Times'); +% +% figure(5) +% n2 = find(piLassoMLE(3,:)~=0,1)-1; +% n3 = find(piLassoMLE(4,:)~=0,1)-1; +% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+0*tableauLassoMLE(:,2),'.','markersize', 20) +% xlabel('Model dimension','FontSize', 30) +% ylabel('Log-likelihood','Interpreter','LaTex','FontSize', 30) +% set(gca, 'FontSize', 20, 'fontName','Times'); \ No newline at end of file diff --git a/reports/bazar_Emilie/script.m b/reports/bazar_Emilie/script.m new file mode 100644 index 0000000..33e24c1 --- /dev/null +++ b/reports/bazar_Emilie/script.m @@ -0,0 +1,30 @@ +%clear all +load donneesSelec.mat +%donnees=[ind100,res100]; +donnees = BB; +Res=mean(donnees,2); +for i=1:349 + X2(:,i)=Res(48*(i-1)+1:48*i); +end +for i=1:348 + signal(:,i)=[X2(:,i);X2(:,i+1)]; +end + +[p1,n1]=size(X2); +for i=1:n1 + for j=1:p1 + XC(i,j)=X2(j,i)-mean(X2(:,i)); + end + + [C,L]=wavedec(XC(i,:)',4,'haar'); + xProj(i,:)=C(1:12); +end + +X=xProj(1:end-2,:); +Y=xProj(2:end-1,:); +for i=1:n1 + Xrecon(i,:)=waverec([xProj(i,:),zeros(1,36)],L,'haar'); +end +for i=1:348 + signal3(:,i)=[Xrecon(i,:),Xrecon(i+1,:)]; +end diff --git a/reports/bazar_Emilie/scriptEssai.m b/reports/bazar_Emilie/scriptEssai.m new file mode 100644 index 0000000..3638977 --- /dev/null +++ b/reports/bazar_Emilie/scriptEssai.m @@ -0,0 +1,28 @@ +%clear all +load donneesSelec.mat +donnees=BB; + +Res=mean(donnees,2); +%Ind=mean(donnees(:,1:100),2); +for i=1:340 + X2(:,i)=Res(48*(i-1)+1:48*i); +end +for i=1:339 + signal(:,i)=[X2(:,i);X2(:,i+1)]; +end + +[p1,n1]=size(X2); +for i=1:n1 + [C,L]=wavedec(X2(:,i)',4,'haar'); + xProj(i,:)=C(4:12); +end + +X=xProj(1:end-2,:); +Y=xProj(2:end-1,:); +for i=1:n1 + Xrecon(i,:)=waverec([zeros(1,3),xProj(i,:),zeros(1,36)],L,'haar'); +end + +for i=1:339 + signal2(:,i)=[Xrecon(i,:),Xrecon(i+1,:)]; +end diff --git a/reports/bazar_Emilie/scriptSimules.m b/reports/bazar_Emilie/scriptSimules.m new file mode 100644 index 0000000..f068b5e --- /dev/null +++ b/reports/bazar_Emilie/scriptSimules.m @@ -0,0 +1,9 @@ +for i = 1:100 + courbeX(i,:) = data(((i-1)*96+1):((i-1)*96+48)); + courbeY(i,:) = data(((i-1)*96+49):((i-1)*96+96)); +end +plot((courbeX)') +figure(2) +plot((courbeY)') + +X2 = (courbeX)' \ No newline at end of file diff --git a/reports/bazar_Emilie/simulatedData.R b/reports/bazar_Emilie/simulatedData.R new file mode 100644 index 0000000..6031822 --- /dev/null +++ b/reports/bazar_Emilie/simulatedData.R @@ -0,0 +1,120 @@ +library("mclust") +#library("R.matlab", lib.loc="~/R/x86_64-pc-linux-gnu-library/3.3") +#redrawData = TRUE +#if (redrawData==TRUE){ + ########### + ## Model + ########### + K = 2 + p = 48 + T = seq(0,1.5,length.out = p) + T2 = seq(0,3, length.out = 2*p) + n = 100 + x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1 + plot(T,x1) + lines(T,x1) + + sigmaX = 0.12 + sigmaY = 0.12 + beta = list() + p1= 0.5 + beta[[1]] =diag(c(rep(p1,5),rep(1,5), rep(p1,5), rep(1, p-15))) + p2 = 1 + beta[[2]] = diag(c(rep(p2,5),rep(1,5), rep(p2,5), rep(1, p-15))) + ITE = 100 + ARI1 = ARI2 = ARI3 = rep(0,ITE) + XY = array(0, dim = c(ITE, 2*p,n)) + XYproj = array(0, dim=c(ITE, 96,n)) + + affec = list() + ########### + ## Iterations + ########### + for (ite in c(1:ITE)){ + ########### + ##Sample + ########### + x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n) + affec[[ite]] = sample(c(1,2), n, replace = TRUE) + y = x + xy = matrix(0,ncol=n, nrow= 2*p) + for (i in c(1:n)){ + y[,i] = x[,i] %*% beta[[affec[[ite]][i]]] + rnorm(p, 0, sigmaY) + xy[,i] = c(x[,i],y[,i]) + XY[ite,,i] = xy[,i] - mean(xy[,i]) + # Dx = dwt(x[,i], filter='haar')@W + # Dx = rev(unlist(Dx)) + # Dx = Dx[2:(1+3+6+12+24)] + # Ax = dwt(x[,i], filter='haar')@V + # Ax = rev(unlist(Ax)) + # Ax = Ax[2:(1+3)] + # Dy = dwt(y[,i], filter='haar')@W + # Dy = rev(unlist(Dy)) + # Dy = Dy[2:(1+3+6+12+24)] + # Ay = dwt(y[,i], filter='haar')@V + # Ay = rev(unlist(Ay)) + # Ay = Ay[2:(1+3)] + # XYproj[ite,,i] = c(Ax,Dx,Ay,Dy) + } + print(ite) + # + # + } + xy[c(7,55),] = NA + # write.table(XY,'data.csv', row.names=FALSE, col.names=FALSE) +matplot(T2,xy[,affec[[ite]]==1],type='l', col='red', lty = 1) +matplot(T2,xy[,affec[[ite]]==2],type='l', col='black', add=TRUE, lty= 1) +abline(v = 1.5) +text(0.75,0,'X', cex = 2 ) +text(0.75+1.5,0,'Y', cex = 2 ) +#proj = read.table('dataProj.csv') +#} + + +#matplot(T,x,type='l', col='black', xlab = '', ylab='', lwd=1.5,lty=1) +#matplot(T,y[,affec[[ite]]==1],type='l', col='red', xlab = '', ylab='', lwd=1.5,lty=1) +#matplot(T,y[,affec[[ite]]==2],type='l', col='black', add=TRUE,lwd=2,lty=1) +# proj2 = array(0,dim=c(ITE,2*p,n)) +# for (ite in c(1:ITE)){ +# for (i in c(1:n)){ +# A = proj[ite,(1+(i-1)*96):(i*96)] +# for (j in 1:96){ +# proj2[ite,j,i] = A[1,j] +# } +# } +# print(ite) +# } +########### +## Iterations +########### +Kmod2 = Kmod1 = rep(0,ITE) +Kmod3 = rep(0,ITE) +for (ite in c(1:ITE)){ + print(ite) + ########### + ## k-means 1 + ########### + mod1 = Mclust(t(XY[ite,,]),G = 1:2, mode='VII') + ARI1[ite] = adjustedRandIndex(mod1$classification, affec[[ite]]) + Kmod1[ite] = mod1$G + # ########### + # ## k-means 2 + # ########### + # #proj2 = + # mod2 = Mclust(t(XYproj[ite,,]),G = 1:8, mode='VII') + # ARI2[ite] = adjustedRandIndex(mod2$classification, affec[[ite]]) + # Kmod2[ite] = mod2$G + # ########### + # ## k-means 1 + # ########### + # #proj3 = + # mod3 = Mclust(t(XYproj[ite,c(4:12,52:60),]),G = 1:8, mode='VII') + # ARI3[ite] = adjustedRandIndex(mod3$classification, affec[[ite]]) + # Kmod3[ite] = mod3$G +} +ARI0 = rep(1,ITE) +par(cex.lab=1.5) +par(cex.axis=1.5) +boxplot(ARI0,ARI1, names = c('LassoMLE','K-means'), lwd=1.3) +table(Kmod1) +table(Kmod2) diff --git a/reports/essai16mars.R b/reports/essai16mars.R new file mode 100644 index 0000000..59bbb53 --- /dev/null +++ b/reports/essai16mars.R @@ -0,0 +1,31 @@ +p = 10 +q = 8 +k = 2 +D = 20 + +meanX = matrix(nrow=p,ncol=k) +meanX[,1] = rep(0,p) +meanX[,2] = rep(1,p) + +covX = array(dim=c(p,p,k)) +covX[,,1] = 0.1*diag(p) +covX[,,2] = 0.5*diag(p) + +covY = array(dim = c(q,q,k)) +covY[,,1] = 0.1*diag(q) +covY[,,2] = 0.2*diag(q) + +beta = array(dim = c(p,q,2)) +beta[,,2] = matrix(c(rep(2,(D)),rep(0, p*q-D))) +beta[,,1] = matrix(c(rep(1,D),rep(0, p*q-D))) + +n = 100 + +pi = c(0.4,0.6) + +data = generateXY(meanX,covX,covY, pi, beta, n) + +X = data$X +Y = data$Y + +res_valse = valse(X,Y) diff --git a/reports/essaiPlot.R b/reports/essaiPlot.R new file mode 100644 index 0000000..10b0e01 --- /dev/null +++ b/reports/essaiPlot.R @@ -0,0 +1,73 @@ +### Regression matrices +model = Res +K = dim(model$phi)[3] +valMax = max(abs(model$phi)) + +require(fields) + +if (K<4){ + par(mfrow = c(1,K)) +} else op = par(mfrow = c(2, (K+1)/2)) + +## Phi + +for (r in 1:K){ + image.plot(t(abs(model$phi[,,r])), + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) +} +par(mfrow = c(1,K),oma = c(0,0,3,0)) +mtext("Regression matrices in each cluster", side=3, line=4, font=2, cex=2, col='red') + +par(mfrow = c(1,2), oma=c(0,0,3,0)) +for (i in 1:4) + plot(runif(20), runif(20), + main=paste("random plot (",i,")",sep='')) +par(op) +mtext("Four plots", + side=3, line=4, font=2, cex=2, col='red') + +### Zoom onto two classes we want to compare +kSel = c(1,2) +par(mfrow = c(1,3)) + +for (r in kSel){ + image.plot(t(abs(model$phi[,,r])),xaxt="n",yaxt="n", + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) +} +image.plot(t(abs(model$phi[,,kSel[1]]-model$phi[,,kSel[2]])), + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) + +### Covariance matrices +par(mfrow = c(K, 1)) +for (r in 1:K){ + image.plot(matrix(diag(model$rho[,,r]), ncol= 1), + col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66)) +} + +### proportions +Gam = matrix(0, ncol = K, nrow = n) +gam = Gam +for (i in 1:n){ + for (r in 1:K){ + sqNorm2 = sum( (Y[i,]%*%model$rho[,,r]-X[i,]%*%model$phi[,,r])^2 ) + Gam[i,r] = model$pi[r] * exp(-0.5*sqNorm2)* det(model$rho[,,r]) + } + gam[i,] = Gam[i,] / sum(Gam[i,]) +} +affec = apply(gam, 1,which.max) +gam2 = matrix(NA, ncol = K, nrow = n) +for (i in 1:n){ + gam2[i, affec[i]] = gam[i, affec[i]] +} +boxplot(gam2) + +### Mean in each cluster +XY = cbind(X,Y) +XY_class= list() +meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2]) +for (r in 1:K){ + XY_class[[r]] = XY[affec == r, ] + meanPerClass[,r] = apply(XY_class[[r]], 2, mean) +} + +matplot(meanPerClass, type='l') diff --git a/reports/simulData_17mars.R b/reports/simulData_17mars.R new file mode 100644 index 0000000..93a8f20 --- /dev/null +++ b/reports/simulData_17mars.R @@ -0,0 +1,56 @@ +simulData_17mars = function(ite){ + set.seed = 22021989+ite + + ########### + ## Modele + ########### + K = 2 + p = 48 + T = seq(0,1.5,length.out = p) + T2 = seq(0,3, length.out = 2*p) + n = 100 + x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1 + sigmaX = 0.12 + sigmaY = 0.12 + beta = list() + p1= 0.5 + beta[[1]] =diag(c(rep(p1,5),rep(1,5), rep(p1,5), rep(1, p-15))) + p2 = 2 + beta[[2]] = diag(c(rep(p2,5),rep(1,5), rep(p2,5), rep(1, p-15))) + ARI1 = ARI2 = ARI3 = 0 + + ########### + ## Data + Projection + ########### + require(wavelets) + XY = array(0, dim = c(2*p,n)) + XYproj = array(0, dim=c(96,n)) + x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n) + affec = sample(c(1,2), n, replace = TRUE) + y = x + xy = matrix(0,ncol=n, nrow= 2*p) + for (i in c(1:n)){ + y[,i] = x[,i] %*% beta[[affec[i]]] + rnorm(p, 0, sigmaY) + xy[,i] = c(x[,i],y[,i]) + XY[,i] = xy[,i] - mean(xy[,i]) + Dx = dwt(x[,i], filter='haar')@W + Dx = rev(unlist(Dx)) + Dx = Dx[2:(1+3+6+12+24)] + Ax = dwt(x[,i], filter='haar')@V + Ax = rev(unlist(Ax)) + Ax = Ax[2:(1+3)] + Dy = dwt(y[,i], filter='haar')@W + Dy = rev(unlist(Dy)) + Dy = Dy[2:(1+3+6+12+24)] + Ay = dwt(y[,i], filter='haar')@V + Ay = rev(unlist(Ay)) + Ay = Ay[2:(1+3)] + XYproj[,i] = c(Ax,Dx,Ay,Dy) + } + + res_valse = valse(t(x),t(y), kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 1000) + res_valse_proj = valse(t(XYproj[1:p,]),t(XYproj[(p+1):(2*p),]), kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 1000) + + save(res_valse,file=paste("Res_",ite, ".RData",sep="")) + save(res_valse_proj,file=paste("ResProj_",ite, ".RData",sep="")) +} diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..1a02474 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,4 @@ +test.* +!test.*.c +/data/ +vgcore.* diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..8b8697e --- /dev/null +++ b/test/Makefile @@ -0,0 +1,31 @@ +CC = gcc +CFLAGS = -g -std=gnu99 -Wno-implicit-function-declaration +LDFLAGS = -lm -lgsl -lcblas +TEST_LDFLAGS = -L. libvalse_core.so +LIB = libvalse_core.so +LIB_SRC = $(wildcard ../pkg/src/sources/*.c) +LIB_OBJ = $(LIB_SRC:.c=.o) +INCLUDES = -I../pkg/src/sources +TESTS = test.EMGLLF test.EMGrank + +all: $(LIB) $(TESTS) + +$(LIB): $(LIB_OBJ) + $(CC) -shared -o $@ $^ $(LDFLAGS) + +test.EMGLLF: $(LIB) test.EMGLLF.o test_utils.o + $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS) + +test.EMGrank: $(LIB) test.EMGrank.o test_utils.o + $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS) + +%.o: %.c + $(CC) -fPIC -o $@ -c $< $(CFLAGS) $(INCLUDES) + +clean: + rm -f *.o ../pkg/src/sources/*.o + +cclean: clean + rm -f *.so ../pkg/src/*.so $(TESTS) + +.PHONY: all clean cclean diff --git a/test/generateRunSaveTest_EMGLLF.R b/test/generateRunSaveTest_EMGLLF.R new file mode 100644 index 0000000..c46b77b --- /dev/null +++ b/test/generateRunSaveTest_EMGLLF.R @@ -0,0 +1,48 @@ +source("helper.R") +library(valse) + +generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1., lambda=0.5, eps=1e-6) +{ + testFolder = "./data/" + dir.create(testFolder, showWarnings=FALSE, mode="0755") + + params = basicInitParameters(n, p, m, k) + xy = generateXYdefault(n, p, m, k) + + #save inputs + write.table(as.double(params$phiInit), paste(testFolder,"phiInit",sep=""), + row.names=F, col.names=F) + write.table(as.double(params$rhoInit), paste(testFolder,"rhoInit",sep=""), + row.names=F, col.names=F) + write.table(as.double(params$piInit), paste(testFolder,"piInit",sep=""), + row.names=F, col.names=F) + write.table(as.double(params$gamInit), paste(testFolder,"gamInit",sep=""), + row.names=F, col.names=F) + write.table(as.integer(mini), paste(testFolder,"mini",sep=""), + row.names=F, col.names=F) + write.table(as.integer(maxi), paste(testFolder,"maxi",sep=""), + row.names=F, col.names=F) + write.table(as.double(gamma), paste(testFolder,"gamma",sep=""), + row.names=F, col.names=F) + write.table(as.double(lambda), paste(testFolder,"lambda",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$X), paste(testFolder,"X",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$Y), paste(testFolder,"Y",sep=""), + row.names=F, col.names=F) + write.table(as.double(eps), paste(testFolder,"eps",sep=""), + row.names=F, col.names=F) + write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""), + row.names=F, col.names=F) + + res = valse::EMGLLF(params$phiInit,params$rhoInit,params$piInit,params$gamInit,mini, + maxi,gamma,lambda,xy$X,xy$Y,eps,fast=FALSE) + + #save outputs + write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F) + write.table(as.double(res$rho),paste(testFolder,"rho",sep=""),row.names=F,col.names=F) + write.table(as.double(res$pi),paste(testFolder,"pi",sep=""),row.names=F,col.names=F) + write.table(as.double(res$llh),paste(testFolder,"llh",sep=""),row.names=F,col.names=F) + write.table(as.double(res$S),paste(testFolder,"S",sep=""),row.names=F,col.names=F) + write.table(as.integer(res$affec),paste(testFolder,"affec",sep=""),row.names=F,col.names=F) +} diff --git a/test/generateRunSaveTest_EMGrank.R b/test/generateRunSaveTest_EMGrank.R new file mode 100644 index 0000000..f6a3314 --- /dev/null +++ b/test/generateRunSaveTest_EMGrank.R @@ -0,0 +1,40 @@ +source("helper.R") +library(valse) + +generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0, rank = c(1,2,4)) +{ + eps = 1e-6 + Pi = rep(1.0/k, k) + Rho = array(dim=c(m,m,k)) + for(i in 1:k) + Rho[,,i] = diag(1,m) + xy = generateXYdefault(n, p, m, k) + + testFolder = "./data/" + dir.create(testFolder, showWarnings=FALSE, mode="0755") + #save inputs + write.table(as.double(Pi), paste(testFolder,"Pi",sep=""), + row.names=F, col.names=F) + write.table(as.double(Rho), paste(testFolder,"Rho",sep=""), + row.names=F, col.names=F) + write.table(as.integer(mini), paste(testFolder,"mini",sep=""), + row.names=F, col.names=F) + write.table(as.integer(maxi), paste(testFolder,"maxi",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$X), paste(testFolder,"X",sep=""), + row.names=F, col.names=F) + write.table(as.double(xy$Y), paste(testFolder,"Y",sep=""), + row.names=F, col.names=F) + write.table(as.double(eps), paste(testFolder,"eps",sep=""), + row.names=F, col.names=F) + write.table(as.integer(rank), paste(testFolder,"rank",sep=""), + row.names=F, col.names=F) + write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""), + row.names=F, col.names=F) + + res = valse::EMGrank(Pi,Rho,mini,maxi,xy$X,xy$Y,eps,rank,fast=FALSE) + + #save output + write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F) + write.table(as.double(res$LLF),paste(testFolder,"LLF",sep=""),row.names=F,col.names=F) +} diff --git a/test/helper.R b/test/helper.R new file mode 100644 index 0000000..8ec122b --- /dev/null +++ b/test/helper.R @@ -0,0 +1,58 @@ +#' Generate a sample of (X,Y) of size n with default values +#' +#' @param n sample size +#' @param p number of covariates +#' @param m size of the response +#' @param k number of clusters +#' +#' @return list with X and Y +#' +generateXYdefault = function(n, p, m, k) +{ + meanX = rep(0, p) + covX = diag(p) + covY = array(dim=c(m,m,k)) + for(r in 1:k) + covY[,,r] = diag(m) + π = rep(1./k,k) + #initialize beta to a random number of non-zero random value + β = array(0, dim=c(p,m,k)) + for (j in 1:p) + { + nonZeroCount = sample(1:m, 1) + β[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) + } + + sample_IO = generateXY(n, π, meanX, β, covX, covY) + return (list(X=sample_IO$X,Y=sample_IO$Y)) +} + +#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for +#' weights, identity for covariance matrices, and uniformly distributed for the +#' clustering) +#' +#' @param n sample size +#' @param p number of covariates +#' @param m size of the response +#' @param k number of clusters +#' +#' @return list with phiInit, rhoInit,piInit,gamInit +#' +basicInitParameters = function(n,p,m,k) +{ + phiInit = array(0, dim=c(p,m,k)) + + piInit = (1./k)*rep(1,k) + + rhoInit = array(dim=c(m,m,k)) + for (i in 1:k) + rhoInit[,,i] = diag(m) + + gamInit = 0.1 * matrix(1, nrow=n, ncol=k) + R = sample(1:k, n, replace=TRUE) + for (i in 1:n) + gamInit[i,R[i]] = 0.9 + gamInit = gamInit/sum(gamInit[1,]) + + return (list("phiInit"=phiInit, "rhoInit"=rhoInit, "piInit"=piInit, "gamInit"=gamInit)) +} diff --git a/test/run.sh b/test/run.sh new file mode 100755 index 0000000..7e92929 --- /dev/null +++ b/test/run.sh @@ -0,0 +1,37 @@ +#!/bin/sh +set -e + +#Testing procedure for EMGLLF (inside this folder): + +algo=$1 #EMGLLF or EMGrank, + #second arg indicate if rebuild or rebuild+clean requested + +if [ "$2" == 'c' ]; then + #0.1) Clean package + C testing code + find ../pkg/man/ -type f ! -name 'valse-package.Rd' -delete + rm -f ../pkg/NAMESPACE + # Erase object and library files + rm -f ../pkg/src/*.so + rm -f ../pkg/src/adapters/*.o + make cclean +fi + +if [ "$2" == 'r' ] || [ "$2" == 'c' ]; then + #0.2) Install current version of the package (WARNING: roxygen 2 v5.0.1) + # --> devtools::install_github('klutometis/roxygen@v5.0.1') + echo "setwd('../pkg');library(roxygen2);roxygenize('.')" | R --slave + R CMD INSTALL ../pkg +fi + +#1) Generate data using R versions of EMGLLF/EMGrank (slow, but trusted) +echo -e "source('generateRunSaveTest_$algo.R');\n \ + # I'm happy with default values - feel free to give args\n \ + generateRunSaveTest_$algo() " \ + | R --slave + +#2) Compile test C code into an executable named "test.$algo" +make test.$algo + +#3) Run it with valgrind! +#valgrind +./test.$algo diff --git a/test/script_data.R b/test/script_data.R new file mode 100644 index 0000000..da319da --- /dev/null +++ b/test/script_data.R @@ -0,0 +1,15 @@ +m=6 +p=6 + +covY = array(0,dim = c(m,m,2)) +covY[,,1] = diag(m) +covY[,,2] = diag(m) + +Beta = array(0, dim = c(p, m, 2)) +Beta[1:4,1:4,1] = 3*diag(4) +Beta[1:4,1:4,2] = -2*diag(4) + +Data = generateXY(200, c(0.5,0.5), rep(0,p), Beta, diag(p), covY) +# +Res = valse(Data$X,Data$Y, fast=TRUE, plot=FALSE, verbose = TRUE, kmax=3, size_coll_mod = 50, selecMod = "DDSE", mini = 50, maxi=100) +plot(Res$tableau[,3], -Res$tableau[,4]) diff --git a/test/test.EMGLLF.c b/test/test.EMGLLF.c new file mode 100644 index 0000000..68f73d9 --- /dev/null +++ b/test/test.EMGLLF.c @@ -0,0 +1,83 @@ +#include "EMGLLF.h" +#include "test_utils.h" +#include + +int main(int argc, char** argv) +{ + int* dimensions = readArray_int("dimensions"); + int n = dimensions[0]; + int p = dimensions[1]; + int m = dimensions[2]; + int k = dimensions[3]; + free(dimensions); + + //////////// + // INPUTS // + Real* phiInit = readArray_real("phiInit"); + Real* rhoInit = readArray_real("rhoInit"); + Real* piInit = readArray_real("piInit"); + Real* gamInit = readArray_real("gamInit"); + int mini = read_int("mini"); + int maxi = read_int("maxi"); + Real gamma = read_real("gamma"); + Real lambda = read_real("lambda"); + Real* X = readArray_real("X"); + Real* Y = readArray_real("Y"); + Real eps = read_real("eps"); + //////////// + + ///////////// + // OUTPUTS // + Real* phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* rho = (Real*)malloc(m*m*k*sizeof(Real)); + Real* pi = (Real*)malloc(k*sizeof(Real)); + Real llh; + Real* S = (Real*)malloc(p*m*k*sizeof(Real)); + int* affec = (int*)malloc(n*sizeof(int)); + ///////////// + + //////////////////// + // Call to EMGLLF // + EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps, + phi,rho,pi,&llh,S,affec, + n,p,m,k); + //////////////////// + + free(phiInit); + free(rhoInit); + free(piInit); + free(gamInit); + free(X); + free(Y); + + // Compare to reference outputs + Real* ref_phi = readArray_real("phi"); + compareArray_real("phi", phi, ref_phi, p*m*k); + free(phi); + free(ref_phi); + + Real* ref_rho = readArray_real("rho"); + compareArray_real("rho", rho, ref_rho, m*m*k); + free(rho); + free(ref_rho); + + Real* ref_pi = readArray_real("pi"); + compareArray_real("pi", pi, ref_pi, k); + free(pi); + free(ref_pi); + + Real ref_llh = read_real("llh"); + compareArray_real("llh", &llh, &ref_llh, 1); + + Real* ref_S = readArray_real("S"); + compareArray_real("S", S, ref_S, p*m*k); + free(S); + free(ref_S); + + int* ref_affec = readArray_int("affec"); + compareArray_int("affec", affec, ref_affec, n); + free(affec); + free(ref_affec); + + return 0; +} diff --git a/test/test.EMGrank.c b/test/test.EMGrank.c new file mode 100644 index 0000000..71ce506 --- /dev/null +++ b/test/test.EMGrank.c @@ -0,0 +1,58 @@ +#include "EMGrank.h" +#include "test_utils.h" +#include + +int main(int argc, char** argv) +{ + int* dimensions = readArray_int("dimensions"); + int n = dimensions[0]; + int p = dimensions[1]; + int m = dimensions[2]; + int k = dimensions[3]; + free(dimensions); + + //////////// + // INPUTS // + Real* Rho = readArray_real("Rho"); + Real* Pi = readArray_real("Pi"); + int mini = read_int("mini"); + int maxi = read_int("maxi"); + Real* X = readArray_real("X"); + Real* Y = readArray_real("Y"); + Real eps = read_real("eps"); + int* rank = readArray_int("rank"); + //////////// + + ///////////// + // OUTPUTS // + Real* phi = (Real*)malloc(p*m*k*sizeof(Real)); + Real* LLF = (Real*)malloc(1*sizeof(Real)); + ///////////// + + ////////////////////////// + // Main call to EMGrank // + EMGrank_core(Pi,Rho,mini,maxi,X,Y,eps,rank, + phi,LLF, + n,p,m,k); + ////////////////////////// + + free(Rho); + free(Pi); + free(X); + free(Y); + free(rank); + + // Compare to reference outputs + Real* ref_phi = readArray_real("phi"); + compareArray_real("phi", phi, ref_phi, p*m*k); + free(phi); + free(ref_phi); + + // LLF + Real* ref_LLF = readArray_real("LLF"); + compareArray_real("LLF", LLF, ref_LLF, 1); + free(LLF); + free(ref_LLF); + + return 0; +} diff --git a/test/test_EMGLLF.R b/test/test_EMGLLF.R new file mode 100644 index 0000000..8ca7dea --- /dev/null +++ b/test/test_EMGLLF.R @@ -0,0 +1,39 @@ +library(valse) +testFolder = "data/" + +# NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove. + +#get inputs +npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep=""))) +n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4] +phiInit = array(as.double(as.matrix(read.table(paste(testFolder,"phiInit",sep="")))), dim=c(p,m,k)) +rhoInit = array(as.double(as.matrix(read.table(paste(testFolder,"rhoInit",sep="")))), dim=c(m,m,k)) +piInit = as.double(as.matrix(read.table(paste(testFolder,"piInit",sep="")))[,]) +gamInit = matrix(as.double(as.matrix(read.table(paste(testFolder,"gamInit",sep="")))), n,k) +mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1]) +maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1]) +gamma = as.double(as.matrix(read.table(paste(testFolder,"gamma",sep="")))[1]) +lambda = as.double(as.matrix(read.table(paste(testFolder,"lambda",sep="")))[1]) +X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p) +Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m) +eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1]) + +#get outputs +phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k)) +rho = array(as.double(as.matrix(read.table(paste(testFolder,"rho",sep="")))), dim=c(m,m,k)) +pi = as.double(as.matrix(read.table(paste(testFolder,"pi",sep="")))[,]) +llh = as.double(as.matrix(read.table(paste(testFolder,"llh",sep="")))[1]) +S = array(as.double(as.matrix(read.table(paste(testFolder,"S",sep="")))), dim=c(p,m,k)) +affec = as.double(as.matrix(read.table(paste(testFolder,"affec",sep="")))[,]) + +res = valse::EMGLLF( + phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,fast=TRUE) + +#compare outputs +nd=7 #number of digits +print( all(round(phi,nd) == round(res$phi,nd)) ) +print( all(round(rho,nd) == round(res$rho,nd)) ) +print( all(round(pi,nd) == round(res$pi,nd)) ) +print( all(round(llh,nd) == round(res$llh,nd)) ) +print( all(round(S,nd) == round(res$S,nd)) ) +print( all(affec == res$affec) ) diff --git a/test/test_EMGrank.R b/test/test_EMGrank.R new file mode 100644 index 0000000..b140e5e --- /dev/null +++ b/test/test_EMGrank.R @@ -0,0 +1,27 @@ +library(valse) +testFolder = "data/" + +# NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove. + +#get inputs +npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep=""))) +n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4] +Pi = as.double(as.matrix(read.table(paste(testFolder,"Pi",sep="")))[,]) +Rho = array(as.double(as.matrix(read.table(paste(testFolder,"Rho",sep="")))), dim=c(m,m,k)) +mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1]) +maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1]) +X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p) +Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m) +eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1]) +rank = as.double(as.matrix(read.table(paste(testFolder,"rank",sep="")))[,]) + +#get outputs +phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k)) +LLF = as.double(as.matrix(read.table(paste(testFolder,"LLF",sep="")))[1]) + +res = valse::EMGrank(Pi,Rho,mini,maxi,X,Y,eps,rank,fast=TRUE) + +#compare outputs +nd=7 #number of digits +print( all(round(phi,nd) == round(res$phi,nd)) ) +print( all(round(LLF,nd) == round(res$LLF,nd)) ) diff --git a/test/test_utils.c b/test/test_utils.c new file mode 100644 index 0000000..96972ee --- /dev/null +++ b/test/test_utils.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include "utils.h" + +// Check if array == refArray +void compareArray(const char* ID, const void* array, const void* refArray, int size, + int isinteger) +{ + Real EPS = 1e-5; //precision + printf("Checking %s\n",ID); + Real maxError = 0.0; + for (int i=0; i= maxError) + maxError = error; + } + if (maxError >= EPS) + printf(" Inaccuracy: max(abs(error)) = %g >= %g\n",maxError,EPS); + else + printf(" OK\n"); +} + +void compareArray_real(const char* ID, const void* array, const void* refArray, int size) +{ + return compareArray(ID, array, refArray, size, 0); +} + +void compareArray_int(const char* ID, const void* array, const void* refArray, int size) +{ + return compareArray(ID, array, refArray, size, 1); +} + +// Read array by columns (as in MATLAB) and return by-rows encoding +void* readArray(const char* fileName, int isinteger) +{ + // need to prepend 'data/' (not really nice code...) + char* fullFileName = (char*)calloc(5+strlen(fileName)+1, sizeof(char)); + strcat(fullFileName, "data/"); + strcat(fullFileName, fileName); + + // first pass to know how many elements to allocate + char* command = (char*)calloc(12+strlen(fullFileName)+8+1, sizeof(char)); + strcat(command, "wc -l "); + strcat(command, fullFileName); + FILE *arraySize = popen(command, "r"); + free(command); + char* bufferNum = (char*)calloc(64, sizeof(char)); + fgets(bufferNum, sizeof(bufferNum), arraySize); + int n = atoi(bufferNum); + pclose(arraySize); + + // open file for reading + FILE* arrayFile = fopen(fullFileName, "r"); + free(fullFileName); + + // read all values, and convert them to by-rows matrices format + size_t elementSize = isinteger ? sizeof(int) : sizeof(Real); + void* array = malloc(n*elementSize); + for (int i=0; i