--- /dev/null
+*.pdf filter=fat
+*.zip filter=fat
+*.tar.xz filter=fat
+*.data filter=fat
+*.png filter=fat
--- /dev/null
+[rsync]
+remote = gitfat@auder.net:~/files/valse
--- /dev/null
+#ignore temporary files
+*~
+*.swp
+*.Rout
+
+#ignore R session files + RStudio files
+.Rhistory
+.RData
+*.Rproj*
+.Rprofile
+.Rbuildignore
+
+#ignore R CMD build/check generated files
+/*.Rcheck/
+/*.tar.gz
+
+#ignore object files and executables
+*.o
+*.so
+*.exe
+
+#misc
+Rprof.out
+*.zip
--- /dev/null
+# VAriable seLection with mixtureS of modEls
+
+This code is a rewrite of a similar MATLAB project, still available [here](https://git.auder.net/?p=select.git;a=summary).
+
+Code co-authors: [Emilie Devijver](http://ama.liglab.fr/~devijver/), [Benjamin Goehry](http://www.math.u-psud.fr/~goehry/).
+
+This code corresponds to the applied parts of both co-authors' PhD theses.
+
+It uses git-fat to store binary objects; run `git fat init && git fat pull` to retrieve them.
+
+## Description
+
+TODO: see the R package
+
+Find a (more) interesting dataset (than the others)?
+Add toy datasets for the tests (or pick some from MASS?)
+
+ED: I simulated something basic via 'generateXYdefault(10,5,6,2)'
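+
+A minimal simulation sketch in R (hedged: the argument order follows `R/generateXY.R`, the parameter values are illustrative, and the `valse` call is untested here — see the bug notes elsewhere in this repository):
+
+```r
+library(valse)
+n <- 100; p <- 2; m <- 2; k <- 2
+beta <- array(1, dim = c(p, m, k)); beta[, , 2] <- 2 # distinct slopes per cluster
+data <- generateXY(n, c(0.5, 0.5), rep(0, p), beta, diag(p),
+  array(diag(m), dim = c(m, m, k)))
+res <- valse(data$X, data$Y, kmin = 2, kmax = 2, plot = FALSE)
+```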
--- /dev/null
+n = 100; m = 70; p = 5
+X = matrix(runif(n*p, -10, 10), nrow=n)
+Y = matrix(runif(n*m, -5, 15), nrow=n)
+
+V1 = valse::valse(X, Y, fast=FALSE)
+Error in while (!pi2AllPositive) { :
+ missing value where TRUE/FALSE needed
+
+V2 = valse::valse(X, Y, fast=TRUE)
+list()
+Error in out[[ind_uniq[l]]] :
+ attempt to select less than one element in get1index
+
+==> Error identified: line 61 in initSmallEM.R, division by 0
+It also occurs for smaller values of n and m, e.g.: n = 20; m = 20; p = 3
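+
+Possible guard (a sketch, not yet applied to the package): in initSmallEM.R, only
+normalize Gam[i, ] when its sum is positive, e.g.
+
+  sumGamI <- sum(Gam[i, ])
+  gamInit1[i, , repet] <- if (sumGamI > 0) Gam[i, ]/sumGamI else rep(1/k, k)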
--- /dev/null
+#$# git-fat 31158da74f93ba8b9e3883e8d4173e2da02255cf 1930037
--- /dev/null
+#$# git-fat c89e92fb27bcb2c94bc0eddb2af5e724ff676b9a 2049044
--- /dev/null
+#!/bin/sh
+#
+# Hook used to indent all source files before committing
+#
+
+# indent / format file by type
+indent() {
+  # determine the commit to diff against (HEAD, or the empty tree for the initial commit)
+ if git rev-parse --verify HEAD >/dev/null 2>&1
+ then
+ local against=HEAD
+ else
+ # Initial commit: diff against an empty tree object
+ local against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
+ fi
+
+ # loop on modified files
+ git diff --cached --name-only $against | while read file;
+ do
+ local ext=$(expr "$file" : ".*\(\..*\)")
+ case $ext in
+ .R|.r)
+ __indent_R;
+ ;;
+ esac
+ done
+}
+
+# Indent the file with `indent' if this is an R file
+__indent_R() {
+ if test ! -f $file
+ then
+ return;
+ fi
+
+ echo "Indenting " $file
+ echo "library(formatR);formatR::tidy_source('$file',comment=TRUE,blank=TRUE,
+ arrow=TRUE,brace.newline=TRUE,indent=2,width.cutoff=80,file='$file')" | R --slave
+ git add "$file"
+}
+
+indent
--- /dev/null
+#!/bin/sh
+
+git-fat push
--- /dev/null
+#ignore roxygen2 generated files
+/NAMESPACE
+/man/*.Rd
+!/man/*-package.Rd
--- /dev/null
+Package: valse
+Title: Variable Selection With Mixture Of Models
+Date: 2016-12-01
+Version: 0.1-0
+Description: Two methods are implemented to cluster data with finite mixture
+ regression models. Those procedures deal with high-dimensional covariates and
+ responses through a variable selection procedure based on the Lasso estimator.
+  A low-rank constraint can be added, computed in the Lasso-Rank procedure.
+ A collection of models is constructed, varying the level of sparsity and the
+ number of clusters, and a model is selected using a model selection criterion
+ (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model-
+ based clustering for high-dimensional data. Application to functional data' by
+  Emilie Devijver, published in Advances in Data Analysis and Classification (2016).
+Author: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
+ Emilie Devijver <Emilie.Devijver@kuleuven.be> [aut],
+ Benjamin Goehry <Benjamin.Goehry@math.u-psud.fr> [aut]
+Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
+Depends:
+ R (>= 3.0.0)
+Imports:
+ MASS,
+ parallel
+Suggests:
+ capushe,
+ methods,
+ roxygen2,
+ testthat
+URL: http://git.auder.net/?p=valse.git
+License: MIT + file LICENSE
+RoxygenNote: 6.0.1
+Collate:
+ 'plot_valse.R'
+ 'main.R'
+ 'selectVariables.R'
+ 'constructionModelesLassoRank.R'
+ 'constructionModelesLassoMLE.R'
+ 'computeGridLambda.R'
+ 'initSmallEM.R'
+ 'EMGrank.R'
+ 'EMGLLF.R'
+ 'generateXY.R'
+ 'A_NAMESPACE.R'
+ 'util.R'
--- /dev/null
+Copyright (c)
+ 2014-2017, Benjamin Auder
+ 2014-2017, Emilie Devijver
+ 2016-2017, Benjamin Goehry
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null
+#' @include generateXY.R
+#' @include EMGLLF.R
+#' @include EMGrank.R
+#' @include initSmallEM.R
+#' @include computeGridLambda.R
+#' @include constructionModelesLassoMLE.R
+#' @include constructionModelesLassoRank.R
+#' @include selectVariables.R
+#' @include main.R
+#' @include plot_valse.R
+#'
+#' @useDynLib valse
+#'
+#' @importFrom parallel makeCluster parLapply stopCluster clusterExport
+#' @importFrom MASS ginv
+NULL
--- /dev/null
+#' EMGLLF
+#'
+#' Run an EM algorithm with a Lasso penalty to estimate the mixture-of-regressions parameters.
+#'
+#' @param phiInit an initialization for phi
+#' @param rhoInit an initialization for rho
+#' @param piInit an initialization for pi
+#' @param gamInit initialization for the a posteriori probabilities
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param gamma integer for the power in the penalty, by default = 1
+#' @param lambda regularization parameter in the Lasso estimation
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to declare that the EM algorithm converges, by default = 1e-4
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#'
+#' @return A list with phi, rho, pi, llh, S, affec:
+#'   phi : rescaled mean parameters, computed by the EM
+#'   rho : rescaled variance parameters, computed by the EM
+#'   pi : rescaled proportion parameters, computed by the EM
+#'   llh : log-likelihood of the sample, for the estimated parameter values
+#'   S : last values of the thresholding statistic (used to build the lambda grid)
+#'   affec : index of the most probable cluster for each sample
+#'
+#' @export
+EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps, fast)
+{
+ if (!fast)
+ {
+ # Function in R
+ return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps))
+ }
+
+ # Function in C
+  n <- nrow(X) # number of samples
+  p <- ncol(X) # number of covariates
+  m <- ncol(Y) # size of Y (multivariate response)
+  k <- length(piInit) # number of mixture components
+ .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k),
+ llh = double(1), S = double(p * m * k), affec = integer(n), n, p, m, k,
+ PACKAGE = "valse")
+}
+
+# R version - slow but easy to read
+.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps)
+{
+ # Matrix dimensions
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+ k <- length(piInit)
+
+ # Adjustments required when p==1 or m==1 (var.sel. or output dim 1)
+ if (p==1 || m==1)
+ phiInit <- array(phiInit, dim=c(p,m,k))
+ if (m==1)
+ rhoInit <- array(rhoInit, dim=c(m,m,k))
+
+ # Outputs
+ phi <- phiInit
+ rho <- rhoInit
+ pi <- piInit
+ llh <- -Inf
+ S <- array(0, dim = c(p, m, k))
+
+ # Algorithm variables
+ gam <- gamInit
+ Gram2 <- array(0, dim = c(p, p, k))
+ ps2 <- array(0, dim = c(p, m, k))
+ X2 <- array(0, dim = c(n, p, k))
+ Y2 <- array(0, dim = c(n, m, k))
+
+ for (ite in 1:maxi)
+ {
+    # Remember the last pi, rho, phi values, for the exit condition at the end of the loop
+ Phi <- phi
+ Rho <- rho
+ Pi <- pi
+
+ # Computations associated to X and Y
+ for (r in 1:k)
+ {
+ for (mm in 1:m)
+ Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
+ for (i in 1:n)
+ X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
+ for (mm in 1:m)
+ ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
+ for (j in 1:p)
+ {
+ for (s in 1:p)
+ Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
+ }
+ }
+
+ ## M step
+
+ # For pi
+ b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
+ gam2 <- colSums(gam)
+ a <- sum(gam %*% log(pi))
+
+    # While some proportions are negative
+ kk <- 0
+ pi2AllPositive <- FALSE
+ while (!pi2AllPositive)
+ {
+ pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi)
+ pi2AllPositive <- all(pi2 >= 0)
+ kk <- kk + 1
+ }
+
+    # t is the largest value in the grid 0.1^kk such that the penalized criterion is nonincreasing
+ while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+ # na.rm=TRUE to handle 0*log(0)
+ -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b))
+ {
+ pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
+ kk <- kk + 1
+ }
+ t <- 0.1^kk
+ pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
+
+ # For phi and rho
+ for (r in 1:k)
+ {
+ for (mm in 1:m)
+ {
+ ps <- 0
+ for (i in 1:n)
+ ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
+ nY2 <- sum(Y2[, mm, r]^2)
+ rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
+ }
+ }
+
+ for (r in 1:k)
+ {
+ for (j in 1:p)
+ {
+ for (mm in 1:m)
+ {
+ S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] +
+ sum(phi[-j, mm, r] * Gram2[j, -j, r])
+ if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) {
+ phi[j, mm, r] <- 0
+ } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) {
+ phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
+ } else {
+ phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
+ }
+ }
+ }
+ }
+
+ ## E step
+
+ # Precompute det(rho[,,r]) for r in 1...k
+ detRho <- sapply(1:k, function(r) gdet(rho[, , r]))
+ sumLogLLH <- 0
+ for (i in 1:n)
+ {
+ # Update gam[,]; use log to avoid numerical problems
+ logGam <- sapply(1:k, function(r) {
+ log(pi[r]) + log(detRho[r]) - 0.5 *
+ sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)
+ })
+
+ logGam <- logGam - max(logGam) #adjust without changing proportions
+ gam[i, ] <- exp(logGam)
+ norm_fact <- sum(gam[i, ])
+ gam[i, ] <- gam[i, ] / norm_fact
+ sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2))
+ }
+
+ sumPen <- sum(pi^gamma * b)
+ last_llh <- llh
+ llh <- -sumLogLLH/n #+ lambda * sumPen
+ dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh)))
+ Dist1 <- max((abs(phi - Phi))/(1 + abs(phi)))
+ Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
+ Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
+ dist2 <- max(Dist1, Dist2, Dist3)
+
+ if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
+ break
+ }
+
+ affec = apply(gam, 1, which.max)
+ list(phi = phi, rho = rho, pi = pi, llh = llh, S = S, affec=affec)
+}
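+
+# Usage sketch (mirrors the calls made from initSmallEM() and computeGridLambda();
+# X and Y are your data matrices, the other values are illustrative):
+#   init <- initSmallEM(k = 2, X, Y, fast = FALSE)
+#   res <- EMGLLF(init$phiInit, init$rhoInit, init$piInit, init$gamInit,
+#     mini = 10, maxi = 100, gamma = 1, lambda = 0, X, Y, eps = 1e-4, fast = FALSE)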
--- /dev/null
+#' EMGrank
+#'
+#' Run an EM algorithm to estimate the rank-constrained mixture-of-regressions parameters.
+#'
+#' @param Pi proportion parameters
+#' @param Rho initial rescaled variance parameters
+#' @param mini minimum number of iterations in the EM algorithm
+#' @param maxi maximum number of iterations in the EM algorithm
+#' @param X matrix of covariates
+#' @param Y matrix of responses
+#' @param eps threshold to declare convergence
+#' @param rank vector of possible ranks
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#'
+#' @return A list with
+#'   phi : rescaled mean parameters, computed by the EM
+#'   LLF : log-likelihood of the sample, for the estimated parameter values
+#'
+#' @export
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, eps, rank, fast = TRUE)
+{
+ if (!fast)
+ {
+ # Function in R
+ return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, eps, rank))
+ }
+
+ # Function in C
+  n <- nrow(X) # number of samples
+  p <- ncol(X) # number of covariates
+  m <- ncol(Y) # size of Y (multivariate response)
+  k <- length(Pi) # number of mixture components
+ .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, eps, as.integer(rank), phi = double(p * m * k),
+ LLF = double(1), n, p, m, k, PACKAGE = "valse")
+}
+
+# helper to always have matrices as arg (TODO: put this elsewhere? improve?) -->
+# Yes, we should use by-columns storage everywhere... [later!]
+matricize <- function(X)
+{
+ if (!is.matrix(X))
+ return(t(as.matrix(X)))
+ return(X)
+}
+
+# R version - slow but easy to read
+.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, eps, rank)
+{
+ # matrix dimensions
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+ k <- length(Pi)
+
+ # init outputs
+ phi <- array(0, dim = c(p, m, k))
+ Z <- rep(1, n)
+ LLF <- 0
+
+ # local variables
+ Phi <- array(0, dim = c(p, m, k))
+ deltaPhi <- c()
+ sumDeltaPhi <- 0
+ deltaPhiBufferSize <- 20
+
+ # main loop
+ ite <- 1
+ while (ite <= mini || (ite <= maxi && sumDeltaPhi > eps))
+ {
+    # M step: update Beta (and then phi)
+ for (r in 1:k)
+ {
+ Z_indice <- seq_len(n)[Z == r] #indices where Z == r
+ if (length(Z_indice) == 0)
+ next
+ # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+ s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*%
+ crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
+ S <- s$d
+ # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation
+ # of the initial product
+ if (rank[r] < length(S))
+ S[(rank[r] + 1):length(S)] <- 0
+ phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
+ }
+
+    # E step, and computation of the log-likelihood
+ sumLogLLF2 <- 0
+ for (i in seq_len(n))
+ {
+ sumLLF1 <- 0
+ maxLogGamIR <- -Inf
+ for (r in seq_len(k))
+ {
+ dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
+ logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct
+ # Z[i] = index of max (gam[i,])
+ if (logGamIR > maxLogGamIR)
+ {
+ Z[i] <- r
+ maxLogGamIR <- logGamIR
+ }
+ sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2)
+ }
+ sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
+ }
+
+ LLF <- -1/n * sumLogLLF2
+
+ # update distance parameter to check algorithm convergence (delta(phi, Phi))
+ deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
+ if (length(deltaPhi) > deltaPhiBufferSize)
+ deltaPhi <- deltaPhi[2:length(deltaPhi)]
+ sumDeltaPhi <- sum(abs(deltaPhi))
+
+ # update other local variables
+ Phi <- phi
+ ite <- ite + 1
+ }
+ return(list(phi = phi, LLF = LLF))
+}
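+
+# Usage sketch (mirrors the call made from constructionModelesLassoRank();
+# Pi and Rho would come from selectVariables() output, the ranks are illustrative):
+#   res <- EMGrank(Pi, Rho, mini = 10, maxi = 100, X, Y, eps = 1e-4,
+#     rank = rep(2, length(Pi)), fast = FALSE)
+#   # res$phi: rank-constrained coefficients; res$LLF: log-likelihood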
--- /dev/null
+#' computeGridLambda
+#'
+#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
+#'
+#' @param phiInit initial value for phi
+#' @param rhoInit initial value for rho
+#' @param piInit initial value for pi
+#' @param gamInit initial value for the a posteriori probabilities
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param gamma power of weights in the penalty
+#' @param mini minimum number of iterations in EM algorithm
+#' @param maxi maximum number of iterations in EM algorithm
+#' @param eps threshold to stop the EM algorithm
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#'
+#' @return the grid of regularization parameters
+#'
+#' @export
+computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini,
+ maxi, eps, fast)
+{
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+ k <- length(piInit)
+
+ list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0,
+ X, Y, eps, fast)
+
+ grid <- array(0, dim = c(p, m, k))
+ for (j in 1:p)
+ {
+ for (mm in 1:m)
+ grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma)
+ }
+ sort(unique(grid))
+}
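+
+# Usage sketch (mirrors the call in main.R, with P obtained from initSmallEM()):
+#   grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
+#     X, Y, gamma = 1, mini = 10, maxi = 100, eps = 1e-4, fast = FALSE)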
--- /dev/null
+#' constructionModelesLassoMLE
+#'
+#' Construct a collection of models with the Lasso-MLE procedure.
+#'
+#' @param phiInit an initialization for phi, obtained from initSmallEM()
+#' @param rhoInit an initialization for rho, obtained from initSmallEM()
+#' @param piInit an initialization for pi, obtained from initSmallEM()
+#' @param gamInit an initialization for gam, obtained from initSmallEM()
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param gamma integer for the power in the penalty, by default = 1
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param S output of selectVariables.R
+#' @param ncores Number of cores, by default = 3
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#'
+#' @return a list with several models, defined by phi, rho, pi, llh
+#'
+#' @export
+constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
+ maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose)
+{
+ if (ncores > 1)
+ {
+ cl <- parallel::makeCluster(ncores, outfile = "")
+    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit",
+      "rhoInit", "piInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps",
+      "S", "ncores", "fast", "verbose"))
+ }
+
+ # Individual model computation
+ computeAtLambda <- function(lambda)
+ {
+ if (ncores > 1)
+ require("valse") #nodes start with an empty environment
+
+ if (verbose)
+ print(paste("Computations for lambda=", lambda))
+
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+ k <- length(piInit)
+ sel.lambda <- S[[lambda]]$selected
+ # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
+ col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars
+ if (length(col.sel) == 0)
+ return(NULL)
+
+    # lambda == 0 because we compute the MLE: no penalization here
+ res <- EMGLLF(array(phiInit,dim=c(p,m,k))[col.sel, , ], rhoInit, piInit, gamInit,
+ mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast)
+
+ # Eval dimension from the result + selected
+ phiLambda2 <- res$phi
+ rhoLambda <- res$rho
+ piLambda <- res$pi
+ phiLambda <- array(0, dim = c(p, m, k))
+ for (j in seq_along(col.sel))
+ phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
+ dimension <- length(unlist(sel.lambda))
+
+ ## Affectations
+ Gam <- matrix(0, ncol = length(piLambda), nrow = n)
+ for (i in 1:n)
+ {
+ for (r in 1:length(piLambda))
+ {
+ sqNorm2 <- sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2)
+ Gam[i, r] <- piLambda[r] * exp(-0.5 * sqNorm2) * det(rhoLambda[, , r])
+ }
+ }
+ Gam2 <- Gam/rowSums(Gam)
+ affec <- apply(Gam2, 1, which.max)
+ proba <- Gam2
+ LLH <- c(sum(log(apply(Gam,1,sum))), (dimension + m + 1) * k - 1)
+ # ## Computation of the loglikelihood
+ # # Precompute det(rhoLambda[,,r]) for r in 1...k
+ # detRho <- sapply(1:k, function(r) gdet(rhoLambda[, , r]))
+ # sumLogLLH <- 0
+ # for (i in 1:n)
+ # {
+ # # Update gam[,]; use log to avoid numerical problems
+ # logGam <- sapply(1:k, function(r) {
+ # log(piLambda[r]) + log(detRho[r]) - 0.5 *
+ # sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2)
+ # })
+ #
+ # #logGam <- logGam - max(logGam) #adjust without changing proportions -> change the LLH
+ # gam <- exp(logGam)
+ # norm_fact <- sum(gam)
+ # sumLogLLH <- sumLogLLH + log(norm_fact) - m/2* log(2 * base::pi)
+ # }
+ #llhLambda <- c(-sumLogLLH/n, (dimension + m + 1) * k - 1)
+ list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = LLH, affec = affec, proba = proba)
+ }
+
+ # For each lambda, computation of the parameters
+ out <-
+ if (ncores > 1) {
+ parLapply(cl, 1:length(S), computeAtLambda)
+ } else {
+ lapply(1:length(S), computeAtLambda)
+ }
+
+ if (ncores > 1)
+ parallel::stopCluster(cl)
+
+ out
+}
--- /dev/null
+#' constructionModelesLassoRank
+#'
+#' Construct a collection of models with the Lasso-Rank procedure.
+#'
+#' @param S output of selectVariables.R
+#' @param k number of components
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
+#' @param ncores Number of cores, by default = 3
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#'
+#' @return a list with several models, defined by phi, rho, pi, llh
+#'
+#' @export
+constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max,
+ ncores, fast, verbose)
+{
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+ L <- length(S)
+
+ # Possible interesting ranks
+ deltaRank <- rank.max - rank.min + 1
+ Size <- deltaRank^k
+ RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1)
+ for (r in 1:k)
+ {
+    # We want the table of all possible rank combinations, along with the lambdas.
+    # In the first column, each rank value is repeated deltaRank^(k-1) times, which
+    # fills the column. In the second one, each value is repeated deltaRank^(k-2)
+    # times, and this pattern is repeated deltaRank times... In the last column,
+    # each value appears once, and the pattern is repeated deltaRank^(k-1) times.
+ RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1),
+ each = deltaRank^(k - r)), each = L)
+ }
+ RankLambda[, k + 1] <- rep(1:L, times = Size)
+
+ if (ncores > 1)
+ {
+ cl <- parallel::makeCluster(ncores, outfile = "")
+    parallel::clusterExport(cl, envir = environment(), varlist = c("RankLambda",
+      "S", "mini", "maxi", "X", "Y", "eps", "k", "p", "m", "ncores", "fast",
+      "verbose"))
+ }
+
+ computeAtLambda <- function(index)
+ {
+ lambdaIndex <- RankLambda[index, k + 1]
+ rankIndex <- RankLambda[index, 1:k]
+ if (ncores > 1)
+ require("valse") #workers start with an empty environment
+
+ # 'relevant' will be the set of relevant columns
+ selected <- S[[lambdaIndex]]$selected
+ relevant <- c()
+ for (j in 1:p)
+ {
+ if (length(selected[[j]]) > 0)
+ relevant <- c(relevant, j)
+ }
+ if (max(rankIndex) < length(relevant))
+ {
+ phi <- array(0, dim = c(p, m, k))
+ if (length(relevant) > 0)
+ {
+ res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi,
+ X[, relevant], Y, eps, rankIndex, fast)
+ llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
+ phi[relevant, , ] <- res$phi
+ }
+ list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho)
+ }
+ }
+
+ # For each lambda in the grid we compute the estimators
+ out <-
+ if (ncores > 1) {
+ parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
+ } else {
+ lapply(seq_len(length(S) * Size), computeAtLambda)
+ }
+
+ if (ncores > 1)
+ parallel::stopCluster(cl)
+
+ out
+}
--- /dev/null
+#' generateXY
+#'
+#' Generate a sample of (X,Y) of size n
+#'
+#' @param n sample size
+#' @param π proportion for each cluster
+#' @param meanX vector of covariate means (of size p), common to all clusters
+#' @param covX covariance of the covariates (of size p*p)
+#' @param β regression matrix, of size p*m*k
+#' @param covY covariance of the response vector, per cluster (of size m*m*k)
+#'
+#' @return list with X, Y and the true class memberships
+#'
+#' @export
+generateXY <- function(n, π, meanX, β, covX, covY)
+{
+ p <- dim(covX)[1]
+ m <- dim(covY)[1]
+ k <- dim(covY)[3]
+
+ X <- matrix(nrow = 0, ncol = p)
+ Y <- matrix(nrow = 0, ncol = m)
+
+ # random generation of the size of each population in X~Y (unordered)
+ sizePop <- rmultinom(1, n, π)
+ class <- c() #map i in 1:n --> index of class in 1:k
+
+ for (i in 1:k)
+ {
+ class <- c(class, rep(i, sizePop[i]))
+ newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
+ X <- rbind(X, newBlockX)
+ Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*%
+ β[, , i], covY[, , i]))))
+ }
+
+ shuffle <- sample(n)
+ list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
+}
--- /dev/null
+#' initialization of the EM algorithm
+#'
+#' @param k number of components
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#'
+#' @return a list with phiInit, rhoInit, piInit, gamInit
+#' @export
+#' @importFrom methods new
+#' @importFrom stats cutree dist hclust runif
+initSmallEM <- function(k, X, Y, fast)
+{
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+ nIte <- 20
+ Zinit1 <- array(0, dim = c(n, nIte))
+ betaInit1 <- array(0, dim = c(p, m, k, nIte))
+ sigmaInit1 <- array(0, dim = c(m, m, k, nIte))
+ phiInit1 <- array(0, dim = c(p, m, k, nIte))
+ rhoInit1 <- array(0, dim = c(m, m, k, nIte))
+ Gam <- matrix(0, n, k)
+ piInit1 <- matrix(0, nIte, k)
+ gamInit1 <- array(0, dim = c(n, k, nIte))
+ LLFinit1 <- list()
+
+ # require(MASS) #Moore-Penrose generalized inverse of matrix
+ for (repet in 1:nIte)
+ {
+ distance_clus <- dist(cbind(X, Y))
+ tree_hier <- hclust(distance_clus)
+ Zinit1[, repet] <- cutree(tree_hier, k)
+
+ for (r in 1:k)
+ {
+ Z <- Zinit1[, repet]
+      Z_indice <- seq_len(n)[Z == r] # indices where Z == r
+ if (length(Z_indice) == 1) {
+ betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*%
+ crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
+ } else {
+ betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*%
+ crossprod(X[Z_indice, ], Y[Z_indice, ])
+ }
+ sigmaInit1[, , r, repet] <- diag(m)
+ phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet]
+ rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
+ piInit1[repet, r] <- mean(Z == r)
+ }
+
+ for (i in 1:n)
+ {
+ for (r in 1:k)
+ {
+ dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
+ - X[i, ] %*% phiInit1[, , r, repet])
+ Gam[i, r] <- piInit1[repet, r] *
+ det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
+ }
+ sumGamI <- sum(Gam[i, ])
+ # TODO: next line is a division by zero if dotProduct is big
+ gamInit1[i, , repet] <- Gam[i, ]/sumGamI
+ }
+
+ miniInit <- 10
+ maxiInit <- 11
+
+ init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
+ gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
+ eps = 1e-04, fast)
+ LLFinit1[[repet]] <- init_EMG$llh
+ }
+  b <- which.min(unlist(LLFinit1))
+ phiInit <- phiInit1[, , , b]
+ rhoInit <- rhoInit1[, , , b]
+ piInit <- piInit1[b, ]
+ gamInit <- gamInit1[, , b]
+
+ return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
+}
--- /dev/null
+#' valse
+#'
+#' Main function: construct a collection of models via Lasso-MLE or Lasso-Rank, then select one.
+#'
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param procedure among 'LassoMLE' or 'LassoRank'
+#' @param selecMod method to select a model among 'DDSE', 'Djump', 'BIC' or 'AIC'
+#' @param gamma integer for the power in the penalty, by default = 1
+#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
+#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
+#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
+#' @param kmin integer, minimum number of clusters, by default = 2
+#' @param kmax integer, maximum number of clusters, by default = 10
+#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
+#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
+#' @param ncores_outer Number of cores for the outer loop on k
+#' @param ncores_inner Number of cores for the inner loop on lambda
+#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
+#' @param grid_lambda a vector of regularization parameters, if known; by default numeric(0)
+#' @param size_coll_mod (Maximum) size of a collection of models
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#' @param verbose TRUE to show some execution traces
+#' @param plot TRUE to plot the selected model's parameters at the end, by default = TRUE
+#'
+#' @return a list with estimators of parameters
+#'
+#' @examples
+#' # TODO: real examples; a hedged sketch follows
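+#' \dontrun{
+#' # Untested sketch (illustrative values): two clusters with distinct slopes
+#' n <- 100; p <- 2; m <- 2; k <- 2
+#' beta <- array(1, dim = c(p, m, k)); beta[, , 2] <- 2
+#' data <- generateXY(n, c(0.5, 0.5), rep(0, p), beta, diag(p),
+#'   array(diag(m), dim = c(m, m, k)))
+#' res <- valse(data$X, data$Y, procedure = "LassoMLE", plot = FALSE)
+#' }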
+#' @export
+valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10,
+ maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1,
+ ncores_inner = 1, thresh = 1e-08, grid_lambda = numeric(0), size_coll_mod = 10,
+ fast = TRUE, verbose = FALSE, plot = TRUE)
+{
+ n <- nrow(X)
+ p <- ncol(X)
+ m <- ncol(Y)
+
+ if (verbose)
+ print("main loop: over all k and all lambda")
+
+ if (ncores_outer > 1) {
+ cl <- parallel::makeCluster(ncores_outer, outfile = "")
+    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X",
+      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin",
+      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh",
+      "grid_lambda", "size_coll_mod", "fast", "verbose", "p", "m"))
+ }
+
+ # Compute models with k components
+ computeModels <- function(k)
+ {
+ if (ncores_outer > 1)
+ require("valse") #nodes start with an empty environment
+
+ if (verbose)
+ print(paste("Parameters initialization for k =", k))
+ # smallEM initializes parameters by k-means and regression model in each
+ # component, doing this 20 times, and keeping the values maximizing the
+ # likelihood after 10 iterations of the EM algorithm.
+ P <- initSmallEM(k, X, Y, fast)
+ if (length(grid_lambda) == 0)
+ {
+ grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
+ X, Y, gamma, mini, maxi, eps, fast)
+ }
+ if (length(grid_lambda) > size_coll_mod)
+ grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
+
+ if (verbose)
+ print("Compute relevant parameters")
+ # select variables according to each regularization parameter from the grid:
+ # S$selected corresponding to selected variables
+ S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi,
+ gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
+
+ if (procedure == "LassoMLE") {
+ if (verbose)
+ print("run the procedure Lasso-MLE")
+ # compute parameter estimations, with the Maximum Likelihood Estimator,
+ # restricted on selected variables.
+ models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit,
+ P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
+ } else {
+ if (verbose)
+ print("run the procedure Lasso-Rank")
+ # compute parameter estimations, with the Low Rank Estimator, restricted on
+ # selected variables.
+ models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min,
+ rank.max, ncores_inner, fast, verbose)
+ }
+ # warning! Some models are NULL after running selectVariables
+ models <- models[sapply(models, function(cell) !is.null(cell))]
+ models
+ }
+
+ # List (index k) of lists (index lambda) of models
+ models_list <-
+ if (ncores_outer > 1) {
+ parLapply(cl, kmin:kmax, computeModels)
+ } else {
+ lapply(kmin:kmax, computeModels)
+ }
+ if (ncores_outer > 1)
+ parallel::stopCluster(cl)
+
+ if (!requireNamespace("capushe", quietly = TRUE))
+ {
+ warning("'capushe' not available: returning all models")
+ return(models_list)
+ }
+
+ # Get summary 'tableauRecap' from models
+ tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
+ {
+ models <- models_list[[i]]
+ # For a collection of models (same k, several lambda):
+ LLH <- sapply(models, function(model) model$llh[1])
+ k <- length(models[[1]]$pi)
+ sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[,
+ , 1] != 0) + 1) - 1)
+ data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n,
+ complexity = sumPen, contrast = -LLH)
+ }))
+ tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
+
+ if (verbose == TRUE)
+ print(tableauRecap)
+ modSel <- capushe::capushe(tableauRecap, n)
+ indModSel <- if (selecMod == "DDSE")
+ {
+ as.numeric(modSel@DDSE@model)
+ } else if (selecMod == "Djump")
+ {
+ as.numeric(modSel@Djump@model)
+ } else if (selecMod == "BIC")
+ {
+ modSel@BIC_capushe$model
+ } else if (selecMod == "AIC")
+ {
+ modSel@AIC_capushe$model
+ }
+
+ listMod <- as.integer(unlist(strsplit(as.character(indModSel), "[.]")))
+ modelSel <- models_list[[listMod[1]]][[listMod[2]]]
+ modelSel$tableau <- tableauRecap
+
+ if (plot)
+ print(plot_valse(X, Y, modelSel, n))
+
+ return(modelSel)
+}
--- /dev/null
+#' Plot
+#'
+#' Plot the relevant parameters of a model estimated by the valse procedure.
+#'
+#' @param X matrix of covariates (of size n*p)
+#' @param Y matrix of responses (of size n*m)
+#' @param model the model constructed by valse procedure
+#' @param n sample size
+#' @param comp TRUE to plot the difference between two clusters k1 and k2, by default = FALSE
+#' @param k1 index of the first cluster to compare (used when comp is TRUE)
+#' @param k2 index of the second cluster to compare (used when comp is TRUE)
+#' @return several plots
+#'
+#' @examples
+#' # TODO: real examples; a hedged sketch follows
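+#' \dontrun{
+#' # Sketch: res as returned by valse(X, Y, plot = FALSE)
+#' plot_valse(X, Y, res, n = nrow(X))
+#' }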
+#'
+#' @export
+#'
+plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
+{
+ require("gridExtra")
+ require("ggplot2")
+ require("reshape2")
+ require("cowplot")
+
+ K <- length(model$pi)
+ ## regression matrices
+ gReg <- list()
+ for (r in 1:K)
+ {
+ Melt <- melt(t((model$phi[, , r])))
+ gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
+ geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
+ midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
+ }
+ print(gReg)
+
+ ## Differences between two clusters
+ if (comp)
+ {
+    if (is.na(k1) || is.na(k2))
+      stop("k1 and k2 must be integers, representing the clusters you want to compare")
+ Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
+    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
+      geom_tile() +
+      scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+        space = "Lab") +
+      ggtitle(paste("Difference between regression matrices in cluster", k1, "and", k2))
+    print(gDiff)
+ }
+
+ ### Covariance matrices
+ matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
+ for (r in 1:K)
+ matCov[, r] <- diag(model$rho[, , r])
+ MeltCov <- melt(matCov)
+  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) +
+    geom_tile() +
+    scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+      space = "Lab") +
+    ggtitle("Covariance matrices")
+  print(gCov)
+
+ ### Proportions
+  gam2 <- matrix(NA, ncol = 2, nrow = n)
+ for (i in 1:n)
+ gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
+
+  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) +
+    geom_boxplot() +
+    theme(legend.position = "none") +
+    background_grid(major = "xy", minor = "none")
+  print(bp)
+
+ ### Mean in each cluster
+ XY <- cbind(X, Y)
+ XY_class <- list()
+ meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2])
+ for (r in 1:K)
+ {
+ XY_class[[r]] <- XY[model$affec == r, ]
+ if (sum(model$affec == r) == 1) {
+ meanPerClass[, r] <- XY_class[[r]]
+ } else {
+ meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
+ }
+ }
+ data <- data.frame(mean = as.vector(meanPerClass),
+ cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
+ g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
+ print(g + geom_line(aes(linetype = cluster, color = cluster))
+ + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
+}
--- /dev/null
+#' selectVariables
+#'
+#' Construct, for each lambda in the grid, the set of relevant variables.
+#'
+#' @param phiInit an initial estimator for phi (size: p*m*k)
+#' @param rhoInit an initial estimator for rho (size: m*m*k)
+#' @param piInit an initial estimator for pi (size : k)
+#' @param gamInit an initial estimator for gamma
+#' @param mini minimum number of iterations in EM algorithm
+#' @param maxi maximum number of iterations in EM algorithm
+#' @param gamma power in the penalty
+#' @param glambda grid of regularization parameters
+#' @param X matrix of regressors
+#' @param Y matrix of responses
+#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
+#' @param eps threshold to say that EM algorithm has converged
+#' @param ncores Number of cores for parallel execution (1 to disable)
+#' @param fast TRUE to use compiled C code, FALSE for R code only
+#'
+#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
+#'
+#' @examples
+#' # TODO: real examples; a hedged sketch follows
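+#' \dontrun{
+#' # Sketch (assumes X and Y exist): starting values from initSmallEM(),
+#' # with a small illustrative lambda grid
+#' P <- initSmallEM(k = 2, X, Y, fast = FALSE)
+#' S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini = 10,
+#'   maxi = 100, gamma = 1, glambda = c(0.1, 0.2), X, Y, thresh = 1e-08,
+#'   eps = 1e-04, ncores = 1, fast = FALSE)
+#' }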
+#'
+#' @export
+#'
+selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma,
+ glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast)
+{
+ if (ncores > 1) {
+ cl <- parallel::makeCluster(ncores, outfile = "")
+    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "piInit",
+      "gamInit", "mini", "maxi", "gamma", "glambda", "X", "Y", "thresh", "eps",
+      "fast"), envir = environment())
+ }
+
+ # Computation for a fixed lambda
+ computeCoefs <- function(lambda)
+ {
+ params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
+ X, Y, eps, fast)
+
+ p <- ncol(X)
+ m <- ncol(Y)
+
+ # selectedVariables: list where element j contains vector of selected variables
+ # in [1,m]
+ selectedVariables <- lapply(1:p, function(j) {
+ # from boolean matrix mxk of selected variables obtain the corresponding boolean
+ # m-vector, and finally return the corresponding indices
+ if (m>1) {
+ seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
+ } else {
+        if (any(abs(params$phi[j, , ]) > thresh))
+ 1
+ else
+ numeric(0)
+ }
+ })
+
+ list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
+ }
+
+ # For each lambda in the grid, we compute the coefficients
+ out <-
+ if (ncores > 1) {
+ parLapply(cl, glambda, computeCoefs)
+ } else {
+ lapply(glambda, computeCoefs)
+ }
+ if (ncores > 1)
+ parallel::stopCluster(cl)
+
+  # Remove models that are computed twice.
+  # (This actually compares all the parameters; we only want to drop models
+  # with identical sets of selected variables.)
+ # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ]
+ selec <- lapply(out, function(model) model$selected)
+ ind_dup <- duplicated(selec)
+ ind_uniq <- which(!ind_dup)
+ out2 <- list()
+ for (l in 1:length(ind_uniq))
+ out2[[l]] <- out[[ind_uniq[l]]]
+ out2
+}
--- /dev/null
+# Generalized determinant: handle the case where R dropped the matrix dimensions (1x1 case)
+gdet <- function(M)
+{
+ if (is.matrix(M))
+ return (det(M))
+ return (M[1]) #numeric, double
+}
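+
+# Examples (sketch): gdet(diag(2)) == 1 ; gdet(matrix(5, 1, 1)) == 5 ; gdet(5) == 5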
--- /dev/null
+or alternatively data_test.RData, also possible
--- /dev/null
+\name{valse-package}
+\alias{valse-package}
+\alias{valse}
+\docType{package}
+
+\title{
+ \packageTitle{valse}
+}
+
+\description{
+ \packageDescription{valse}
+}
+
+\details{
+  The devtools package should be useful at the development stage, since we rely on testthat
+  for unit tests and on roxygen2 for documentation. knitr is used to generate the package
+  vignette. Concerning the other suggested packages:
+  \itemize{
+    \item{parallel (generally) allows the procedures to run faster.}
+ }
+
+ The three main functions are ...
+}
+
+\author{
+ \packageAuthor{valse}
+
+ Maintainer: \packageMaintainer{valse}
+}
+
+%\references{
+% TODO: Literature or other references for background information
+%}
+
+%\examples{
+% TODO: simple examples of the most important functions
+%}
--- /dev/null
+#Debug flags
+PKG_CFLAGS=-g -I./sources
+
+#Prod flags:
+#PKG_CFLAGS=-O2 -I./sources
+
+PKG_LIBS=-lm -lgsl -lcblas
+
+SOURCES = $(wildcard adapters/*.c sources/*.c)
+
+OBJECTS = $(SOURCES:.c=.o)
--- /dev/null
+#include <R.h>
+#include <Rdefines.h>
+#include "EMGLLF.h"
+
+// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper)
+SEXP EMGLLF(
+ SEXP phiInit_,
+ SEXP rhoInit_,
+ SEXP piInit_,
+ SEXP gamInit_,
+ SEXP mini_,
+ SEXP maxi_,
+ SEXP gamma_,
+ SEXP lambda_,
+ SEXP X_,
+ SEXP Y_,
+ SEXP eps_
+) {
+ // Get matrices dimensions
+ int n = INTEGER(getAttrib(X_, R_DimSymbol))[0];
+ SEXP dim = getAttrib(phiInit_, R_DimSymbol);
+ int p = INTEGER(dim)[0];
+ int m = INTEGER(dim)[1];
+ int k = INTEGER(dim)[2];
+
+ ////////////
+ // INPUTS //
+ ////////////
+
+ // get scalar parameters
+ int mini = INTEGER_VALUE(mini_);
+ int maxi = INTEGER_VALUE(maxi_);
+ double gamma = NUMERIC_VALUE(gamma_);
+ double lambda = NUMERIC_VALUE(lambda_);
+ double eps = NUMERIC_VALUE(eps_);
+
+ // Get pointers from SEXP arrays ; WARNING: by columns !
+ double* phiInit = REAL(phiInit_);
+ double* rhoInit = REAL(rhoInit_);
+ double* piInit = REAL(piInit_);
+ double* gamInit = REAL(gamInit_);
+ double* X = REAL(X_);
+ double* Y = REAL(Y_);
+
+ /////////////
+ // OUTPUTS //
+ /////////////
+
+ SEXP phi, rho, pi, llh, S, affec, dimPhiS, dimRho;
+ PROTECT(dimPhiS = allocVector(INTSXP, 3));
+ int* pDimPhiS = INTEGER(dimPhiS);
+ pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k;
+ PROTECT(dimRho = allocVector(INTSXP, 3));
+ int* pDimRho = INTEGER(dimRho);
+ pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k;
+ PROTECT(phi = allocArray(REALSXP, dimPhiS));
+ PROTECT(rho = allocArray(REALSXP, dimRho));
+ PROTECT(pi = allocVector(REALSXP, k));
+ PROTECT(llh = allocVector(REALSXP, 1));
+ PROTECT(S = allocArray(REALSXP, dimPhiS));
+ PROTECT(affec = allocVector(INTSXP, n));
+ double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLlh=REAL(llh), *pS=REAL(S);
+ int *pAffec=INTEGER(affec);
+
+ ////////////////////
+ // Call to EMGLLF //
+ ////////////////////
+
+ EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,
+ pPhi,pRho,pPi,pLlh,pS,pAffec,
+ n,p,m,k);
+
+ // Build list from OUT params and return it
+ SEXP listParams, listNames;
+ int nouts = 6;
+ PROTECT(listParams = allocVector(VECSXP, nouts));
+ char* lnames[6] = {"phi", "rho", "pi", "llh", "S", "affec"}; //lists labels
+ PROTECT(listNames = allocVector(STRSXP,nouts));
+ for (int i=0; i<nouts; i++)
+ SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
+ setAttrib(listParams, R_NamesSymbol, listNames);
+ SET_VECTOR_ELT(listParams, 0, phi);
+ SET_VECTOR_ELT(listParams, 1, rho);
+ SET_VECTOR_ELT(listParams, 2, pi);
+ SET_VECTOR_ELT(listParams, 3, llh);
+ SET_VECTOR_ELT(listParams, 4, S);
+ SET_VECTOR_ELT(listParams, 5, affec);
+
+ UNPROTECT(10);
+ return listParams;
+}
--- /dev/null
+#include <R.h>
+#include <Rdefines.h>
+#include "EMGrank.h"
+
+// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper)
+SEXP EMGrank(
+ SEXP Pi_,
+ SEXP Rho_,
+ SEXP mini_,
+ SEXP maxi_,
+ SEXP X_,
+ SEXP Y_,
+ SEXP eps_,
+ SEXP rank_
+) {
+ // Get matrices dimensions
+ SEXP dimX = getAttrib(X_, R_DimSymbol);
+ int n = INTEGER(dimX)[0];
+ int p = INTEGER(dimX)[1];
+ SEXP dimRho = getAttrib(Rho_, R_DimSymbol);
+ int m = INTEGER(dimRho)[0];
+ int k = INTEGER(dimRho)[2];
+
+ ////////////
+ // INPUTS //
+ ////////////
+
+ // get scalar parameters
+ int mini = INTEGER_VALUE(mini_);
+ int maxi = INTEGER_VALUE(maxi_);
+ double eps = NUMERIC_VALUE(eps_);
+
+ // Get pointers from SEXP arrays ; WARNING: by columns !
+ double* Pi = REAL(Pi_);
+ double* Rho = REAL(Rho_);
+ double* X = REAL(X_);
+ double* Y = REAL(Y_);
+ int* rank = INTEGER(rank_);
+
+ /////////////
+ // OUTPUTS //
+ /////////////
+
+ SEXP phi, LLF, dimPhi;
+ PROTECT(dimPhi = allocVector(INTSXP, 3));
+ int* pDimPhi = INTEGER(dimPhi);
+ pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k;
+ PROTECT(phi = allocArray(REALSXP, dimPhi));
+ PROTECT(LLF = allocVector(REALSXP, 1));
+ double *pPhi=REAL(phi), *pLLF=REAL(LLF);
+
+ /////////////////////
+ // Call to EMGrank //
+ /////////////////////
+
+ EMGrank_core(Pi, Rho, mini, maxi, X, Y, eps, rank,
+ pPhi,pLLF,
+ n,p,m,k);
+
+ // Build list from OUT params and return it
+ SEXP listParams, listNames;
+ PROTECT(listParams = allocVector(VECSXP, 2));
+ char* lnames[2] = {"phi", "LLF"}; //lists labels
+ PROTECT(listNames = allocVector(STRSXP,2));
+ for (int i=0; i<2; i++)
+ SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
+ setAttrib(listParams, R_NamesSymbol, listNames);
+ SET_VECTOR_ELT(listParams, 0, phi);
+ SET_VECTOR_ELT(listParams, 1, LLF);
+
+ UNPROTECT(5);
+ return listParams;
+}
--- /dev/null
+#include "utils.h"
+#include <stdlib.h>
+#include <math.h>
+#include <gsl/gsl_linalg.h>
+
+// TODO: don't recompute indexes ai(...) and mi(...) when possible
+void EMGLLF_core(
+ // IN parameters
+  const Real* phiInit, // initial rescaled mean parameters
+  const Real* rhoInit, // initial rescaled variance parameters
+  const Real* piInit, // initial proportions
+  const Real* gamInit, // initial a posteriori probabilities of each sample
+  int mini, // minimum number of iterations in the EM algorithm
+  int maxi, // maximum number of iterations in the EM algorithm
+  Real gamma, // power of the proportions in the adaptive Lasso penalty
+  Real lambda, // value of the Lasso regularization parameter
+  const Real* X, // covariates
+  const Real* Y, // response
+  Real eps, // threshold to declare convergence
+  // OUT parameters (all pointers, to be modified)
+  Real* phi, // rescaled mean parameters, computed by the EM
+  Real* rho, // rescaled variance parameters, computed by the EM
+  Real* pi, // rescaled proportions, computed by the EM
+  Real* llh, // (last) log-likelihood of the sample,
+    // for the estimated parameter values
+  Real* S,
+  int* affec,
+  // additional size parameters
+  int n, // number of samples
+  int p, // number of covariates
+  int m, // size of Y (multivariate response)
+  int k) // number of mixture components
+{
+ //Initialize outputs
+ copyArray(phiInit, phi, p*m*k);
+ copyArray(rhoInit, rho, m*m*k);
+ copyArray(piInit, pi, k);
+ //S is already allocated, and doesn't need to be 'zeroed'
+
+ //Other local variables: same as in R
+ Real* gam = (Real*)malloc(n*k*sizeof(Real));
+ Real* logGam = (Real*)malloc(k*sizeof(Real));
+ copyArray(gamInit, gam, n*k);
+ Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real));
+ Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real));
+ Real* b = (Real*)malloc(k*sizeof(Real));
+ Real* X2 = (Real*)malloc(n*p*k*sizeof(Real));
+ Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real));
+ *llh = -INFINITY;
+ Real* pi2 = (Real*)malloc(k*sizeof(Real));
+ // Additional (not at this place, in R file)
+ Real* gam2 = (Real*)malloc(k*sizeof(Real));
+ Real* sqNorm2 = (Real*)malloc(k*sizeof(Real));
+ Real* detRho = (Real*)malloc(k*sizeof(Real));
+ gsl_matrix* matrix = gsl_matrix_alloc(m, m);
+ gsl_permutation* permutation = gsl_permutation_alloc(m);
+ Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
+ Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
+ const Real gaussConstM = pow(2.*M_PI,m/2.);
+ Real* Phi = (Real*)malloc(p*m*k*sizeof(Real));
+ Real* Rho = (Real*)malloc(m*m*k*sizeof(Real));
+ Real* Pi = (Real*)malloc(k*sizeof(Real));
+
+ for (int ite=1; ite<=maxi; ite++)
+ {
+ copyArray(phi, Phi, p*m*k);
+ copyArray(rho, Rho, m*m*k);
+ copyArray(pi, Pi, k);
+
+    // Computations associated with X and Y
+ for (int r=0; r<k; r++)
+ {
+ for (int mm=0; mm<m; mm++)
+ {
+ //Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
+ for (int u=0; u<n; u++)
+ Y2[ai(u,mm,r,n,m,k)] = sqrt(gam[mi(u,r,n,k)]) * Y[mi(u,mm,n,m)];
+ }
+ for (int i=0; i<n; i++)
+ {
+ //X2[i,,r] = sqrt(gam[i,r]) * X[i,]
+ for (int u=0; u<p; u++)
+ X2[ai(i,u,r,n,p,k)] = sqrt(gam[mi(i,r,n,k)]) * X[mi(i,u,n,p)];
+ }
+ for (int mm=0; mm<m; mm++)
+ {
+ //ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
+ for (int u=0; u<p; u++)
+ {
+ Real dotProduct = 0.;
+ for (int v=0; v<n; v++)
+ dotProduct += X2[ai(v,u,r,n,p,k)] * Y2[ai(v,mm,r,n,m,k)];
+ ps2[ai(u,mm,r,p,m,k)] = dotProduct;
+ }
+ }
+ for (int j=0; j<p; j++)
+ {
+ for (int s=0; s<p; s++)
+ {
+ //Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
+ Real dotProduct = 0.;
+ for (int u=0; u<n; u++)
+ dotProduct += X2[ai(u,j,r,n,p,k)] * X2[ai(u,s,r,n,p,k)];
+ Gram2[ai(j,s,r,p,p,k)] = dotProduct;
+ }
+ }
+ }
+
+ /////////////
+    // M step  //
+ /////////////
+
+    // For pi
+ for (int r=0; r<k; r++)
+ {
+ //b[r] = sum(abs(phi[,,r]))
+ Real sumAbsPhi = 0.;
+ for (int u=0; u<p; u++)
+ for (int v=0; v<m; v++)
+ sumAbsPhi += fabs(phi[ai(u,v,r,p,m,k)]);
+ b[r] = sumAbsPhi;
+ }
+ //gam2 = colSums(gam)
+ for (int u=0; u<k; u++)
+ {
+ Real sumOnColumn = 0.;
+ for (int v=0; v<n; v++)
+ sumOnColumn += gam[mi(v,u,n,k)];
+ gam2[u] = sumOnColumn;
+ }
+ //a = sum(gam %*% log(pi))
+ Real a = 0.;
+ for (int u=0; u<n; u++)
+ {
+ Real dotProduct = 0.;
+ for (int v=0; v<k; v++)
+ dotProduct += gam[mi(u,v,n,k)] * log(pi[v]);
+ a += dotProduct;
+ }
+
+    //while some proportions are negative
+ int kk = 0,
+ pi2AllPositive = 0;
+ Real invN = 1./n;
+ while (!pi2AllPositive)
+ {
+ //pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
+ Real pow_01_kk = pow(0.1,kk);
+ for (int r=0; r<k; r++)
+ pi2[r] = pi[r] + pow_01_kk * (invN*gam2[r] - pi[r]);
+ //pi2AllPositive = all(pi2 >= 0)
+ pi2AllPositive = 1;
+ for (int r=0; r<k; r++)
+ {
+ if (pi2[r] < 0)
+ {
+ pi2AllPositive = 0;
+ break;
+ }
+ }
+ kk++;
+ }
+
+ //sum(pi^gamma * b)
+ Real piPowGammaDotB = 0.;
+ for (int v=0; v<k; v++)
+ piPowGammaDotB += pow(pi[v],gamma) * b[v];
+ //sum(pi2^gamma * b)
+ Real pi2PowGammaDotB = 0.;
+ for (int v=0; v<k; v++)
+ pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
+ //sum(gam2 * log(pi2))
+ Real gam2DotLogPi2 = 0.;
+ for (int v=0; v<k; v++)
+ gam2DotLogPi2 += gam2[v] * log(pi2[v]);
+
+    //t is the largest value in the grid 0.1^kk such that the penalized criterion is nonincreasing
+ while (-invN*a + lambda*piPowGammaDotB < -invN*gam2DotLogPi2 + lambda*pi2PowGammaDotB
+ && kk<1000)
+ {
+ Real pow_01_kk = pow(0.1,kk);
+ //pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
+ for (int v=0; v<k; v++)
+ pi2[v] = pi[v] + pow_01_kk * (invN*gam2[v] - pi[v]);
+ //pi2 was updated, so we recompute pi2PowGammaDotB and gam2DotLogPi2
+ pi2PowGammaDotB = 0.;
+ for (int v=0; v<k; v++)
+ pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
+ gam2DotLogPi2 = 0.;
+ for (int v=0; v<k; v++)
+ gam2DotLogPi2 += gam2[v] * log(pi2[v]);
+ kk++;
+ }
+ Real t = pow(0.1,kk);
+ //sum(pi + t*(pi2-pi))
+ Real sumPiPlusTbyDiff = 0.;
+ for (int v=0; v<k; v++)
+ sumPiPlusTbyDiff += (pi[v] + t*(pi2[v] - pi[v]));
+ //pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
+ for (int v=0; v<k; v++)
+ pi[v] = (pi[v] + t*(pi2[v] - pi[v])) / sumPiPlusTbyDiff;
+
+    //For phi and rho
+ for (int r=0; r<k; r++)
+ {
+ for (int mm=0; mm<m; mm++)
+ {
+ Real ps = 0.,
+ nY2 = 0.;
+ // Compute ps, and nY2 = sum(Y2[,mm,r]^2)
+ for (int i=0; i<n; i++)
+ {
+ //< X2[i,,r] , phi[,mm,r] >
+ Real dotProduct = 0.;
+ for (int u=0; u<p; u++)
+ dotProduct += X2[ai(i,u,r,n,p,k)] * phi[ai(u,mm,r,p,m,k)];
+ //ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
+ ps += Y2[ai(i,mm,r,n,m,k)] * dotProduct;
+ nY2 += Y2[ai(i,mm,r,n,m,k)] * Y2[ai(i,mm,r,n,m,k)];
+ }
+ //rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
+ rho[ai(mm,mm,r,m,m,k)] = (ps + sqrt(ps*ps + 4*nY2 * gam2[r])) / (2*nY2);
+ }
+ }
+
+ for (int r=0; r<k; r++)
+ {
+ for (int j=0; j<p; j++)
+ {
+ for (int mm=0; mm<m; mm++)
+ {
+ //sum(phi[-j,mm,r] * Gram2[j,-j,r])
+ Real phiDotGram2 = 0.;
+ for (int u=0; u<p; u++)
+ {
+ if (u != j)
+ phiDotGram2 += phi[ai(u,mm,r,p,m,k)] * Gram2[ai(j,u,r,p,p,k)];
+ }
+ //S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
+ S[ai(j,mm,r,p,m,k)] = -rho[ai(mm,mm,r,m,m,k)] * ps2[ai(j,mm,r,p,m,k)]
+ + phiDotGram2;
+ Real pirPowGamma = pow(pi[r],gamma);
+ if (fabs(S[ai(j,mm,r,p,m,k)]) <= n*lambda*pirPowGamma)
+ phi[ai(j,mm,r,p,m,k)] = 0.;
+ else if (S[ai(j,mm,r,p,m,k)] > n*lambda*pirPowGamma)
+ {
+ phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)])
+ / Gram2[ai(j,j,r,p,p,k)];
+ }
+ else
+ {
+ phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)])
+ / Gram2[ai(j,j,r,p,p,k)];
+ }
+ }
+ }
+ }
+
+ /////////////
+    // E step  //
+ /////////////
+
+ // Precompute det(rho[,,r]) for r in 1...k
+ int signum;
+ for (int r=0; r<k; r++)
+ {
+ for (int u=0; u<m; u++)
+ {
+ for (int v=0; v<m; v++)
+ matrix->data[u*m+v] = rho[ai(u,v,r,m,m,k)];
+ }
+ gsl_linalg_LU_decomp(matrix, permutation, &signum);
+ detRho[r] = gsl_linalg_LU_det(matrix, signum);
+ }
+
+ Real sumLogLLH = 0.;
+ for (int i=0; i<n; i++)
+ {
+ for (int r=0; r<k; r++)
+ {
+ //compute Y[i,]%*%rho[,,r]
+ for (int u=0; u<m; u++)
+ {
+ YiRhoR[u] = 0.;
+ for (int v=0; v<m; v++)
+ YiRhoR[u] += Y[mi(i,v,n,m)] * rho[ai(v,u,r,m,m,k)];
+ }
+
+ //compute X[i,]%*%phi[,,r]
+ for (int u=0; u<m; u++)
+ {
+ XiPhiR[u] = 0.;
+ for (int v=0; v<p; v++)
+ XiPhiR[u] += X[mi(i,v,n,p)] * phi[ai(v,u,r,p,m,k)];
+ }
+
+ //compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2
+ sqNorm2[r] = 0.;
+ for (int u=0; u<m; u++)
+ sqNorm2[r] += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
+ }
+
+ // Update gam[,]; use log to avoid numerical problems
+ Real maxLogGam = -INFINITY;
+ for (int r=0; r<k; r++)
+ {
+ logGam[r] = log(pi[r]) - .5 * sqNorm2[r] + log(detRho[r]);
+ if (maxLogGam < logGam[r])
+ maxLogGam = logGam[r];
+ }
+ Real norm_fact = 0.;
+ for (int r=0; r<k; r++)
+ {
+ logGam[r] = logGam[r] - maxLogGam; //adjust without changing proportions
+ gam[mi(i,r,n,k)] = exp(logGam[r]); //gam[i, ] <- exp(logGam)
+ norm_fact += gam[mi(i,r,n,k)]; //norm_fact <- sum(gam[i, ])
+ }
+ // gam[i, ] <- gam[i, ] / norm_fact
+ for (int r=0; r<k; r++)
+ gam[mi(i,r,n,k)] /= norm_fact;
+
+ sumLogLLH += log(norm_fact) - log(gaussConstM);
+ }
+
+ //sumPen = sum(pi^gamma * b)
+ Real sumPen = 0.;
+ for (int r=0; r<k; r++)
+ sumPen += pow(pi[r],gamma) * b[r];
+ Real last_llh = *llh;
+ //llh = -sumLogLLH/n #+ lambda*sumPen
+ *llh = -invN * sumLogLLH; //+ lambda * sumPen;
+ Real dist = ( ite==1 ? *llh : (*llh - last_llh) / (1. + fabs(*llh)) );
+
+ //Dist1 = max( abs(phi-Phi) / (1+abs(phi)) )
+ Real Dist1 = 0.;
+ for (int u=0; u<p; u++)
+ {
+ for (int v=0; v<m; v++)
+ {
+ for (int w=0; w<k; w++)
+ {
+ Real tmpDist = fabs(phi[ai(u,v,w,p,m,k)]-Phi[ai(u,v,w,p,m,k)])
+ / (1.+fabs(phi[ai(u,v,w,p,m,k)]));
+ if (tmpDist > Dist1)
+ Dist1 = tmpDist;
+ }
+ }
+ }
+ //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
+ Real Dist2 = 0.;
+ for (int u=0; u<m; u++)
+ {
+ for (int v=0; v<m; v++)
+ {
+ for (int w=0; w<k; w++)
+ {
+ Real tmpDist = fabs(rho[ai(u,v,w,m,m,k)]-Rho[ai(u,v,w,m,m,k)])
+ / (1.+fabs(rho[ai(u,v,w,m,m,k)]));
+ if (tmpDist > Dist2)
+ Dist2 = tmpDist;
+ }
+ }
+ }
+    //Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
+    Real Dist3 = 0.;
+    for (int v=0; v<k; v++)
+    {
+      Real tmpDist = fabs(pi[v]-Pi[v]) / (1.+fabs(Pi[v]));
+      if (tmpDist > Dist3)
+        Dist3 = tmpDist;
+    }
+ //dist2=max([max(Dist1),max(Dist2),max(Dist3)]);
+ Real dist2 = Dist1;
+ if (Dist2 > dist2)
+ dist2 = Dist2;
+ if (Dist3 > dist2)
+ dist2 = Dist3;
+
+ if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
+ break;
+ }
+
+ //affec = apply(gam, 1, which.max)
+ for (int i=0; i<n; i++)
+ {
+ Real rowMax = 0.;
+ affec[i] = 0;
+ for (int j=0; j<k; j++)
+ {
+ if (gam[mi(i,j,n,k)] > rowMax)
+ {
+ affec[i] = j+1; //R indices start at 1
+ rowMax = gam[mi(i,j,n,k)];
+ }
+ }
+ }
+
+ //free memory
+ free(b);
+ free(gam);
+ free(logGam);
+ free(Phi);
+ free(Rho);
+ free(Pi);
+ free(Gram2);
+ free(ps2);
+ free(detRho);
+ gsl_matrix_free(matrix);
+ gsl_permutation_free(permutation);
+ free(XiPhiR);
+ free(YiRhoR);
+ free(gam2);
+ free(pi2);
+ free(X2);
+ free(Y2);
+ free(sqNorm2);
+}
--- /dev/null
+#ifndef valse_EMGLLF_H
+#define valse_EMGLLF_H
+
+#include "utils.h"
+
+void EMGLLF_core(
+ // IN parameters
+ const Real* phiInit,
+ const Real* rhoInit,
+ const Real* piInit,
+ const Real* gamInit,
+ int mini,
+ int maxi,
+ Real gamma,
+ Real lambda,
+ const Real* X,
+ const Real* Y,
+  Real eps,
+ // OUT parameters
+ Real* phi,
+ Real* rho,
+ Real* pi,
+ Real* LLF,
+ Real* S,
+ int* affec,
+ // additional size parameters
+ int n,
+ int p,
+ int m,
+ int k);
+
+#endif
--- /dev/null
+#include <stdlib.h>
+#include <gsl/gsl_linalg.h>
+#include "utils.h"
+
+// Compute pseudo-inverse of a square matrix
+static Real* pinv(const Real* matrix, int dim)
+{
+ gsl_matrix* U = gsl_matrix_alloc(dim,dim);
+ gsl_matrix* V = gsl_matrix_alloc(dim,dim);
+ gsl_vector* S = gsl_vector_alloc(dim);
+ gsl_vector* work = gsl_vector_alloc(dim);
+ Real EPS = 1e-10; //threshold for singular value "== 0"
+
+ //copy matrix into U
+ copyArray(matrix, U->data, dim*dim);
+
+ //U,S,V = SVD of matrix
+ gsl_linalg_SV_decomp(U, V, S, work);
+ gsl_vector_free(work);
+
+ // Obtain pseudo-inverse by V*S^{-1}*t(U)
+ Real* inverse = (Real*)malloc(dim*dim*sizeof(Real));
+ for (int i=0; i<dim; i++)
+ {
+ for (int ii=0; ii<dim; ii++)
+ {
+ Real dotProduct = 0.0;
+ for (int j=0; j<dim; j++)
+ dotProduct += V->data[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j];
+ inverse[i*dim+ii] = dotProduct;
+ }
+ }
+
+ gsl_matrix_free(U);
+ gsl_matrix_free(V);
+ gsl_vector_free(S);
+ return inverse;
+}
+
+// EMGrank: EM algorithm for the rank-constrained mixture estimator (see R/EMGrank.R for the interface)
+void EMGrank_core(
+ // IN parameters
+  const Real* Pi, // proportion parameters
+  const Real* Rho, // initial rescaled variance parameters
+  int mini, // minimum number of iterations in the EM algorithm
+  int maxi, // maximum number of iterations in the EM algorithm
+  const Real* X, // covariates
+  const Real* Y, // response
+  Real tau, // threshold to declare convergence
+  const int* rank, // vector of possible ranks
+  // OUT parameters
+  Real* phi, // rescaled mean parameters, computed by the EM
+  Real* LLF, // log-likelihood of the sample, for the estimated parameter values
+  // additional size parameters
+  int n, // sample size
+  int p, // number of covariates
+  int m, // size of Y (multivariate response)
+  int k) // number of components
+{
+ // Allocations, initializations
+ Real* Phi = (Real*)calloc(p*m*k,sizeof(Real));
+ Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real));
+ int signum;
+ Real invN = 1.0/n;
+ int deltaPhiBufferSize = 20;
+ Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real));
+ int ite = 0;
+ Real sumDeltaPhi = 0.0;
+ Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
+ Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
+ Real* Xr = (Real*)malloc(n*p*sizeof(Real));
+ Real* Yr = (Real*)malloc(n*m*sizeof(Real));
+ Real* tXrXr = (Real*)malloc(p*p*sizeof(Real));
+ Real* tXrYr = (Real*)malloc(p*m*sizeof(Real));
+ gsl_matrix* matrixM = gsl_matrix_alloc(p, m);
+ gsl_matrix* matrixE = gsl_matrix_alloc(m, m);
+ gsl_permutation* permutation = gsl_permutation_alloc(m);
+ gsl_matrix* V = gsl_matrix_alloc(m,m);
+ gsl_vector* S = gsl_vector_alloc(m);
+ gsl_vector* work = gsl_vector_alloc(m);
+
+ //Initialize class memberships (all elements in class 0; TODO: randomize ?)
+ int* Z = (int*)calloc(n, sizeof(int));
+
+ //Initialize phi to zero, because some M-step loops may exit before phi is assigned
+ zeroArray(phi, p*m*k);
+
+ while (ite<mini || (ite<maxi && sumDeltaPhi>tau))
+ {
+ /////////////
+ // M step  //
+ /////////////
+
+ //M step: update Beta (and hence phi)
+ for (int r=0; r<k; r++)
+ {
+ //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
+ int cardClustR=0;
+ for (int i=0; i<n; i++)
+ {
+ if (Z[i] == r)
+ {
+ for (int j=0; j<p; j++)
+ Xr[mi(cardClustR,j,n,p)] = X[mi(i,j,n,p)];
+ for (int j=0; j<m; j++)
+ Yr[mi(cardClustR,j,n,m)] = Y[mi(i,j,n,m)];
+ cardClustR++;
+ }
+ }
+ if (cardClustR == 0)
+ continue;
+
+ //Compute tXrXr = t(Xr) * Xr
+ for (int j=0; j<p; j++)
+ {
+ for (int jj=0; jj<p; jj++)
+ {
+ Real dotProduct = 0.0;
+ for (int u=0; u<cardClustR; u++)
+ dotProduct += Xr[mi(u,j,n,p)] * Xr[mi(u,jj,n,p)];
+ tXrXr[mi(j,jj,p,p)] = dotProduct;
+ }
+ }
+
+ //Get pseudo inverse = (t(Xr)*Xr)^{-1}
+ Real* invTXrXr = pinv(tXrXr, p);
+
+ // Compute tXrYr = t(Xr) * Yr
+ for (int j=0; j<p; j++)
+ {
+ for (int jj=0; jj<m; jj++)
+ {
+ Real dotProduct = 0.0;
+ for (int u=0; u<cardClustR; u++)
+ dotProduct += Xr[mi(u,j,n,p)] * Yr[mi(u,jj,n,m)];
+ tXrYr[mi(j,jj,p,m)] = dotProduct;
+ }
+ }
+
+ //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr
+ for (int j=0; j<p; j++)
+ {
+ for (int jj=0; jj<m; jj++)
+ {
+ Real dotProduct = 0.0;
+ for (int u=0; u<p; u++)
+ dotProduct += invTXrXr[mi(j,u,p,p)] * tXrYr[mi(u,jj,p,m)];
+ matrixM->data[j*m+jj] = dotProduct;
+ }
+ }
+ free(invTXrXr);
+
+ //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
+ gsl_linalg_SV_decomp(matrixM, V, S, work);
+
+ //Set m-rank(r) singular values to zero, and recompose
+ //best rank(r) approximation of the initial product
+ for (int j=rank[r]; j<m; j++)
+ S->data[j] = 0.0;
+
+ //[intermediate step] Compute hatBetaR = U * S * t(V)
+ double* U = matrixM->data; //GSL requires double precision
+ for (int j=0; j<p; j++)
+ {
+ for (int jj=0; jj<m; jj++)
+ {
+ Real dotProduct = 0.0;
+ for (int u=0; u<m; u++)
+ dotProduct += U[j*m+u] * S->data[u] * V->data[jj*m+u];
+ hatBetaR[mi(j,jj,p,m)] = dotProduct;
+ }
+ }
+
+ //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r)
+ for (int j=0; j<p; j++)
+ {
+ for (int jj=0; jj<m; jj++)
+ {
+ Real dotProduct=0.0;
+ for (int u=0; u<m; u++)
+ dotProduct += hatBetaR[mi(j,u,p,m)] * Rho[ai(u,jj,r,m,m,k)];
+ phi[ai(j,jj,r,p,m,k)] = dotProduct;
+ }
+ }
+ }
+
+ /////////////
+ // E step  //
+ /////////////
+
+ Real sumLogLLF2 = 0.0;
+ for (int i=0; i<n; i++)
+ {
+ Real sumLLF1 = 0.0;
+ Real maxLogGamIR = -INFINITY;
+ for (int r=0; r<k; r++)
+ {
+ //Compute
+ //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)...
+ //*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) );
+ //split in several sub-steps
+
+ //compute det(Rho(:,:,r)) [TODO: avoid re-computations]
+ for (int j=0; j<m; j++)
+ {
+ for (int jj=0; jj<m; jj++)
+ matrixE->data[j*m+jj] = Rho[ai(j,jj,r,m,m,k)];
+ }
+ gsl_linalg_LU_decomp(matrixE, permutation, &signum);
+ Real detRhoR = gsl_linalg_LU_det(matrixE, signum);
+
+ //compute Y(i,:)*Rho(:,:,r)
+ for (int j=0; j<m; j++)
+ {
+ YiRhoR[j] = 0.0;
+ for (int u=0; u<m; u++)
+ YiRhoR[j] += Y[mi(i,u,n,m)] * Rho[ai(u,j,r,m,m,k)];
+ }
+
+ //compute X(i,:)*phi(:,:,r)
+ for (int j=0; j<m; j++)
+ {
+ XiPhiR[j] = 0.0;
+ for (int u=0; u<p; u++)
+ XiPhiR[j] += X[mi(i,u,n,p)] * phi[ai(u,j,r,p,m,k)];
+ }
+
+ //compute dotProduct < Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) . Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) >
+ Real dotProduct = 0.0;
+ for (int u=0; u<m; u++)
+ dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
+ Real logGamIR = log(Pi[r]) + log(detRhoR) - 0.5*dotProduct;
+
+ //Z(i) = index of max (gam(i,:))
+ if (logGamIR > maxLogGamIR)
+ {
+ Z[i] = r;
+ maxLogGamIR = logGamIR;
+ }
+ sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0);
+ }
+
+ sumLogLLF2 += log(sumLLF1);
+ }
+
+ // Assign output variable LLF
+ *LLF = -invN * sumLogLLF2;
+
+ //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi))));
+ Real newDeltaPhi = 0.0;
+ for (int j=0; j<p; j++)
+ {
+ for (int jj=0; jj<m; jj++)
+ {
+ for (int r=0; r<k; r++)
+ {
+ Real tmpDist = fabs(phi[ai(j,jj,r,p,m,k)]-Phi[ai(j,jj,r,p,m,k)])
+ / (1.0+fabs(phi[ai(j,jj,r,p,m,k)]));
+ if (tmpDist > newDeltaPhi)
+ newDeltaPhi = tmpDist;
+ }
+ }
+ }
+
+ //update distance parameter to check algorithm convergence (delta(phi, Phi))
+ //TODO: a circular buffer would avoid shifting deltaPhi at every iteration
+ if (ite < deltaPhiBufferSize)
+ deltaPhi[ite] = newDeltaPhi;
+ else
+ {
+ sumDeltaPhi -= deltaPhi[0];
+ for (int u=0; u<deltaPhiBufferSize-1; u++)
+ deltaPhi[u] = deltaPhi[u+1];
+ deltaPhi[deltaPhiBufferSize-1] = newDeltaPhi;
+ }
+ sumDeltaPhi += newDeltaPhi;
+
+ // update other local variables
+ for (int j=0; j<m; j++)
+ {
+ for (int jj=0; jj<p; jj++)
+ {
+ for (int r=0; r<k; r++)
+ Phi[ai(jj,j,r,p,m,k)] = phi[ai(jj,j,r,p,m,k)];
+ }
+ }
+ ite++;
+ }
+
+ //free memory
+ free(hatBetaR);
+ free(deltaPhi);
+ free(Phi);
+ gsl_matrix_free(matrixE);
+ gsl_matrix_free(matrixM);
+ gsl_permutation_free(permutation);
+ gsl_vector_free(work);
+ gsl_matrix_free(V);
+ gsl_vector_free(S);
+ free(XiPhiR);
+ free(YiRhoR);
+ free(Xr);
+ free(Yr);
+ free(tXrXr);
+ free(tXrYr);
+ free(Z);
+}
--- /dev/null
+#ifndef valse_EMGrank_H
+#define valse_EMGrank_H
+
+#include "utils.h"
+
+void EMGrank_core(
+ // IN parameters
+ const Real* Pi,
+ const Real* Rho,
+ int mini,
+ int maxi,
+ const Real* X,
+ const Real* Y,
+ Real tau,
+ const int* rank,
+ // OUT parameters
+ Real* phi,
+ Real* LLF,
+ // additional size parameters
+ int n,
+ int p,
+ int m,
+ int k);
+
+#endif
--- /dev/null
+#ifndef valse_utils_H
+#define valse_utils_H
+
+//#include <stdint.h>
+
+/********
+ * Types
+ *******/
+
+typedef double Real;
+//typedef uint32_t UInt;
+//typedef int32_t Int;
+
+/*******************************
+ * Matrix and arrays indexation
+ *******************************/
+
+// Matrix Index ; TODO? ncol unused
+#define mi(i,j,nrow,ncol)\
+ ((j)*(nrow) + (i))
+
+// Array Index ; TODO? d3 unused
+#define ai(i,j,k,d1,d2,d3)\
+ ((k)*(d1)*(d2) + (j)*(d1) + (i))
+
+// Array4 Index ; TODO? ...
+#define ai4(i,j,k,m,d1,d2,d3,d4)\
+ ((m)*(d1)*(d2)*(d3) + (k)*(d1)*(d2) + (j)*(d1) + (i))
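+
+// Worked example (column-major storage, matching R): a 3x2 matrix stored as
+// {a00,a10,a20,a01,a11,a21} gives mi(1,0,3,2) == 1 (element a10) and
+// mi(0,1,3,2) == 3 (element a01); for a 2x2x2 array,
+// ai(1,0,1,2,2,2) == 1*2*2 + 0*2 + 1 == 5.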
+
+/*************************
+ * Array copy & "zeroing"
+ ************************/
+
+// Fill an array with zeros
+#define zeroArray(array, size)\
+{\
+ for (int u=0; u<size; u++)\
+ array[u] = 0;\
+}
+
+// Copy an 1D array
+#define copyArray(array, copy, size)\
+{\
+ for (int u=0; u<size; u++)\
+ copy[u] = array[u];\
+}
+
+#endif
--- /dev/null
+library(testthat)
+library(valse) #or load_all()
+
+test_check("valse")
--- /dev/null
+# Potential helpers for context 1
+help <- function()
+{
+ #...
+}
--- /dev/null
+context("Context1")
+
+test_that("function 1...",
+{
+ #expect_lte( ..., ...)
+})
+
+test_that("function 2...",
+{
+ #expect_equal(..., ...)
+})
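+
+# A hedged sketch of a concrete test (commented out, like the stubs above),
+# assuming the generateXYdefault(n, p, m, k) helper used by the C test scripts
+# is made available to this suite:
+#test_that("generateXYdefault returns consistent dimensions",
+#{
+#  xy = generateXYdefault(10, 5, 6, 2)
+#  expect_equal(dim(xy$X), c(10, 5))
+#  expect_equal(dim(xy$Y), c(10, 6))
+#})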
--- /dev/null
+#ignore jupyter generated file (ipynb, HTML)
+*.html
+*.ipynb
+
+#and various (pdf)LaTeX files, in case of
+*.tex
+*.pdf
+*.aux
+*.dvi
+*.log
+*.out
+*.toc
+*.synctex.gz
+/figure/
--- /dev/null
+niveauColores
+
+for L=ind
+ for r=1:max(b(:,L))
+ betaLassoMLE(:,:,r)=inv(rhoLassoMLE(:,:,r,L)) * phiLassoMLE(:,:,r,L);
+ end
+end
+dessinRho = zeros(max(K),size(rhoLassoMLE,1),max(ind));
+cpt=1;
+gr = gray(64);
+
+ for L=ind
+ k=size(find(piLassoMLE(:,L)~=0),1)
+ figure(1)
+ %for r=1:k
+ r=1
+ subplot(1,k+1,r)
+ cpt=cpt+1;
+ colormap(gr(end:-1:1,:));
+ imagesc(abs(betaLassoMLE(:,:,r)))
+ set(gca,'xtick',[],'ytick',[])
+ set(gca, 'FontSize', 30)
+ title(['$\hat{\underline{\beta}_1}$'],'FontSize', 45,'Interpreter','latex')
+ h=colorbar;
+ set(gca, 'FontSize', 30)
+ r=2
+ subplot(1,k+1,r)
+ cpt=cpt+1;
+ colormap(gr(end:-1:1,:));
+ imagesc(abs(betaLassoMLE(:,:,r)))
+ set(gca,'xtick',[],'ytick',[])
+ set(gca, 'FontSize', 30)
+ title(['$\hat{\underline{\beta}_2}$'],'FontSize', 45,'Interpreter','latex')
+ h=colorbar;
+ set(gca, 'FontSize', 30)
+
+ %end
+ subplot(1,k+1,k+1)
+ gr = gray(64);
+ colormap(gr(end:-1:1,:));
+ imagesc(abs(betaLassoMLE(:,:,1)-betaLassoMLE(:,:,2)))
+ set(gca,'xtick',[],'ytick',[])
+ title('$|\hat{\underline{\beta}_1}-\hat{\underline{\beta}}_2|$','FontSize',45,'Interpreter','latex')
+ h=colorbar;
+ set(gca, 'FontSize', 30)
+ figure(L+k+1)
+ for r=1:k
+ for z=1:size(rhoLassoMLE,1)
+ dessinRho(r,z,L) = inv(rhoLassoMLE(z,z,r,L)^2);
+ end
+ end
+
+ %for r=1:k
+ r=1
+ subplot(k,1,r)
+% if r==1
+% colormap(cmapRouge(end:-1:1,:));
+% else if r==2
+% colormap(cmapBleu(end:-1:1,:));
+% else colormap(cmapVert(end:-1:1,:));
+% end
+% end
+ imagesc(dessinRho(r,:,L))
+ set(gca,'xtick',[],'ytick',[])
+ xlabh = get(gca,'XLabel');
+ set(xlabh,'Position',get(xlabh,'Position') + [0 .15 0])
+ title('$\hat{\Sigma}_1$','FontSize', 55,'Interpreter','latex')
+ xlabel('d_4[1] d_4[2] d_4[3] d_3[1] d_3[2] d_3[3] d_3[4] d_3[5] d_3[6]','FontSize', 30)
+ set(gca, 'FontSize', 45)
+ set(gca,'Position',[0.01 0.1+0.5*(2-r) 0.9 0.2])
+
+ r=2
+ subplot(k,1,r)
+% if r==1
+% colormap(cmapRouge(end:-1:1,:));
+% else if r==2
+% colormap(cmapBleu(end:-1:1,:));
+% else colormap(cmapVert(end:-1:1,:));
+% end
+% end
+ imagesc(dessinRho(r,:,L))
+ set(gca,'xtick',[],'ytick',[])
+ xlabh = get(gca,'XLabel');
+ set(xlabh,'Position',get(xlabh,'Position') + [0 .15 0])
+ title('$\hat{\Sigma}_2$','FontSize', 55,'Interpreter','latex')
+ xlabel('d_4[1] d_4[2] d_4[3] d_3[1] d_3[2] d_3[3] d_3[4] d_3[5] d_3[6]','FontSize', 30)
+ set(gca, 'FontSize', 45)
+ set(gca,'Position',[0.01 0.1+0.5*(2-r) 0.9 0.2])
+ %end
+ %h=colorbar;
+ gr = gray(64);
+ colormap(gr(end:-1:1,:));
+ axes('Position', [0.14 0.1 0.8 0.8], 'Visible', 'off');
+% c=colorbar;
+% set(gca, 'FontSize', 40)
+% caxis([0 max(max(dessinRho(:,:,L)))])
+end
\ No newline at end of file
--- /dev/null
+%red level
+cmapRouge = ones(100,3);
+for i=1:100
+ cmapRouge(i,2) = i/100;
+ cmapRouge(i,3) = i/100;
+end
+%green level
+cmapVert = ones(100,3);
+for i=1:100
+ cmapVert(i,1) = i/100;
+ cmapVert(i,3) = i/100;
+end
+%blue level
+cmapBleu = ones(100,3);
+for i=1:100
+ cmapBleu(i,1) = i/100;
+ cmapBleu(i,2) = i/100;
+end
\ No newline at end of file
--- /dev/null
+%Wavelet decomposition plots
+%script
+figure(1)
+courbe = X2(:,1)-mean(X2(:,1));
+[C,L] = wavedec(courbe, 4,'haar');
+
+subplot(6,1,1)
+plot(1:7,courbe(1:7),'b','LineWidth',2)
+axis([0 48 -1.6 1.6])
+hold on
+plot(8:48,courbe(8:48),'b','LineWidth',2)
+%plot(waverec([C(1:12)',zeros(1,36)],L,'haar'),'b--','LineWidth',2)
+%courbe2 = X2(:,1);
+%[C2,L2] = wavedec(courbe2,4,'haar');
+%plot(waverec([zeros(1,3),C2(4:12)',zeros(1,36)],L2,'haar'),'b-.','LineWidth',2)
+hold off
+ylabel('z','FontSize', 30)
+set(gca, 'FontSize', 20)
+subplot(6,1,2)
+A4=zeros(1,48);
+Coeff = zeros(5,48);
+for r=1:16
+ A4(r)=C(1)/power(2,5/2);
+ A4(r+16)=C(2)/power(2,5/2);
+ A4(r+32) = C(3)/power(2,5/2);
+end
+Coeff(5,8)=C(1);
+Coeff(5,24) = C(2);
+Coeff(5,40) = C(3);
+stairs(A4,'b','LineWidth',2)
+axis([0 48 -0.5 0.5])
+ylabel('A_4','FontSize', 30)
+set(gca, 'FontSize', 20)
+subplot(6,1,3)
+D4=zeros(1,48);
+for r=1:8
+ D4(r) = C(4)/power(2,4/2);
+ D4(r+8) = -C(4)/power(2,4/2);
+ D4(r+16) = C(5)/power(2,4/2);
+ D4(r+24) = -C(5)/power(2,4/2);
+ D4(r+32) = C(6)/power(2,4/2);
+ D4(r+40) = -C(6)/power(2,4/2);
+end
+Coeff(4,8) = C(4);
+Coeff(4,24) = C(5);
+Coeff(4,40) = C(6);
+stairs(D4,'b','LineWidth',2)
+axis([0 48 -0.9 0.9])
+ylabel('D_4','FontSize', 30)
+ set(gca, 'FontSize', 20)
+subplot(6,1,4)
+D3=zeros(1,48);
+for k=1:12
+ for r=1:4
+ D3(r+4*(k-1)) = (-1)^(k+1) *C(7+floor((k-1)/2))/power(2,3/2);
+ end
+end
+stairs(D3,'b','LineWidth',2)
+ylabel('D_3','FontSize', 30)
+axis([0 48 -0.5 0.5])
+ set(gca, 'FontSize', 20)
+subplot(6,1,5)
+D2=zeros(1,48);
+for k=1:24
+ for r=1:2
+ D2(r+2*(k-1)) = (-1)^(k+1) *C(13+floor((k-1)/2))/power(2,2/2);
+ end
+end
+stairs(D2,'b','LineWidth',2)
+ylabel('D_2','FontSize', 30)
+axis([0 48 -0.8 0.8])
+ set(gca, 'FontSize', 20)
+subplot(6,1,6)
+D1=zeros(1,48);
+for k=1:48
+ for r=1
+ D1(r+1*(k-1)) = (-1)^(k+1) *C(25+floor((k-1)/2))/power(2,1/2);
+ end
+end
+plot(D1,'b','LineWidth',2);
+axis([0 48 -0.9 0.9])
+ylabel('D_1','FontSize', 30)
+set(gca, 'FontSize', 20)
--- /dev/null
+ondelettes
+niveauColores
+figure(2)
+subplot(6,1,1)
+plot(1:7,courbe(1:7),'b','LineWidth',2)
+hold on
+plot(8:48,courbe(8:48),'b','LineWidth',2)
+set(gca,'ytick',[])
+ylabel('z','FontSize', 30)
+set(gca, 'FontSize', 20)
+axis([0 48 -1.6 1.6])
+set(B, 'Position', [.91 .11 .03 .8150])
+subplot(6,1,2:6)
+colormap([cmapVert([1:1:end],:);cmapBleu(end:-1:1,:)]);
+ %colormap(gr(end:-1:1,:));
+indice = [3,3,6,12,24];
+indice2=[0,cumsum(indice)];
+beta = zeros(5,48);
+for j=1:indice(1)
+ for l=1:2^4
+ beta(1,(j-1)*2^4+l) = C(indice2(1)+j);
+ end
+end
+for r=2:5
+ for j=1:indice(r)
+ for l=1:2^(5-(r-1))
+ beta(r,(j-1)*2^(5-(r-1))+l) = C(indice2(r)+j);
+ end
+ end
+end
+
+
+imagesc(beta)
+ylabel('d_1 d_2 d_3 d_4 a_4','FontSize', 30)
+set(gca, 'FontSize', 20)
+%gr = gray(64);
+
+
+
+set(gca,'xtick',[],'ytick',[])
+set(gca, 'FontSize', 20)
+
+% axes('Position', [0.05 0.05 0.9 0.9], 'Visible', 'off');
+% c=colorbar ('FontSize',18);
+B=colorbar;
+set(B, 'Position', [.91 .11 .03 .8150])
+%set(gca,'Position',[.1 0.1 0.9 0.9])
+set(gca, 'FontSize', 15)
+% figure(3)
+% subplot(6,1,1)
+% plot(courbe,'LineWidth',2)
+% axis([0 48 -0.7 0.7])
+% subplot(6,1,2)
+% plot([8,24,40],C(1:3),'+','LineWidth',2)
+% axis([0 48 -1.6 1.6])
+% subplot(6,1,3)
+% plot([8:16:48],C(4:6),'+','LineWidth',2)
+% axis([0 48 -0.6 0.6])
+% subplot(6,1,4)
+% plot([4:8:48],C(7:12),'+','LineWidth',2)
+% axis([0 48 -0.4 0.4])
+% subplot(6,1,5)
+% plot([2:4:48],C(13:24),'+','LineWidth',2)
+% axis([0 48 -0.2 0.2])
+% subplot(6,1,6)
+% plot([1:2:48],C(25:48),'+','LineWidth',2)
+% axis([0 48 -0.2 0.2])
\ No newline at end of file
--- /dev/null
+%Preprocessing plots
+%script
+
+XC=X2(:,1)-mean(X2(:,1));
+
+[C1,L1] = wavedec(X2(:,1),4,'haar');
+xProj1=C1(4:12);
+
+[C2,L2]=wavedec(XC',4,'haar');
+xProj2=C2(1:12);
+
+Xrecon1 = waverec([zeros(1,3),xProj1',zeros(1,36)],L1,'haar');
+Xrecon2 = waverec([xProj2',zeros(1,36)],L2,'haar');
+
+plot(X2(:,1),'LineWidth',2)
+hold on
+plot(Xrecon1,'r-o','LineWidth',2)
+plot(Xrecon2,'g-s','LineWidth',2)
+xlabel('Instant of the day','FontSize', 30)
+ylabel(['Load consumption and its reconstructions' sprintf('\n') ' after projecting and preprocessing'],'FontSize', 30)
+set(gca, 'FontSize', 20, 'fontName','Times');
+legend('Original signal','Reconstruction after projection and preprocessing 1','Reconstruction after projection and preprocessing 2','Location','NorthWest')
\ No newline at end of file
--- /dev/null
+ychap=zeros(n,m,k,94);
+%Ychap=zeros(n,m,l,94);
+
+for LL=1:10
+ for i=1:n
+ for r=1:2
+ ychap(i,:,r,LL)=x(i,:)*(phitrue(:,:,r,LL)*inv(rhotrue(:,:,r,LL)))';
+ Ychap(i,:,r,LL)=waverec(ychap(i,:,r,LL)',L,'sym4');
+ end
+ end
+end
+for i=1:n
+ for r=1:2
+ for LL=1:10
+ RMSE(i,LL,r)=sqrt(sum((donneesC(78:96,i)-Ychap(i,:,r,LL)').^2));
+ end
+ end
+end
+LL=88;
+plot(Ychap(89,:,2,LL),'r')
+hold on
+plot(donnees(49:96,89)/100,'b')
+hold off
+
+
+for LL=1:10
+ Z(LL,:)=principeMAP(Y,X,phiLassoMLE(:,:,:,LL),rhoLassoMLE(:,:,:,LL),piLassoMLE(:,LL),3.14);
+ sum((Z(1:100)==1)+(Z(101:200)==2))
+end
\ No newline at end of file
--- /dev/null
+beta = betaLassoMLE(:,:,1:2,ind);
+for i=1:338
+ for r=1:2
+ YChap(:,i,r) = X(i,:) * beta(:,:,r);
+ yChap(:,i,r) = waverec([zeros(1,3),YChap(:,i,r)',zeros(1,36)],L,'haar');
+ end
+end
--- /dev/null
+ind=indiceLassoMLE([2,3,4,160,165])
+cpt=0;
+for L2=ind
+ cpt = cpt+1
+% figure(L2)
+% hold on
+% for r=1:max(b(:,L2))
+% subplot(2,ceil(max(b(:,L2))/2),r)
+% plot(X2Final(:,b(:,L2)==r),c(r))
+% end
+% hold off
+ figure(1)
+ subplot(2,3,cpt)
+ hold on
+ for r=1:max(b(:,L2))
+ plot([mean(X2Final(:,b(:,L2)==r),2);mean(Y2Final(:,(b(:,L2)==r)),2)],c(r))
+ title('mean signal of each class, for each model')
+ end
+ hold off
+
+end
\ No newline at end of file
--- /dev/null
+%Dimension jump
+figure(1)
+vec=[0,10:100:1210];
+
+for r=vec
+ tableauBis(:,:,r+1) = tableauLassoMLE;
+ [aaaa,bbbb]= min(tableauLassoMLE(:,4)+r*tableauLassoMLE(:,2));
+ tableauBis(:,4,r+1) = tableauLassoMLE(:,4)+r*tableauLassoMLE(:,2);
+ reponse(r+1)=tableauLassoMLE(bbbb,3);
+end
+
+[x1,y1] = stairs(vec,reponse(vec+1))
+plot(x1,y1,'LineWidth',2)
+xlabel('$\kappa$','Interpreter','LaTex','FontSize', 45)
+ylabel('Model dimension','FontSize', 45)
+set(gca, 'FontSize', 40, 'fontName','Times');
+[Cx1,IA,IC] = unique(y1,'stable')
+hold on
+plot([x1(IA(2))-0.001,x1(IA(2))],[0,Cx1(2)],'--','LineWidth',2)
+plot([2*x1(IA(2))-0.001,2*x1(IA(2))],[0,Cx1(3)],'--','LineWidth',2)
+%plot([0,2*x1(IA(2))],[Cx1(3),Cx1(3)],'--','LineWidth',2)
+set(gca,'Box','off')
+set(gca,'YTick',unique([0:800:700]))
+set(gca,'XTick',unique([0:1300:1200]))
+text(x1(IA(2))-20,-20,'$\hat{\kappa}$','Interpreter','LaTex','FontSize',45)
+text(2*x1(IA(2))-20,-20,'$2 \hat{\kappa}$','Interpreter','LaTex','FontSize',45)
+%text(-80,Cx1(3),'$\hat{m}$','Interpreter','LaTex','FontSize',45)
+axis([0 1200 0 650])
+hold off
+
+%Penalized LLF
+
+% figure(2)
+% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+0*tableauLassoMLE(:,2),'.','markersize', 20)
+% xlabel('Model dimension','FontSize', 30)
+% ylabel('Log-likelihood','Interpreter','LaTex','FontSize', 30)
+% set(gca, 'FontSize', 20, 'fontName','Times');
+figure(3)
+ind = find(tableauLassoMLE(:,4) >4000);
+tableauLassoMLE(ind,:)=[];
+plot(tableauLassoMLE(2:end,3),tableauLassoMLE(2:end,4)+1100*tableauLassoMLE(2:end,2),'.','markersize', 30)
+indiceInt = find(tableauLassoMLE(:,4)+1100*tableauLassoMLE(:,2)<-4401);
+indiceBis = find(tableauLassoMLE(:,3)==53);
+hold on
+plot(tableauLassoMLE(indiceInt,3),tableauLassoMLE(indiceInt,4)+1100*tableauLassoMLE(indiceInt,2),'sr','markersize', 30,'linewidth',5)
+plot(tableauLassoMLE(indiceBis,3),tableauLassoMLE(indiceBis,4)+1100*tableauLassoMLE(indiceBis,2),'dg','markersize', 30,'linewidth',5)
+xlabel('Model dimension','Interpreter','LaTex','FontSize', 45)
+ylabel('Penalized log-likelihood for $2 \hat{\kappa}$','Interpreter','LaTex','FontSize', 45)
+set(gca, 'FontSize', 30, 'fontName','Times');
+set(gcf,'Units','normal')
+set(gca,'Position',[.1 .1 .88 .85])
+hold off
+% figure(4)
+% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+3000*tableauLassoMLE(:,2),'.','markersize', 20)
+% xlabel('Model dimension','FontSize', 30)
+% ylabel('Penalized log-likelihood for $\kappa$ = 3000','Interpreter','LaTex','FontSize', 30)
+% set(gca, 'FontSize', 20, 'fontName','Times');
+%
+% figure(5)
+% n2 = find(piLassoMLE(3,:)~=0,1)-1;
+% n3 = find(piLassoMLE(4,:)~=0,1)-1;
+% plot(tableauLassoMLE(:,3),tableauLassoMLE(:,4)+0*tableauLassoMLE(:,2),'.','markersize', 20)
+% xlabel('Model dimension','FontSize', 30)
+% ylabel('Log-likelihood','Interpreter','LaTex','FontSize', 30)
+% set(gca, 'FontSize', 20, 'fontName','Times');
\ No newline at end of file
--- /dev/null
+%clear all
+load donneesSelec.mat
+%donnees=[ind100,res100];
+donnees = BB;
+Res=mean(donnees,2);
+for i=1:349
+ X2(:,i)=Res(48*(i-1)+1:48*i);
+end
+for i=1:348
+ signal(:,i)=[X2(:,i);X2(:,i+1)];
+end
+
+[p1,n1]=size(X2);
+for i=1:n1
+ for j=1:p1
+ XC(i,j)=X2(j,i)-mean(X2(:,i));
+ end
+
+ [C,L]=wavedec(XC(i,:)',4,'haar');
+ xProj(i,:)=C(1:12);
+end
+
+X=xProj(1:end-2,:);
+Y=xProj(2:end-1,:);
+for i=1:n1
+ Xrecon(i,:)=waverec([xProj(i,:),zeros(1,36)],L,'haar');
+end
+for i=1:348
+ signal3(:,i)=[Xrecon(i,:),Xrecon(i+1,:)];
+end
--- /dev/null
+%clear all
+load donneesSelec.mat
+donnees=BB;
+
+Res=mean(donnees,2);
+%Ind=mean(donnees(:,1:100),2);
+for i=1:340
+ X2(:,i)=Res(48*(i-1)+1:48*i);
+end
+for i=1:339
+ signal(:,i)=[X2(:,i);X2(:,i+1)];
+end
+
+[p1,n1]=size(X2);
+for i=1:n1
+ [C,L]=wavedec(X2(:,i)',4,'haar');
+ xProj(i,:)=C(4:12);
+end
+
+X=xProj(1:end-2,:);
+Y=xProj(2:end-1,:);
+for i=1:n1
+ Xrecon(i,:)=waverec([zeros(1,3),xProj(i,:),zeros(1,36)],L,'haar');
+end
+
+for i=1:339
+ signal2(:,i)=[Xrecon(i,:),Xrecon(i+1,:)];
+end
--- /dev/null
+for i = 1:100
+ courbeX(i,:) = data(((i-1)*96+1):((i-1)*96+48));
+ courbeY(i,:) = data(((i-1)*96+49):((i-1)*96+96));
+end
+plot((courbeX)')
+figure(2)
+plot((courbeY)')
+
+X2 = (courbeX)'
\ No newline at end of file
--- /dev/null
+library("mclust")
+#library("R.matlab", lib.loc="~/R/x86_64-pc-linux-gnu-library/3.3")
+#redrawData = TRUE
+#if (redrawData==TRUE){
+ ###########
+ ## Model
+ ###########
+ K = 2
+ p = 48
+ T = seq(0,1.5,length.out = p)
+ T2 = seq(0,3, length.out = 2*p)
+ n = 100
+ x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1
+ plot(T,x1)
+ lines(T,x1)
+
+ sigmaX = 0.12
+ sigmaY = 0.12
+ beta = list()
+ p1= 0.5
+ beta[[1]] =diag(c(rep(p1,5),rep(1,5), rep(p1,5), rep(1, p-15)))
+ p2 = 1
+ beta[[2]] = diag(c(rep(p2,5),rep(1,5), rep(p2,5), rep(1, p-15)))
+ ITE = 100
+ ARI1 = ARI2 = ARI3 = rep(0,ITE)
+ XY = array(0, dim = c(ITE, 2*p,n))
+ XYproj = array(0, dim=c(ITE, 96,n))
+
+ affec = list()
+ ###########
+ ## Iterations
+ ###########
+ for (ite in c(1:ITE)){
+ ###########
+ ##Sample
+ ###########
+ x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n)
+ affec[[ite]] = sample(c(1,2), n, replace = TRUE)
+ y = x
+ xy = matrix(0,ncol=n, nrow= 2*p)
+ for (i in c(1:n)){
+ y[,i] = x[,i] %*% beta[[affec[[ite]][i]]] + rnorm(p, 0, sigmaY)
+ xy[,i] = c(x[,i],y[,i])
+ XY[ite,,i] = xy[,i] - mean(xy[,i])
+ # Dx = dwt(x[,i], filter='haar')@W
+ # Dx = rev(unlist(Dx))
+ # Dx = Dx[2:(1+3+6+12+24)]
+ # Ax = dwt(x[,i], filter='haar')@V
+ # Ax = rev(unlist(Ax))
+ # Ax = Ax[2:(1+3)]
+ # Dy = dwt(y[,i], filter='haar')@W
+ # Dy = rev(unlist(Dy))
+ # Dy = Dy[2:(1+3+6+12+24)]
+ # Ay = dwt(y[,i], filter='haar')@V
+ # Ay = rev(unlist(Ay))
+ # Ay = Ay[2:(1+3)]
+ # XYproj[ite,,i] = c(Ax,Dx,Ay,Dy)
+ }
+ print(ite)
+ #
+ #
+ }
+ xy[c(7,55),] = NA
+ # write.table(XY,'data.csv', row.names=FALSE, col.names=FALSE)
+matplot(T2,xy[,affec[[ite]]==1],type='l', col='red', lty = 1)
+matplot(T2,xy[,affec[[ite]]==2],type='l', col='black', add=TRUE, lty= 1)
+abline(v = 1.5)
+text(0.75,0,'X', cex = 2 )
+text(0.75+1.5,0,'Y', cex = 2 )
+#proj = read.table('dataProj.csv')
+#}
+
+
+#matplot(T,x,type='l', col='black', xlab = '', ylab='', lwd=1.5,lty=1)
+#matplot(T,y[,affec[[ite]]==1],type='l', col='red', xlab = '', ylab='', lwd=1.5,lty=1)
+#matplot(T,y[,affec[[ite]]==2],type='l', col='black', add=TRUE,lwd=2,lty=1)
+# proj2 = array(0,dim=c(ITE,2*p,n))
+# for (ite in c(1:ITE)){
+# for (i in c(1:n)){
+# A = proj[ite,(1+(i-1)*96):(i*96)]
+# for (j in 1:96){
+# proj2[ite,j,i] = A[1,j]
+# }
+# }
+# print(ite)
+# }
+###########
+## Iterations
+###########
+Kmod2 = Kmod1 = rep(0,ITE)
+Kmod3 = rep(0,ITE)
+for (ite in c(1:ITE)){
+ print(ite)
+ ###########
+ ## k-means 1
+ ###########
+  mod1 = Mclust(t(XY[ite,,]),G = 1:2, modelNames='VII')
+ ARI1[ite] = adjustedRandIndex(mod1$classification, affec[[ite]])
+ Kmod1[ite] = mod1$G
+ # ###########
+ # ## k-means 2
+ # ###########
+ # #proj2 =
+ # mod2 = Mclust(t(XYproj[ite,,]),G = 1:8, mode='VII')
+ # ARI2[ite] = adjustedRandIndex(mod2$classification, affec[[ite]])
+ # Kmod2[ite] = mod2$G
+ # ###########
+ # ## k-means 1
+ # ###########
+ # #proj3 =
+ # mod3 = Mclust(t(XYproj[ite,c(4:12,52:60),]),G = 1:8, mode='VII')
+ # ARI3[ite] = adjustedRandIndex(mod3$classification, affec[[ite]])
+ # Kmod3[ite] = mod3$G
+}
+ARI0 = rep(1,ITE)
+par(cex.lab=1.5)
+par(cex.axis=1.5)
+boxplot(ARI0,ARI1, names = c('LassoMLE','K-means'), lwd=1.3)
+table(Kmod1)
+table(Kmod2)
--- /dev/null
+p = 10
+q = 8
+k = 2
+D = 20
+
+meanX = matrix(nrow=p,ncol=k)
+meanX[,1] = rep(0,p)
+meanX[,2] = rep(1,p)
+
+covX = array(dim=c(p,p,k))
+covX[,,1] = 0.1*diag(p)
+covX[,,2] = 0.5*diag(p)
+
+covY = array(dim = c(q,q,k))
+covY[,,1] = 0.1*diag(q)
+covY[,,2] = 0.2*diag(q)
+
+beta = array(dim = c(p,q,2))
+beta[,,2] = matrix(c(rep(2,(D)),rep(0, p*q-D)))
+beta[,,1] = matrix(c(rep(1,D),rep(0, p*q-D)))
+
+n = 100
+
+pi = c(0.4,0.6)
+
+data = generateXY(n, pi, meanX, beta, covX, covY) #argument order as in the other call sites
+
+X = data$X
+Y = data$Y
+
+res_valse = valse(X,Y)
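+
+#Hedged sketch for inspecting the fit (field names as used by the plot scripts here):
+#dim(res_valse$phi) #regression array, p x q x (number of clusters kept)
+#res_valse$pi #estimated mixture proportions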
--- /dev/null
+### Regression matrices
+model = Res
+K = dim(model$phi)[3]
+valMax = max(abs(model$phi))
+
+require(fields)
+
+if (K<4){
+  op = par(mfrow = c(1,K))
+} else op = par(mfrow = c(2, (K+1)/2))
+
+## Phi
+
+for (r in 1:K){
+ image.plot(t(abs(model$phi[,,r])),
+ col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66))
+}
+par(mfrow = c(1,K),oma = c(0,0,3,0))
+mtext("Regression matrices in each cluster", side=3, line=4, font=2, cex=2, col='red')
+
+par(mfrow = c(1,2), oma=c(0,0,3,0))
+for (i in 1:4)
+ plot(runif(20), runif(20),
+ main=paste("random plot (",i,")",sep=''))
+par(op)
+mtext("Four plots",
+ side=3, line=4, font=2, cex=2, col='red')
+
+### Zoom onto two classes we want to compare
+kSel = c(1,2)
+par(mfrow = c(1,3))
+
+for (r in kSel){
+ image.plot(t(abs(model$phi[,,r])),xaxt="n",yaxt="n",
+ col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66))
+}
+image.plot(t(abs(model$phi[,,kSel[1]]-model$phi[,,kSel[2]])),
+ col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66))
+
+### Covariance matrices
+par(mfrow = c(K, 1))
+for (r in 1:K){
+ image.plot(matrix(diag(model$rho[,,r]), ncol= 1),
+ col=gray(rev(seq(0,64,length.out=65))/65),breaks=seq(0,valMax,length.out=66))
+}
+
+### proportions
+Gam = matrix(0, ncol = K, nrow = n)
+gam = Gam
+for (i in 1:n){
+ for (r in 1:K){
+ sqNorm2 = sum( (Y[i,]%*%model$rho[,,r]-X[i,]%*%model$phi[,,r])^2 )
+ Gam[i,r] = model$pi[r] * exp(-0.5*sqNorm2)* det(model$rho[,,r])
+ }
+ gam[i,] = Gam[i,] / sum(Gam[i,])
+}
+affec = apply(gam, 1,which.max)
+gam2 = matrix(NA, ncol = K, nrow = n)
+for (i in 1:n){
+ gam2[i, affec[i]] = gam[i, affec[i]]
+}
+boxplot(gam2)
+
+### Mean in each cluster
+XY = cbind(X,Y)
+XY_class= list()
+meanPerClass= matrix(0, ncol = K, nrow = dim(XY)[2])
+for (r in 1:K){
+ XY_class[[r]] = XY[affec == r, ]
+ meanPerClass[,r] = apply(XY_class[[r]], 2, mean)
+}
+
+matplot(meanPerClass, type='l')
--- /dev/null
+simulData_17mars = function(ite){
+  set.seed(22021989 + ite)
+
+  ###########
+  ## Model
+ ###########
+ K = 2
+ p = 48
+ T = seq(0,1.5,length.out = p)
+ T2 = seq(0,3, length.out = 2*p)
+ n = 100
+ x1 = cos(2*base::pi*T) + 0.2*cos(4*2*base::pi*T) + 0.3*c(rep(0,round(length(T)/7)),rep(1,round(length(T)*(1-1/7))))+1
+ sigmaX = 0.12
+ sigmaY = 0.12
+ beta = list()
+ p1= 0.5
+ beta[[1]] =diag(c(rep(p1,5),rep(1,5), rep(p1,5), rep(1, p-15)))
+ p2 = 2
+ beta[[2]] = diag(c(rep(p2,5),rep(1,5), rep(p2,5), rep(1, p-15)))
+ ARI1 = ARI2 = ARI3 = 0
+
+ ###########
+ ## Data + Projection
+ ###########
+ require(wavelets)
+ XY = array(0, dim = c(2*p,n))
+ XYproj = array(0, dim=c(96,n))
+ x = x1 + matrix(rnorm(n*p, 0, sigmaX), ncol = n)
+ affec = sample(c(1,2), n, replace = TRUE)
+ y = x
+ xy = matrix(0,ncol=n, nrow= 2*p)
+ for (i in c(1:n)){
+ y[,i] = x[,i] %*% beta[[affec[i]]] + rnorm(p, 0, sigmaY)
+ xy[,i] = c(x[,i],y[,i])
+ XY[,i] = xy[,i] - mean(xy[,i])
+ Dx = dwt(x[,i], filter='haar')@W
+ Dx = rev(unlist(Dx))
+ Dx = Dx[2:(1+3+6+12+24)]
+ Ax = dwt(x[,i], filter='haar')@V
+ Ax = rev(unlist(Ax))
+ Ax = Ax[2:(1+3)]
+ Dy = dwt(y[,i], filter='haar')@W
+ Dy = rev(unlist(Dy))
+ Dy = Dy[2:(1+3+6+12+24)]
+ Ay = dwt(y[,i], filter='haar')@V
+ Ay = rev(unlist(Ay))
+ Ay = Ay[2:(1+3)]
+ XYproj[,i] = c(Ax,Dx,Ay,Dy)
+ }
+
+ res_valse = valse(t(x),t(y), kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 1000)
+ res_valse_proj = valse(t(XYproj[1:p,]),t(XYproj[(p+1):(2*p),]), kmax=2, verbose=TRUE, plot=FALSE, size_coll_mod = 1000)
+
+ save(res_valse,file=paste("Res_",ite, ".RData",sep=""))
+ save(res_valse_proj,file=paste("ResProj_",ite, ".RData",sep=""))
+}
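+
+# Usage sketch (hypothetical driver): each call writes Res_<ite>.RData and
+# ResProj_<ite>.RData in the working directory.
+#for (ite in 1:50) simulData_17mars(ite)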
--- /dev/null
+test.*
+!test.*.c
+/data/
+vgcore.*
--- /dev/null
+CC = gcc
+CFLAGS = -g -std=gnu99 -Wno-implicit-function-declaration
+LDFLAGS = -lm -lgsl -lcblas
+TEST_LDFLAGS = -L. libvalse_core.so
+LIB = libvalse_core.so
+LIB_SRC = $(wildcard ../pkg/src/sources/*.c)
+LIB_OBJ = $(LIB_SRC:.c=.o)
+INCLUDES = -I../pkg/src/sources
+TESTS = test.EMGLLF test.EMGrank
+
+all: $(LIB) $(TESTS)
+
+$(LIB): $(LIB_OBJ)
+ $(CC) -shared -o $@ $^ $(LDFLAGS)
+
+test.EMGLLF: $(LIB) test.EMGLLF.o test_utils.o
+ $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS)
+
+test.EMGrank: $(LIB) test.EMGrank.o test_utils.o
+ $(CC) -o $@ $^ $(LDFLAGS) $(TEST_LDFLAGS)
+
+%.o: %.c
+ $(CC) -fPIC -o $@ -c $< $(CFLAGS) $(INCLUDES)
+
+clean:
+ rm -f *.o ../pkg/src/sources/*.o
+
+cclean: clean
+ rm -f *.so ../pkg/src/*.so $(TESTS)
+
+.PHONY: all clean cclean
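+
+# Usage sketch: `make all` builds libvalse_core.so and the test binaries.
+# The test executables read their inputs from ./data/, so run the
+# generateRunSaveTest_* R scripts first to produce the reference data.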
--- /dev/null
+source("helper.R")
+library(valse)
+
+generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1., lambda=0.5, eps=1e-6)
+{
+ testFolder = "./data/"
+ dir.create(testFolder, showWarnings=FALSE, mode="0755")
+
+ params = basicInitParameters(n, p, m, k)
+ xy = generateXYdefault(n, p, m, k)
+
+ #save inputs
+ write.table(as.double(params$phiInit), paste(testFolder,"phiInit",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(params$rhoInit), paste(testFolder,"rhoInit",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(params$piInit), paste(testFolder,"piInit",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(params$gamInit), paste(testFolder,"gamInit",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(mini), paste(testFolder,"mini",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(maxi), paste(testFolder,"maxi",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(gamma), paste(testFolder,"gamma",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(lambda), paste(testFolder,"lambda",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(xy$X), paste(testFolder,"X",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(xy$Y), paste(testFolder,"Y",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(eps), paste(testFolder,"eps",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""),
+ row.names=F, col.names=F)
+
+ res = valse::EMGLLF(params$phiInit,params$rhoInit,params$piInit,params$gamInit,mini,
+ maxi,gamma,lambda,xy$X,xy$Y,eps,fast=FALSE)
+
+ #save outputs
+ write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F)
+ write.table(as.double(res$rho),paste(testFolder,"rho",sep=""),row.names=F,col.names=F)
+ write.table(as.double(res$pi),paste(testFolder,"pi",sep=""),row.names=F,col.names=F)
+ write.table(as.double(res$llh),paste(testFolder,"llh",sep=""),row.names=F,col.names=F)
+ write.table(as.double(res$S),paste(testFolder,"S",sep=""),row.names=F,col.names=F)
+ write.table(as.integer(res$affec),paste(testFolder,"affec",sep=""),row.names=F,col.names=F)
+}
--- /dev/null
+source("helper.R")
+library(valse)
+
+generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, gamma=1.0, rank = c(1,2,4))
+{
+ eps = 1e-6
+ Pi = rep(1.0/k, k)
+ Rho = array(dim=c(m,m,k))
+ for(i in 1:k)
+ Rho[,,i] = diag(1,m)
+ xy = generateXYdefault(n, p, m, k)
+
+ testFolder = "./data/"
+ dir.create(testFolder, showWarnings=FALSE, mode="0755")
+ #save inputs
+ write.table(as.double(Pi), paste(testFolder,"Pi",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(Rho), paste(testFolder,"Rho",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(mini), paste(testFolder,"mini",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(maxi), paste(testFolder,"maxi",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(xy$X), paste(testFolder,"X",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(xy$Y), paste(testFolder,"Y",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.double(eps), paste(testFolder,"eps",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(rank), paste(testFolder,"rank",sep=""),
+ row.names=F, col.names=F)
+ write.table(as.integer(c(n,p,m,k)), paste(testFolder,"dimensions",sep=""),
+ row.names=F, col.names=F)
+
+ res = valse::EMGrank(Pi,Rho,mini,maxi,xy$X,xy$Y,eps,rank,fast=FALSE)
+
+ #save output
+ write.table(as.double(res$phi),paste(testFolder,"phi",sep=""),row.names=F,col.names=F)
+ write.table(as.double(res$LLF),paste(testFolder,"LLF",sep=""),row.names=F,col.names=F)
+}
--- /dev/null
+#' Generate a sample of (X,Y) of size n with default values
+#'
+#' @param n sample size
+#' @param p number of covariates
+#' @param m size of the response
+#' @param k number of clusters
+#'
+#' @return list with X and Y
+#'
+generateXYdefault = function(n, p, m, k)
+{
+ meanX = rep(0, p)
+ covX = diag(p)
+ covY = array(dim=c(m,m,k))
+ for(r in 1:k)
+ covY[,,r] = diag(m)
+ π = rep(1./k,k)
+ #initialize beta to a random number of non-zero random value
+ β = array(0, dim=c(p,m,k))
+ for (j in 1:p)
+ {
+ nonZeroCount = sample(1:m, 1)
+ β[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k)
+ }
+
+ sample_IO = generateXY(n, π, meanX, β, covX, covY)
+ return (list(X=sample_IO$X,Y=sample_IO$Y))
+}
+
+#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for
+#' weights, identity for covariance matrices, and uniformly distributed for the
+#' clustering)
+#'
+#' @param n sample size
+#' @param p number of covariates
+#' @param m size of the response
+#' @param k number of clusters
+#'
+#' @return list with phiInit, rhoInit,piInit,gamInit
+#'
+basicInitParameters = function(n,p,m,k)
+{
+ phiInit = array(0, dim=c(p,m,k))
+
+ piInit = (1./k)*rep(1,k)
+
+ rhoInit = array(dim=c(m,m,k))
+ for (i in 1:k)
+ rhoInit[,,i] = diag(m)
+
+ gamInit = 0.1 * matrix(1, nrow=n, ncol=k)
+ R = sample(1:k, n, replace=TRUE)
+ for (i in 1:n)
+ gamInit[i,R[i]] = 0.9
+ gamInit = gamInit/sum(gamInit[1,])
+
+ return (list("phiInit"=phiInit, "rhoInit"=rhoInit, "piInit"=piInit, "gamInit"=gamInit))
+}
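+
+# Usage sketch (arbitrary small sizes, not run automatically):
+#   xy = generateXYdefault(50, 5, 4, 2)       # X is 50x5, Y is 50x4
+#   params = basicInitParameters(50, 5, 4, 2) # rows of params$gamInit sum to 1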
--- /dev/null
+#!/bin/sh
+set -e
+
+#Testing procedure for EMGLLF (inside this folder):
+
+algo=$1 #EMGLLF or EMGrank,
+ #second arg indicate if rebuild or rebuild+clean requested
+
+if [ "$2" == 'c' ]; then
+ #0.1) Clean package + C testing code
+ find ../pkg/man/ -type f ! -name 'valse-package.Rd' -delete
+ rm -f ../pkg/NAMESPACE
+ # Erase object and library files
+ rm -f ../pkg/src/*.so
+ rm -f ../pkg/src/adapters/*.o
+ make cclean
+fi
+
+if [ "$2" == 'r' ] || [ "$2" == 'c' ]; then
+ #0.2) Install current version of the package (WARNING: roxygen 2 v5.0.1)
+ # --> devtools::install_github('klutometis/roxygen@v5.0.1')
+ echo "setwd('../pkg');library(roxygen2);roxygenize('.')" | R --slave
+ R CMD INSTALL ../pkg
+fi
+
+#1) Generate data using R versions of EMGLLF/EMGrank (slow, but trusted)
+echo -e "source('generateRunSaveTest_$algo.R');\n \
+ # I'm happy with default values - feel free to give args\n \
+ generateRunSaveTest_$algo() " \
+ | R --slave
+
+#2) Compile test C code into an executable named "test.$algo"
+make test.$algo
+
+#3) Run it with valgrind!
+#valgrind
+./test.$algo
--- /dev/null
+m=6
+p=6
+
+covY = array(0,dim = c(m,m,2))
+covY[,,1] = diag(m)
+covY[,,2] = diag(m)
+
+Beta = array(0, dim = c(p, m, 2))
+Beta[1:4,1:4,1] = 3*diag(4)
+Beta[1:4,1:4,2] = -2*diag(4)
+
+Data = generateXY(200, c(0.5,0.5), rep(0,p), Beta, diag(p), covY)
+#
+Res = valse(Data$X,Data$Y, fast=TRUE, plot=FALSE, verbose = TRUE, kmax=3, size_coll_mod = 50, selecMod = "DDSE", mini = 50, maxi=100)
+plot(Res$tableau[,3], -Res$tableau[,4])
--- /dev/null
+#include "EMGLLF.h"
+#include "test_utils.h"
+#include <stdlib.h>
+
+int main(int argc, char** argv)
+{
+ int* dimensions = readArray_int("dimensions");
+ int n = dimensions[0];
+ int p = dimensions[1];
+ int m = dimensions[2];
+ int k = dimensions[3];
+ free(dimensions);
+
+ ////////////
+ // INPUTS //
+ Real* phiInit = readArray_real("phiInit");
+ Real* rhoInit = readArray_real("rhoInit");
+ Real* piInit = readArray_real("piInit");
+ Real* gamInit = readArray_real("gamInit");
+ int mini = read_int("mini");
+ int maxi = read_int("maxi");
+ Real gamma = read_real("gamma");
+ Real lambda = read_real("lambda");
+ Real* X = readArray_real("X");
+ Real* Y = readArray_real("Y");
+ Real eps = read_real("eps");
+ ////////////
+
+ /////////////
+ // OUTPUTS //
+ Real* phi = (Real*)malloc(p*m*k*sizeof(Real));
+ Real* rho = (Real*)malloc(m*m*k*sizeof(Real));
+ Real* pi = (Real*)malloc(k*sizeof(Real));
+ Real llh;
+ Real* S = (Real*)malloc(p*m*k*sizeof(Real));
+ int* affec = (int*)malloc(n*sizeof(int));
+ /////////////
+
+ ////////////////////
+ // Call to EMGLLF //
+ EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,
+ phi,rho,pi,&llh,S,affec,
+ n,p,m,k);
+ ////////////////////
+
+ free(phiInit);
+ free(rhoInit);
+ free(piInit);
+ free(gamInit);
+ free(X);
+ free(Y);
+
+ // Compare to reference outputs
+ Real* ref_phi = readArray_real("phi");
+ compareArray_real("phi", phi, ref_phi, p*m*k);
+ free(phi);
+ free(ref_phi);
+
+ Real* ref_rho = readArray_real("rho");
+ compareArray_real("rho", rho, ref_rho, m*m*k);
+ free(rho);
+ free(ref_rho);
+
+ Real* ref_pi = readArray_real("pi");
+ compareArray_real("pi", pi, ref_pi, k);
+ free(pi);
+ free(ref_pi);
+
+ Real ref_llh = read_real("llh");
+ compareArray_real("llh", &llh, &ref_llh, 1);
+
+ Real* ref_S = readArray_real("S");
+ compareArray_real("S", S, ref_S, p*m*k);
+ free(S);
+ free(ref_S);
+
+ int* ref_affec = readArray_int("affec");
+ compareArray_int("affec", affec, ref_affec, n);
+ free(affec);
+ free(ref_affec);
+
+ return 0;
+}
--- /dev/null
+#include "EMGrank.h"
+#include "test_utils.h"
+#include <stdlib.h>
+
+int main(int argc, char** argv)
+{
+ int* dimensions = readArray_int("dimensions");
+ int n = dimensions[0];
+ int p = dimensions[1];
+ int m = dimensions[2];
+ int k = dimensions[3];
+ free(dimensions);
+
+ ////////////
+ // INPUTS //
+ Real* Rho = readArray_real("Rho");
+ Real* Pi = readArray_real("Pi");
+ int mini = read_int("mini");
+ int maxi = read_int("maxi");
+ Real* X = readArray_real("X");
+ Real* Y = readArray_real("Y");
+ Real eps = read_real("eps");
+ int* rank = readArray_int("rank");
+ ////////////
+
+ /////////////
+ // OUTPUTS //
+ Real* phi = (Real*)malloc(p*m*k*sizeof(Real));
+ Real* LLF = (Real*)malloc(1*sizeof(Real));
+ /////////////
+
+ //////////////////////////
+ // Main call to EMGrank //
+ EMGrank_core(Pi,Rho,mini,maxi,X,Y,eps,rank,
+ phi,LLF,
+ n,p,m,k);
+ //////////////////////////
+
+ free(Rho);
+ free(Pi);
+ free(X);
+ free(Y);
+ free(rank);
+
+ // Compare to reference outputs
+ Real* ref_phi = readArray_real("phi");
+ compareArray_real("phi", phi, ref_phi, p*m*k);
+ free(phi);
+ free(ref_phi);
+
+ // LLF
+ Real* ref_LLF = readArray_real("LLF");
+ compareArray_real("LLF", LLF, ref_LLF, 1);
+ free(LLF);
+ free(ref_LLF);
+
+ return 0;
+}
--- /dev/null
+library(valse)
+testFolder = "data/"
+
+# NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove.
+
+#get inputs
+npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep="")))
+n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4]
+phiInit = array(as.double(as.matrix(read.table(paste(testFolder,"phiInit",sep="")))), dim=c(p,m,k))
+rhoInit = array(as.double(as.matrix(read.table(paste(testFolder,"rhoInit",sep="")))), dim=c(m,m,k))
+piInit = as.double(as.matrix(read.table(paste(testFolder,"piInit",sep="")))[,])
+gamInit = matrix(as.double(as.matrix(read.table(paste(testFolder,"gamInit",sep="")))), n,k)
+mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1])
+maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1])
+gamma = as.double(as.matrix(read.table(paste(testFolder,"gamma",sep="")))[1])
+lambda = as.double(as.matrix(read.table(paste(testFolder,"lambda",sep="")))[1])
+X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p)
+Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m)
+eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1])
+
+#get outputs
+phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k))
+rho = array(as.double(as.matrix(read.table(paste(testFolder,"rho",sep="")))), dim=c(m,m,k))
+pi = as.double(as.matrix(read.table(paste(testFolder,"pi",sep="")))[,])
+llh = as.double(as.matrix(read.table(paste(testFolder,"llh",sep="")))[1])
+S = array(as.double(as.matrix(read.table(paste(testFolder,"S",sep="")))), dim=c(p,m,k))
+affec = as.double(as.matrix(read.table(paste(testFolder,"affec",sep="")))[,])
+
+res = valse::EMGLLF(
+ phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,eps,fast=TRUE)
+
+#compare outputs
+nd=7 #number of digits
+print( all(round(phi,nd) == round(res$phi,nd)) )
+print( all(round(rho,nd) == round(res$rho,nd)) )
+print( all(round(pi,nd) == round(res$pi,nd)) )
+print( all(round(llh,nd) == round(res$llh,nd)) )
+print( all(round(S,nd) == round(res$S,nd)) )
+print( all(affec == res$affec) )
--- /dev/null
+library(valse)
+testFolder = "data/"
+
+# NOTE: R typing is really terrible. as.double as.matrix ...and so on; don't remove.
+
+#get inputs
+npmk = as.matrix(read.table(paste(testFolder,"dimensions",sep="")))
+n = npmk[1]; p=npmk[2]; m=npmk[3]; k=npmk[4]
+Pi = as.double(as.matrix(read.table(paste(testFolder,"Pi",sep="")))[,])
+Rho = array(as.double(as.matrix(read.table(paste(testFolder,"Rho",sep="")))), dim=c(m,m,k))
+mini = as.integer(as.matrix(read.table(paste(testFolder,"mini",sep="")))[1])
+maxi = as.integer(as.matrix(read.table(paste(testFolder,"maxi",sep="")))[1])
+X = matrix(as.double(as.matrix(read.table(paste(testFolder,"X",sep="")))), n,p)
+Y = matrix(as.double(as.matrix(read.table(paste(testFolder,"Y",sep="")))), n,m)
+eps = as.double(as.matrix(read.table(paste(testFolder,"eps",sep="")))[1])
+rank = as.double(as.matrix(read.table(paste(testFolder,"rank",sep="")))[,])
+
+#get outputs
+phi = array(as.double(as.matrix(read.table(paste(testFolder,"phi",sep="")))), dim=c(p,m,k))
+LLF = as.double(as.matrix(read.table(paste(testFolder,"LLF",sep="")))[1])
+
+res = valse::EMGrank(Pi,Rho,mini,maxi,X,Y,eps,rank,fast=TRUE)
+
+#compare outputs
+nd=7 #number of digits
+print( all(round(phi,nd) == round(res$phi,nd)) )
+print( all(round(LLF,nd) == round(res$LLF,nd)) )
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include "utils.h"
+
+// Check if array == refArray
+void compareArray(const char* ID, const void* array, const void* refArray, int size,
+ int isinteger)
+{
+ Real EPS = 1e-5; //precision
+ printf("Checking %s\n",ID);
+ Real maxError = 0.0;
+ for (int i=0; i<size; i++)
+ {
+ Real error = isinteger
+ ? fabs(((int*)array)[i] - ((int*)refArray)[i])
+ : fabs(((Real*)array)[i] - ((Real*)refArray)[i]);
+ if (error >= maxError)
+ maxError = error;
+ }
+ if (maxError >= EPS)
+ printf(" Inaccuracy: max(abs(error)) = %g >= %g\n",maxError,EPS);
+ else
+ printf(" OK\n");
+}
+
+void compareArray_real(const char* ID, const void* array, const void* refArray, int size)
+{
+ compareArray(ID, array, refArray, size, 0);
+}
+
+void compareArray_int(const char* ID, const void* array, const void* refArray, int size)
+{
+ compareArray(ID, array, refArray, size, 1);
+}
+
+// Read array by columns (as in MATLAB) and return by-rows encoding
+void* readArray(const char* fileName, int isinteger)
+{
+ // need to prepend 'data/' (not really nice code...)
+ char* fullFileName = (char*)calloc(5+strlen(fileName)+1, sizeof(char));
+ strcat(fullFileName, "data/");
+ strcat(fullFileName, fileName);
+
+ // first pass to know how many elements to allocate
+ char* command = (char*)calloc(12+strlen(fullFileName)+8+1, sizeof(char));
+ strcat(command, "wc -l ");
+ strcat(command, fullFileName);
+ FILE *arraySize = popen(command, "r");
+ free(command);
+ char* bufferNum = (char*)calloc(64, sizeof(char));
+ fgets(bufferNum, 64, arraySize); //bufferNum holds 64 chars (sizeof here would give pointer size)
+ int n = atoi(bufferNum);
+ pclose(arraySize);
+
+ // open file for reading
+ FILE* arrayFile = fopen(fullFileName, "r");
+ free(fullFileName);
+
+ // read all values, and convert them to by-rows matrices format
+ size_t elementSize = isinteger ? sizeof(int) : sizeof(Real);
+ void* array = malloc(n*elementSize);
+ for (int i=0; i<n; i++)
+ {
+ fgets(bufferNum, 64, arrayFile);
+ // transform buffer content into Real or int, and store it at appropriate location
+ if (isinteger)
+ ((int*)array)[i] = atoi(bufferNum);
+ else
+ ((Real*)array)[i] = atof(bufferNum);
+ }
+ fclose(arrayFile);
+ free(bufferNum);
+
+ return array;
+}
+
+int* readArray_int(const char* fileName)
+{
+ return (int*)readArray(fileName, 1);
+}
+
+Real* readArray_real(const char* fileName)
+{
+ return (Real*)readArray(fileName, 0);
+}
+
+int read_int(const char* fileName)
+{
+ int* array = readArray_int(fileName);
+ int res = array[0];
+ free(array);
+ return res;
+}
+
+Real read_real(const char* fileName)
+{
+ Real* array = readArray_real(fileName);
+ Real res = array[0];
+ free(array);
+ return res;
+}
--- /dev/null
+#ifndef valse_test_utils_H
+#define valse_test_utils_H
+
+#include "utils.h" //for the Real type
+
+// Check if array == refArray
+void compareArray(const char* ID, const void* array, const void* refArray, int size, int isInteger);
+
+void compareArray_real(const char* ID, const void* array, const void* refArray, int size);
+
+void compareArray_int(const char* ID, const void* array, const void* refArray, int size);
+
+// Read array by columns (as in MATLAB) and return by-rows encoding
+void* readArray(const char* fileName, int isInteger);
+
+int* readArray_int(const char* fileName);
+
+Real* readArray_real(const char* fileName);
+
+int read_int(const char* fileName);
+
+Real read_real(const char* fileName);
+
+#endif