+++ /dev/null
--Package: valse
--Title: Variable Selection With Mixture Of Models
--Date: 2016-12-01
--Version: 0.1-0
--Description: Two methods are implemented to cluster data with finite mixture
-- regression models. Those procedures deal with high-dimensional covariates and
-- responses through a variable selection procedure based on the Lasso estimator.
-- A low-rank constraint can be added, yielding the Lasso-Rank procedure.
-- A collection of models is constructed, varying the level of sparsity and the
-- number of clusters, and a model is selected using a model selection criterion
-- (slope heuristic, BIC or AIC). Details of the procedure are provided in 'Model-
-- based clustering for high-dimensional data. Application to functional data' by
-- Emilie Devijver, published in Advances in Data Analysis and Classification (2016).
--Author: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
-- Emilie Devijver <Emilie.Devijver@kuleuven.be> [aut],
-- Benjamin Goehry <Benjamin.Goehry@math.u-psud.fr> [aut]
--Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
--Depends:
-- R (>= 3.0.0)
--Imports:
-- MASS,
-- parallel
--Suggests:
-- capushe,
-- ggplot2,
-- gridExtra,
-- reshape2,
-- cowplot,
-- roxygen2,
-- testthat
--URL: http://git.auder.net/?p=valse.git
--License: MIT + file LICENSE
--RoxygenNote: 5.0.1
--Collate:
-- 'plot_valse.R'
-- 'main.R'
-- 'selectVariables.R'
-- 'constructionModelesLassoRank.R'
-- 'constructionModelesLassoMLE.R'
-- 'computeGridLambda.R'
-- 'initSmallEM.R'
-- 'EMGrank.R'
-- 'EMGLLF.R'
-- 'generateXY.R'
-- 'A_NAMESPACE.R'
-- 'util.R'
+++ /dev/null
--Copyright (c)
-- 2014-2017, Benjamin Auder
-- 2014-2017, Emilie Devijver
-- 2016-2017, Benjamin Goehry
--
--Permission is hereby granted, free of charge, to any person obtaining
--a copy of this software and associated documentation files (the
--"Software"), to deal in the Software without restriction, including
--without limitation the rights to use, copy, modify, merge, publish,
--distribute, sublicense, and/or sell copies of the Software, and to
--permit persons to whom the Software is furnished to do so, subject to
--the following conditions:
--
--The above copyright notice and this permission notice shall be
--included in all copies or substantial portions of the Software.
--
--THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
--EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
--MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
--NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
--LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
--OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
--WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+++ /dev/null
--#' @include generateXY.R
--#' @include EMGLLF.R
--#' @include EMGrank.R
--#' @include initSmallEM.R
--#' @include computeGridLambda.R
--#' @include constructionModelesLassoMLE.R
--#' @include constructionModelesLassoRank.R
--#' @include selectVariables.R
--#' @include main.R
--#' @include plot_valse.R
--#'
--#' @useDynLib valse
--#'
--#' @importFrom parallel makeCluster parLapply stopCluster clusterExport
--#' @importFrom MASS ginv
--NULL
+++ /dev/null
--#' EMGLLF
--#'
--#' EM algorithm for a mixture of Gaussian regression models with a Lasso
--#' penalty (the computational core of the Lasso-MLE procedure).
--#'
--#' @param phiInit an initialization for phi
--#' @param rhoInit an initialization for rho
--#' @param piInit an initialization for pi
--#' @param gamInit initialization for the a posteriori probabilities
--#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
--#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
--#' @param gamma integer for the power in the penalty, by default = 1
--#' @param lambda regularization parameter in the Lasso estimation
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#' @param eps real, threshold to declare convergence of the EM algorithm, by default = 1e-4
--#' @param fast TRUE to use compiled C code, FALSE for R code only
--#'
--#' @return A list with phi, rho, pi, llh, S (and affec on the C code path):
--#'   phi : renormalized mean parameters, estimated by the EM
--#'   rho : renormalized variance parameters, estimated by the EM
--#'   pi : renormalized mixture proportions, estimated by the EM
--#'   llh : log-likelihood of the sample at the estimated parameter values
--#'   S : intermediate quantities, used to build the grid of lambda values
--#'   affec : cluster assignment of each sample (C code path only)
--#'
--#' @export
--EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
-- X, Y, eps, fast)
--{
-- if (!fast)
-- {
-- # Function in R
-- return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
-- X, Y, eps))
-- }
--
-- # Function in C
--  n <- nrow(X) # number of samples
--  p <- ncol(X) # number of covariates
--  m <- ncol(Y) # size of Y (multivariate response)
--  k <- length(piInit) # number of mixture components
-- .Call("EMGLLF", phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
-- X, Y, eps, phi = double(p * m * k), rho = double(m * m * k), pi = double(k),
-- LLF = double(maxi), S = double(p * m * k), affec = integer(n), n, p, m, k,
-- PACKAGE = "valse")
--}
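--
--## Hedged usage sketch (illustrative values, not from the package docs).
--## With fast = FALSE the pure-R implementation below is used, so no compiled
--## code is required:
--#   set.seed(1)
--#   X <- matrix(rnorm(50 * 3), 50, 3)
--#   Y <- matrix(rnorm(50 * 2), 50, 2)
--#   init <- initSmallEM(k = 2, X, Y, fast = FALSE)
--#   res <- EMGLLF(init$phiInit, init$rhoInit, init$piInit, init$gamInit,
--#     mini = 10, maxi = 100, gamma = 1, lambda = 0.1, X, Y, eps = 1e-4,
--#     fast = FALSE)
--#   res$pi # estimated mixture proportions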
--
--# R version - slow but easy to read
--.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
-- X, Y, eps)
--{
--  # Matrix dimensions
--  n <- nrow(X)
--  p <- ncol(X)
--  m <- ncol(Y)
--  k <- length(piInit)
--
--  # Adjustments required when p==1 or m==1 (phiInit *must* be an array,
--  # even with a single covariate or a univariate response)
--  if (p == 1 || m == 1)
--    phiInit <- array(phiInit, dim = c(p, m, k))
--  if (m == 1)
--    rhoInit <- array(rhoInit, dim = c(m, m, k))
--
-- # Outputs
--  phi <- phiInit
-- rho <- rhoInit
-- pi <- piInit
-- llh <- -Inf
-- S <- array(0, dim = c(p, m, k))
--
-- # Algorithm variables
-- gam <- gamInit
-- Gram2 <- array(0, dim = c(p, p, k))
-- ps2 <- array(0, dim = c(p, m, k))
-- X2 <- array(0, dim = c(n, p, k))
-- Y2 <- array(0, dim = c(n, m, k))
-- EPS <- 1e-15
--
-- for (ite in 1:maxi)
-- {
-- # Remember last pi,rho,phi values for exit condition in the end of loop
-- Phi <- phi
-- Rho <- rho
-- Pi <- pi
--
-- # Computations associated to X and Y
-- for (r in 1:k)
-- {
-- for (mm in 1:m)
-- Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
-- for (i in 1:n)
-- X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
-- for (mm in 1:m)
-- ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
-- for (j in 1:p)
-- {
-- for (s in 1:p)
-- Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
-- }
-- }
--
-- ## M step
--
-- # For pi
-- b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
-- gam2 <- colSums(gam)
-- a <- sum(gam %*% log(pi))
--
--    # Shrink the step until all proportions are nonnegative
-- kk <- 0
-- pi2AllPositive <- FALSE
-- while (!pi2AllPositive)
-- {
-- pi2 <- pi + 0.1^kk * ((1/n) * gam2 - pi)
-- pi2AllPositive <- all(pi2 >= 0)
-- kk <- kk + 1
-- }
--
--    # t is the largest value in the grid 0.1^kk such that the penalized criterion is nonincreasing
-- while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
-- # na.rm=TRUE to handle 0*log(0)
-- -sum(gam2 * log(pi2), na.rm=TRUE)/n + lambda * sum(pi2^gamma * b))
-- {
-- pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
-- kk <- kk + 1
-- }
-- t <- 0.1^kk
-- pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
--
-- # For phi and rho
-- for (r in 1:k)
-- {
-- for (mm in 1:m)
-- {
-- ps <- 0
-- for (i in 1:n)
-- ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
-- nY2 <- sum(Y2[, mm, r]^2)
-- rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
-- }
-- }
--
-- for (r in 1:k)
-- {
-- for (j in 1:p)
-- {
-- for (mm in 1:m)
-- {
-- S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] +
-- sum(phi[-j, mm, r] * Gram2[j, -j, r])
-- if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) {
-- phi[j, mm, r] <- 0
-- } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) {
-- phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
-- } else {
-- phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
-- }
-- }
-- }
-- }
--
-- ## E step
--
-- # Precompute det(rho[,,r]) for r in 1...k
--    detRho <- sapply(1:k, function(r) gdet(rho[, , r]))
-- sumLogLLH <- 0
-- for (i in 1:n)
-- {
-- # Update gam[,]; use log to avoid numerical problems
-- logGam <- sapply(1:k, function(r) {
-- log(pi[r]) + log(detRho[r]) - 0.5 *
-- sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)
-- })
--
-- logGam <- logGam - max(logGam) #adjust without changing proportions
-- gam[i, ] <- exp(logGam)
-- norm_fact <- sum(gam[i, ])
-- gam[i, ] <- gam[i, ] / norm_fact
-- sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2))
-- }
--
-- sumPen <- sum(pi^gamma * b)
-- last_llh <- llh
--    llh <- -sumLogLLH/n + lambda * sumPen
-- dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh)))
-- Dist1 <- max((abs(phi - Phi))/(1 + abs(phi)))
-- Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
-- Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
-- dist2 <- max(Dist1, Dist2, Dist3)
--
-- if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
-- break
-- }
--
-- list(phi = phi, rho = rho, pi = pi, llh = llh, S = S)
--}
+++ /dev/null
--#' EMGrank
--#'
--#' EM algorithm with a low-rank constraint on the mean parameters (the
--#' computational core of the Lasso-Rank procedure).
--#'
--#' @param Pi proportions parameter
--#' @param Rho initial renormalized variance parameter
--#' @param mini minimum number of iterations in the EM algorithm
--#' @param maxi maximum number of iterations in the EM algorithm
--#' @param X matrix of covariates
--#' @param Y matrix of responses
--#' @param tau threshold to declare convergence
--#' @param rank vector of possible ranks
--#' @param fast TRUE to use compiled C code, FALSE for R code only
--#'
--#' @return A list with:
--#'   phi : renormalized mean parameters, estimated by the EM
--#'   LLF : log-likelihood of the sample at the estimated parameter values
--#'
--#' @export
--EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE)
--{
-- if (!fast)
-- {
-- # Function in R
-- return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
-- }
--
-- # Function in C
--  n <- nrow(X) # number of samples
--  p <- ncol(X) # number of covariates
--  m <- ncol(Y) # size of Y (multivariate response)
--  k <- length(Pi) # number of mixture components
-- .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, tau, rank, phi = double(p * m * k),
-- LLF = double(1), n, p, m, k, PACKAGE = "valse")
--}
--
--# Helper to ensure matrix arguments: a numeric vector becomes a 1-row matrix.
--# (TODO: put this elsewhere? We should use by-columns storage everywhere... [later!])
--matricize <- function(X)
--{
-- if (!is.matrix(X))
-- return(t(as.matrix(X)))
-- return(X)
--}
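--
--# Hedged mini-example: matricize(c(1, 2, 3)) returns a 1 x 3 matrix (so that
--# crossprod() keeps working when a cluster holds a single sample), while a
--# matrix argument passes through unchanged.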
--
--# R version - slow but easy to read
--.EMGrank_R <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
--{
-- # matrix dimensions
--  n <- nrow(X)
--  p <- ncol(X)
--  m <- ncol(Y)
--  k <- length(Pi)
--
-- # init outputs
-- phi <- array(0, dim = c(p, m, k))
-- Z <- rep(1, n)
-- LLF <- 0
--
-- # local variables
-- Phi <- array(0, dim = c(p, m, k))
-- deltaPhi <- c()
-- sumDeltaPhi <- 0
-- deltaPhiBufferSize <- 20
--
-- # main loop
-- ite <- 1
-- while (ite <= mini || (ite <= maxi && sumDeltaPhi > tau))
-- {
-- # M step: update for Beta ( and then phi)
-- for (r in 1:k)
-- {
-- Z_indice <- seq_len(n)[Z == r] #indices where Z == r
-- if (length(Z_indice) == 0)
-- next
-- # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-- s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*%
-- crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
-- S <- s$d
--      # Set the m - rank[r] smallest singular values to zero, and recompose the
--      # best rank[r] approximation of the initial product
-- if (rank[r] < length(S))
-- S[(rank[r] + 1):length(S)] <- 0
-- phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
-- }
--
-- # Step E and computation of the loglikelihood
-- sumLogLLF2 <- 0
-- for (i in seq_len(n))
-- {
-- sumLLF1 <- 0
-- maxLogGamIR <- -Inf
-- for (r in seq_len(k))
-- {
-- dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
--        logGamIR <- log(Pi[r]) + log(gdet(Rho[, , r])) - 0.5 * dotProduct
-- # Z[i] = index of max (gam[i,])
-- if (logGamIR > maxLogGamIR)
-- {
-- Z[i] <- r
-- maxLogGamIR <- logGamIR
-- }
-- sumLLF1 <- sumLLF1 + exp(logGamIR)/(2 * pi)^(m/2)
-- }
-- sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
-- }
--
-- LLF <- -1/n * sumLogLLF2
--
-- # update distance parameter to check algorithm convergence (delta(phi, Phi))
-- deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
-- if (length(deltaPhi) > deltaPhiBufferSize)
-- deltaPhi <- deltaPhi[2:length(deltaPhi)]
-- sumDeltaPhi <- sum(abs(deltaPhi))
--
-- # update other local variables
-- Phi <- phi
-- ite <- ite + 1
-- }
-- return(list(phi = phi, LLF = LLF))
--}
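--
--## Hedged usage sketch (illustrative values; identity variance parameters):
--#   set.seed(1)
--#   X <- matrix(rnorm(60 * 4), 60, 4)
--#   Y <- matrix(rnorm(60 * 3), 60, 3)
--#   Rho <- array(diag(3), dim = c(3, 3, 2)) # one identity matrix per component
--#   res <- EMGrank(Pi = c(0.5, 0.5), Rho, mini = 5, maxi = 50, X, Y,
--#     tau = 1e-6, rank = c(1, 2), fast = FALSE)
--#   dim(res$phi) # 4 x 3 x 2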
+++ /dev/null
--#' computeGridLambda
--#'
--#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
--#'
--#' @param phiInit value for phi
--#' @param rhoInit value for rho
--#' @param piInit value for pi
--#' @param gamInit value for gamma
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#' @param gamma power of weights in the penalty
--#' @param mini minimum number of iterations in EM algorithm
--#' @param maxi maximum number of iterations in EM algorithm
--#' @param tau threshold to stop the EM algorithm
--#' @param fast TRUE to use compiled C code, FALSE for R code only
--#'
--#' @return the grid of regularization parameters
--#'
--#' @export
--computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini,
-- maxi, tau, fast)
--{
-- n <- nrow(X)
--  p <- ncol(X)
--  m <- ncol(Y)
--  k <- length(piInit)
--
-- list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0,
-- X, Y, tau, fast)
-- grid <- array(0, dim = c(p, m, k))
--  for (j in 1:p)
--  {
--    for (mm in 1:m)
--      grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma)
--  }
-- sort(unique(grid))
--}
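--
--## Hedged usage sketch: build the lambda grid from a small-EM initialization
--## (illustrative values; pure-R path):
--#   set.seed(1)
--#   X <- matrix(rnorm(50 * 3), 50, 3)
--#   Y <- matrix(rnorm(50 * 2), 50, 2)
--#   init <- initSmallEM(k = 2, X, Y, fast = FALSE)
--#   grid <- computeGridLambda(init$phiInit, init$rhoInit, init$piInit,
--#     init$gamInit, X, Y, gamma = 1, mini = 10, maxi = 100, tau = 1e-4,
--#     fast = FALSE)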
+++ /dev/null
--#' constructionModelesLassoMLE
--#'
--#' Construct a collection of models with the Lasso-MLE procedure.
--#'
--#' @param phiInit an initialization for phi, obtained from initSmallEM()
--#' @param rhoInit an initialization for rho, obtained from initSmallEM()
--#' @param piInit an initialization for pi, obtained from initSmallEM()
--#' @param gamInit an initialization for gam, obtained from initSmallEM()
--#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
--#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
--#' @param gamma integer for the power in the penalty, by default = 1
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
--#' @param S output of selectVariables()
--#' @param ncores Number of cores, by default = 3
--#' @param fast TRUE to use compiled C code, FALSE for R code only
--#' @param verbose TRUE to show some execution traces
--#'
--#' @return a list with several models, defined by phi, rho, pi, llh
--#'
--#' @export
--constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
-- maxi, gamma, X, Y, eps, S, ncores = 3, fast, verbose)
--{
-- if (ncores > 1)
-- {
-- cl <- parallel::makeCluster(ncores, outfile = "")
--    parallel::clusterExport(cl, envir = environment(), varlist = c("phiInit",
--      "rhoInit", "piInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps",
--      "S", "ncores", "fast", "verbose"))
-- }
--
-- # Individual model computation
-- computeAtLambda <- function(lambda)
-- {
-- if (ncores > 1)
-- require("valse") #nodes start with an empty environment
--
-- if (verbose)
-- print(paste("Computations for lambda=", lambda))
--
--    n <- nrow(X)
--    p <- ncol(X)
--    m <- ncol(Y)
--    k <- length(piInit)
-- sel.lambda <- S[[lambda]]$selected
-- # col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix
-- col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars
-- if (length(col.sel) == 0)
-- return(NULL)
--
--    # lambda == 0 because we compute the MLE: no penalization here
--    res <- EMGLLF(array(phiInit[col.sel, , ], dim = c(length(col.sel), m, k)), rhoInit,
--      piInit, gamInit, mini, maxi, gamma, 0, as.matrix(X[, col.sel]), Y, eps, fast)
--
-- # Eval dimension from the result + selected
-- phiLambda2 <- res$phi
-- rhoLambda <- res$rho
-- piLambda <- res$pi
-- phiLambda <- array(0, dim = c(p, m, k))
-- for (j in seq_along(col.sel))
-- phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
-- dimension <- length(unlist(sel.lambda))
--
--    # Computation of the loglikelihood
--    densite <- vector("double", n)
--    for (r in 1:k)
--    {
--      if (length(col.sel) == 1)
--      {
--        delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r])))
--      } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r]))
--      densite <- densite + piLambda[r] * gdet(rhoLambda[, , r])/(sqrt(2 * base::pi))^m *
--        exp(-diag(tcrossprod(delta))/2)
--    }
--    llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1)
-- list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda)
-- }
--
-- # For each lambda, computation of the parameters
-- out <-
-- if (ncores > 1) {
-- parLapply(cl, 1:length(S), computeAtLambda)
-- } else {
-- lapply(1:length(S), computeAtLambda)
-- }
--
-- if (ncores > 1)
-- parallel::stopCluster(cl)
--
-- out
--}
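--
--## Hedged usage sketch (internal step of valse(); 'S' as returned by
--## selectVariables(), 'init' as returned by initSmallEM()):
--#   models <- constructionModelesLassoMLE(init$phiInit, init$rhoInit,
--#     init$piInit, init$gamInit, mini = 10, maxi = 50, gamma = 1, X, Y,
--#     eps = 1e-4, S, ncores = 1, fast = FALSE, verbose = FALSE)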
+++ /dev/null
--#' constructionModelesLassoRank
--#'
--#' Construct a collection of models with the Lasso-Rank procedure.
--#'
--#' @param S output of selectVariables.R
--#' @param k number of components
--#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
--#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
--#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
--#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
--#' @param ncores Number of cores, by default = 3
--#' @param fast TRUE to use compiled C code, FALSE for R code only
--#' @param verbose TRUE to show some execution traces
--#'
--#' @return a list with several models, defined by phi, rho, pi, llh
--#'
--#' @export
--constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max,
-- ncores, fast, verbose)
--{
--  n <- nrow(X)
--  p <- ncol(X)
--  m <- ncol(Y)
-- L <- length(S)
--
-- # Possible interesting ranks
-- deltaRank <- rank.max - rank.min + 1
-- Size <- deltaRank^k
-- RankLambda <- matrix(0, nrow = Size * L, ncol = k + 1)
-- for (r in 1:k)
-- {
--    # We want the table of all possible rank combinations, along with the lambdas.
--    # In the first column, each value is repeated (rank.max-rank.min)^(k-1) times,
--    # which fills the column. In the second, each value is repeated
--    # (rank.max-rank.min)^(k-2) times, and this is done (rank.max-rank.min)^2 times
--    # ... In the last column, each value appears once, and this is done
--    # (rank.max-rank.min)^(k-1) times.
-- RankLambda[, r] <- rep(rank.min + rep(0:(deltaRank - 1), deltaRank^(r - 1),
-- each = deltaRank^(k - r)), each = L)
-- }
-- RankLambda[, k + 1] <- rep(1:L, times = Size)
--
-- if (ncores > 1)
-- {
-- cl <- parallel::makeCluster(ncores, outfile = "")
--    parallel::clusterExport(cl, envir = environment(), varlist = c("RankLambda",
--      "Size", "S", "k", "mini", "maxi", "X", "Y", "eps", "p", "m", "ncores",
--      "fast", "verbose"))
-- }
--
-- computeAtLambda <- function(index)
-- {
-- lambdaIndex <- RankLambda[index, k + 1]
-- rankIndex <- RankLambda[index, 1:k]
-- if (ncores > 1)
-- require("valse") #workers start with an empty environment
--
-- # 'relevant' will be the set of relevant columns
-- selected <- S[[lambdaIndex]]$selected
-- relevant <- c()
-- for (j in 1:p)
-- {
-- if (length(selected[[j]]) > 0)
-- relevant <- c(relevant, j)
-- }
-- if (max(rankIndex) < length(relevant))
-- {
-- phi <- array(0, dim = c(p, m, k))
-- if (length(relevant) > 0)
-- {
-- res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi,
-- X[, relevant], Y, eps, rankIndex, fast)
-- llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
-- phi[relevant, , ] <- res$phi
-- }
-- list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho)
-- }
-- }
--
-- # For each lambda in the grid we compute the estimators
-- out <-
-- if (ncores > 1) {
-- parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
-- } else {
-- lapply(seq_len(length(S) * Size), computeAtLambda)
-- }
--
-- if (ncores > 1)
-- parallel::stopCluster(cl)
--
-- out
--}
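--
--## Hedged usage sketch (internal step of valse(); 'S' as returned by
--## selectVariables()):
--#   models <- constructionModelesLassoRank(S, k = 2, mini = 10, maxi = 50,
--#     X, Y, eps = 1e-4, rank.min = 1, rank.max = 2, ncores = 1,
--#     fast = FALSE, verbose = FALSE)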
+++ /dev/null
--#' generateXY
--#'
--#' Generate a sample of (X,Y) of size n
--#'
--#' @param n sample size
--#' @param π proportion for each cluster
--#' @param meanX vector of means for the covariates (of size p)
--#' @param covX covariance for covariates (of size p*p)
--#' @param β array of regression matrices (of size p*m*k)
--#' @param covY covariance for the response vector (of size m*m*k)
--#'
--#' @return list with X and Y
--#'
--#' @export
--generateXY <- function(n, π, meanX, β, covX, covY)
--{
-- p <- dim(covX)[1]
-- m <- dim(covY)[1]
-- k <- dim(covY)[3]
--
-- X <- matrix(nrow = 0, ncol = p)
-- Y <- matrix(nrow = 0, ncol = m)
--
-- # random generation of the size of each population in X~Y (unordered)
-- sizePop <- rmultinom(1, n, π)
-- class <- c() #map i in 1:n --> index of class in 1:k
--
-- for (i in 1:k)
-- {
-- class <- c(class, rep(i, sizePop[i]))
-- newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX)
-- X <- rbind(X, newBlockX)
-- Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*%
-- β[, , i], covY[, , i]))))
-- }
--
-- shuffle <- sample(n)
-- list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
--}
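--
--## Hedged usage sketch: a small two-cluster sample (all values illustrative).
--## Arguments follow the signature order (n, proportions, meanX, beta, covX, covY):
--#   n <- 100; p <- 3; m <- 2; k <- 2
--#   beta <- array(0, dim = c(p, m, k)); beta[, , 1] <- 1; beta[, , 2] <- -2
--#   covY <- array(diag(m), dim = c(m, m, k))
--#   data <- generateXY(n, c(0.5, 0.5), rep(0, p), beta, diag(p), covY)
--#   table(data$class)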
+++ /dev/null
--#' initialization of the EM algorithm
--#'
--#' @param k number of components
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#'
--#' @return a list with phiInit, rhoInit, piInit, gamInit
--#' @export
--#' @importFrom methods new
--#' @importFrom stats cutree dist hclust runif
--initSmallEM <- function(k, X, Y, fast)
--{
--  n <- nrow(X)
--  p <- ncol(X)
--  m <- ncol(Y)
-- nIte <- 20
-- Zinit1 <- array(0, dim = c(n, nIte))
-- betaInit1 <- array(0, dim = c(p, m, k, nIte))
-- sigmaInit1 <- array(0, dim = c(m, m, k, nIte))
-- phiInit1 <- array(0, dim = c(p, m, k, nIte))
-- rhoInit1 <- array(0, dim = c(m, m, k, nIte))
-- Gam <- matrix(0, n, k)
-- piInit1 <- matrix(0, nIte, k)
-- gamInit1 <- array(0, dim = c(n, k, nIte))
-- LLFinit1 <- list()
--
-- # require(MASS) #Moore-Penrose generalized inverse of matrix
-- for (repet in 1:nIte)
-- {
-- distance_clus <- dist(cbind(X, Y))
-- tree_hier <- hclust(distance_clus)
-- Zinit1[, repet] <- cutree(tree_hier, k)
--
-- for (r in 1:k)
-- {
-- Z <- Zinit1[, repet]
--      Z_indice <- seq_len(n)[Z == r] # indices where Z == r
-- if (length(Z_indice) == 1) {
-- betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*%
-- crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
-- } else {
-- betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*%
-- crossprod(X[Z_indice, ], Y[Z_indice, ])
-- }
-- sigmaInit1[, , r, repet] <- diag(m)
-- phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet]
-- rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
-- piInit1[repet, r] <- mean(Z == r)
-- }
--
-- for (i in 1:n)
-- {
-- for (r in 1:k)
-- {
-- dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
-- - X[i, ] %*% phiInit1[, , r, repet])
-- Gam[i, r] <- piInit1[repet, r] *
--          gdet(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
-- }
-- sumGamI <- sum(Gam[i, ])
-- gamInit1[i, , repet] <- Gam[i, ]/sumGamI
-- }
--
-- miniInit <- 10
-- maxiInit <- 11
--
-- init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
-- gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
-- eps = 1e-04, fast)
-- LLFinit1[[repet]] <- init_EMG$llh
-- }
--  b <- which.min(unlist(LLFinit1))
-- phiInit <- phiInit1[, , , b]
-- rhoInit <- rhoInit1[, , , b]
-- piInit <- piInit1[b, ]
-- gamInit <- gamInit1[, , b]
--
-- return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
--}
+++ /dev/null
--#' valse
--#'
--#' Main function
--#'
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#' @param procedure among 'LassoMLE' or 'LassoRank'
--#' @param selecMod method to select a model among 'DDSE', 'Djump', 'BIC' or 'AIC'
--#' @param gamma integer for the power in the penalty, by default = 1
--#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
--#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 50
--#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
--#' @param kmin integer, minimum number of clusters, by default = 2
--#' @param kmax integer, maximum number of clusters, by default = 3
--#' @param rank.min integer, minimum rank in the low rank procedure, by default = 1
--#' @param rank.max integer, maximum rank in the low rank procedure, by default = 5
--#' @param ncores_outer Number of cores for the outer loop on k
--#' @param ncores_inner Number of cores for the inner loop on lambda
--#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
--#' @param compute_grid_lambda TRUE to compute the grid of regularization parameters, FALSE if given in grid_lambda
--#' @param grid_lambda a vector of regularization parameters, used when compute_grid_lambda is FALSE, by default 0
--#' @param size_coll_mod (Maximum) size of a collection of models
--#' @param fast TRUE to use compiled C code, FALSE for R code only
--#' @param verbose TRUE to show some execution traces
--#' @param plot TRUE to plot the selected model, by default = TRUE
--#'
--#' @return a list with estimators of parameters
--#'
--#' @examples
--#' # A minimal hedged sketch is given below (illustrative values only):
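--#' \donttest{
--#' set.seed(1)
--#' X <- matrix(rnorm(50 * 3), 50, 3)
--#' Y <- matrix(rnorm(50 * 2), 50, 2)
--#' # plot = FALSE avoids the suggested plotting packages; fast = FALSE stays in pure R
--#' res <- valse(X, Y, procedure = "LassoMLE", plot = FALSE, fast = FALSE)
--#' }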
--#' @export
--valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10,
--  maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1,
--  ncores_inner = 1, thresh = 1e-08, compute_grid_lambda = TRUE, grid_lambda = 0,
--  size_coll_mod = 10, fast = TRUE, verbose = FALSE, plot = TRUE)
--{
--  n <- nrow(X)
--  p <- ncol(X)
--  m <- ncol(Y)
--
-- if (verbose)
-- print("main loop: over all k and all lambda")
--
-- if (ncores_outer > 1) {
-- cl <- parallel::makeCluster(ncores_outer, outfile = "")
--    parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X",
--      "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin",
--      "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh",
--      "compute_grid_lambda", "grid_lambda", "size_coll_mod", "fast", "verbose",
--      "p", "m"))
-- }
--
-- # Compute models with k components
-- computeModels <- function(k)
-- {
-- if (ncores_outer > 1)
-- require("valse") #nodes start with an empty environment
--
-- if (verbose)
-- print(paste("Parameters initialization for k =", k))
-- # smallEM initializes parameters by k-means and regression model in each
-- # component, doing this 20 times, and keeping the values maximizing the
-- # likelihood after 10 iterations of the EM algorithm.
-- P <- initSmallEM(k, X, Y, fast)
--    if (compute_grid_lambda)
--    {
--      grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit,
--        X, Y, gamma, mini, maxi, eps, fast)
--    }
-- if (length(grid_lambda) > size_coll_mod)
-- grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
--
-- if (verbose)
-- print("Compute relevant parameters")
-- # select variables according to each regularization parameter from the grid:
-- # S$selected corresponding to selected variables
-- S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi,
-- gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
--
-- if (procedure == "LassoMLE") {
-- if (verbose)
-- print("run the procedure Lasso-MLE")
-- # compute parameter estimations, with the Maximum Likelihood Estimator,
-- # restricted on selected variables.
-- models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit,
-- P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
-- } else {
-- if (verbose)
-- print("run the procedure Lasso-Rank")
-- # compute parameter estimations, with the Low Rank Estimator, restricted on
-- # selected variables.
-- models <- constructionModelesLassoRank(S, k, mini, maxi, X, Y, eps, rank.min,
-- rank.max, ncores_inner, fast, verbose)
-- }
-- # warning! Some models are NULL after running selectVariables
-- models <- models[sapply(models, function(cell) !is.null(cell))]
-- models
-- }
--
-- # List (index k) of lists (index lambda) of models
-- models_list <-
-- if (ncores_outer > 1) {
-- parLapply(cl, kmin:kmax, computeModels)
-- } else {
-- lapply(kmin:kmax, computeModels)
-- }
-- if (ncores_outer > 1)
-- parallel::stopCluster(cl)
--
-- if (!requireNamespace("capushe", quietly = TRUE))
-- {
-- warning("'capushe' not available: returning all models")
-- return(models_list)
-- }
--
-- # Get summary 'tableauRecap' from models
-- tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
-- {
-- models <- models_list[[i]]
-- # For a collection of models (same k, several lambda):
-- LLH <- sapply(models, function(model) model$llh[1])
-- k <- length(models[[1]]$pi)
-- sumPen <- sapply(models, function(model) k * (dim(model$rho)[1] + sum(model$phi[,
-- , 1] != 0) + 1) - 1)
-- data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n,
-- complexity = sumPen, contrast = -LLH)
-- }))
-- tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
--  if (verbose)
--    print(tableauRecap)
--
-- modSel <- capushe::capushe(tableauRecap, n)
-- indModSel <- if (selecMod == "DDSE")
-- as.numeric(modSel@DDSE@model) else if (selecMod == "Djump")
-- as.numeric(modSel@Djump@model) else if (selecMod == "BIC")
-- modSel@BIC_capushe$model else if (selecMod == "AIC")
-- modSel@AIC_capushe$model
--
-- mod <- as.character(tableauRecap[indModSel, 1])
-- listMod <- as.integer(unlist(strsplit(mod, "[.]")))
-- modelSel <- models_list[[listMod[1]]][[listMod[2]]]
--
-- ## Affectations
-- Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n)
-- for (i in 1:n)
-- {
-- for (r in 1:length(modelSel$pi))
-- {
-- sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2)
--      Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * gdet(modelSel$rho[, , r])
-- }
-- }
-- Gam <- Gam/rowSums(Gam)
-- modelSel$affec <- apply(Gam, 1, which.max)
-- modelSel$proba <- Gam
--  modelSel$tableau <- tableauRecap
--
-- if (plot)
-- print(plot_valse(X, Y, modelSel, n))
--
-- return(modelSel)
--}
+++ /dev/null
--#' Plot
--#'
--#' Plot the relevant parameters of a model estimated by the valse procedure.
--#'
--#' @param X matrix of covariates (of size n*p)
--#' @param Y matrix of responses (of size n*m)
--#' @param model the model constructed by valse procedure
--#' @param n sample size
--#' @param comp TRUE to compare two clusters (requires k1 and k2)
--#' @param k1 index of the first cluster to compare
--#' @param k2 index of the second cluster to compare
--#' @return several plots
--#'
--#' @examples
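--#' # Hedged sketch: plot a model previously fitted by valse() (illustrative):
--#' # model <- valse(X, Y, plot = FALSE)
--#' # plot_valse(X, Y, model, n = nrow(X))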
--#'
--#' @export
--#'
--plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
--{
-- require("gridExtra")
-- require("ggplot2")
-- require("reshape2")
-- require("cowplot")
--
-- K <- length(model$pi)
-- ## regression matrices
-- gReg <- list()
-- for (r in 1:K)
-- {
-- Melt <- melt(t((model$phi[, , r])))
-- gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
-- geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
-- midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
-- }
-- print(gReg)
--
-- ## Differences between two clusters
-- if (comp)
-- {
--    if (is.na(k1) || is.na(k2))
--      stop("k1 and k2 must be integers, representing the clusters you want to compare")
-- Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
--    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
--      geom_tile() +
--      scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
--        space = "Lab") +
--      ggtitle(paste("Difference between regression matrices in cluster",
--        k1, "and", k2))
--    print(gDiff)
-- }
--
-- ### Covariance matrices
-- matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
-- for (r in 1:K)
-- matCov[, r] <- diag(model$rho[, , r])
-- MeltCov <- melt(matCov)
--  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) +
--    geom_tile() +
--    scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
--      space = "Lab") +
--    ggtitle("Covariance matrices")
--  print(gCov)
--
-- ### Proportions
-- gam2 <- matrix(NA, ncol = K, nrow = n)
-- for (i in 1:n)
-- gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
--
--  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) +
--    geom_boxplot() +
--    theme(legend.position = "none") +
--    background_grid(major = "xy", minor = "none")
--  print(bp)
--
-- ### Mean in each cluster
-- XY <- cbind(X, Y)
-- XY_class <- list()
-- meanPerClass <- matrix(0, ncol = K, nrow = dim(XY)[2])
-- for (r in 1:K)
-- {
-- XY_class[[r]] <- XY[model$affec == r, ]
-- if (sum(model$affec == r) == 1) {
-- meanPerClass[, r] <- XY_class[[r]]
-- } else {
-- meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
-- }
-- }
-- data <- data.frame(mean = as.vector(meanPerClass),
-- cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
-- g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
-- print(g + geom_line(aes(linetype = cluster, color = cluster))
-- + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
--}
+++ /dev/null
--#' selectVariables
--#'
--#' For each lambda in the grid, construct the sets of relevant variables.
--#'
--#' @param phiInit an initial estimator for phi (size: p*m*k)
--#' @param rhoInit an initial estimator for rho (size: m*m*k)
--#' @param piInit an initial estimator for pi (size: k)
--#' @param gamInit an initial estimator for gamma
--#' @param mini minimum number of iterations in the EM algorithm
--#' @param maxi maximum number of iterations in the EM algorithm
--#' @param gamma power in the penalty
--#' @param glambda grid of regularization parameters
--#' @param X matrix of regressors
--#' @param Y matrix of responses
--#' @param thresh real, threshold to say a variable is relevant, by default = 1e-8
--#' @param eps threshold to declare that the EM algorithm has converged
--#' @param ncores number of cores for parallel execution (1 to disable)
--#'
--#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
--#'
--#' @examples
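--#' # Hedged sketch (illustrative values; pure-R path):
--#' \donttest{
--#' set.seed(1)
--#' X <- matrix(rnorm(50 * 3), 50, 3)
--#' Y <- matrix(rnorm(50 * 2), 50, 2)
--#' init <- initSmallEM(k = 2, X, Y, fast = FALSE)
--#' glambda <- computeGridLambda(init$phiInit, init$rhoInit, init$piInit,
--#'   init$gamInit, X, Y, gamma = 1, mini = 10, maxi = 50, tau = 1e-4,
--#'   fast = FALSE)
--#' S <- selectVariables(init$phiInit, init$rhoInit, init$piInit, init$gamInit,
--#'   mini = 10, maxi = 50, gamma = 1, glambda, X, Y, thresh = 1e-8, eps = 1e-4,
--#'   ncores = 1, fast = FALSE)
--#' }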
--#'
--#' @export
--#'
--selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma,
-- glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast)
--{
-- if (ncores > 1) {
-- cl <- parallel::makeCluster(ncores, outfile = "")
--    parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "piInit",
--      "gamInit", "mini", "maxi", "gamma", "glambda", "X", "Y", "thresh", "eps",
--      "fast"), envir = environment())
-- }
--
-- # Computation for a fixed lambda
-- computeCoefs <- function(lambda)
-- {
-- params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
-- X, Y, eps, fast)
--
--    p <- ncol(X)
--    m <- ncol(Y)
--
-- # selectedVariables: list where element j contains vector of selected variables
-- # in [1,m]
-- selectedVariables <- lapply(1:p, function(j) {
-- # from boolean matrix mxk of selected variables obtain the corresponding boolean
-- # m-vector, and finally return the corresponding indices
--      if (m > 1) {
--        seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
--      } else {
--        if (any(abs(params$phi[j, , ]) > thresh)) 1 else numeric(0)
--      }
-- })
--
-- list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
-- }
--
-- # For each lambda in the grid, we compute the coefficients
-- out <-
-- if (ncores > 1) {
-- parLapply(cl, glambda, computeCoefs)
-- } else {
-- lapply(glambda, computeCoefs)
-- }
-- if (ncores > 1)
-- parallel::stopCluster(cl)
--  # Suppress models which are computed twice
--  # (this would compare all parameters; we only want to discard those sharing
--  # the same set of selected variables):
--  # sha1_array <- lapply(out, digest::sha1) out[ duplicated(sha1_array) ]
-- selec <- lapply(out, function(model) model$selected)
-- ind_dup <- duplicated(selec)
-- ind_uniq <- which(!ind_dup)
-- out2 <- list()
-- for (l in 1:length(ind_uniq))
-- out2[[l]] <- out[[ind_uniq[l]]]
-- out2
--}
+++ /dev/null
--or else data_test.RData, also possible
+++ /dev/null
--\name{valse-package}
--\alias{valse-package}
--\alias{valse}
--\docType{package}
--
--\title{
-- \packageTitle{valse}
--}
--
--\description{
-- \packageDescription{valse}
--}
--
--\details{
--  The devtools package should be useful during development, since we rely on testthat for
--  unit tests and on roxygen2 for documentation. knitr is used to generate the package vignette.
--  Concerning the other suggested packages:
--  \itemize{
--    \item{parallel (generally) makes it possible to run the procedures faster.}
--  }
--
-- The three main functions are ...
--}
--
--\author{
-- \packageAuthor{valse}
--
-- Maintainer: \packageMaintainer{valse}
--}
--
--%\references{
--% TODO: Literature or other references for background information
--%}
--
--%\examples{
--% TODO: simple examples of the most important functions
--%}
+++ /dev/null
--#Debug flags
--PKG_CFLAGS=-g -I./sources
--
--#Prod flags:
--#PKG_CFLAGS=-O2 -I./sources
--
--PKG_LIBS=-lm -lgsl -lcblas
--
--SOURCES = $(wildcard adapters/*.c sources/*.c)
--
--OBJECTS = $(SOURCES:.c=.o)
+++ /dev/null
--#include <R.h>
--#include <Rdefines.h>
--#include "EMGLLF.h"
--
--// See comments in src/sources/EMGLLF.c and R/EMGLLF.R (wrapper)
--SEXP EMGLLF(
-- SEXP phiInit_,
-- SEXP rhoInit_,
-- SEXP piInit_,
-- SEXP gamInit_,
-- SEXP mini_,
-- SEXP maxi_,
-- SEXP gamma_,
-- SEXP lambda_,
-- SEXP X_,
-- SEXP Y_,
-- SEXP tau_
--) {
-- // Get matrices dimensions
-- int n = INTEGER(getAttrib(X_, R_DimSymbol))[0];
-- SEXP dim = getAttrib(phiInit_, R_DimSymbol);
-- int p = INTEGER(dim)[0];
-- int m = INTEGER(dim)[1];
-- int k = INTEGER(dim)[2];
--
-- ////////////
-- // INPUTS //
-- ////////////
--
-- // get scalar parameters
-- int mini = INTEGER_VALUE(mini_);
-- int maxi = INTEGER_VALUE(maxi_);
-- double gamma = NUMERIC_VALUE(gamma_);
-- double lambda = NUMERIC_VALUE(lambda_);
-- double tau = NUMERIC_VALUE(tau_);
--
-- // Get pointers from SEXP arrays ; WARNING: by columns !
-- double* phiInit = REAL(phiInit_);
-- double* rhoInit = REAL(rhoInit_);
-- double* piInit = REAL(piInit_);
-- double* gamInit = REAL(gamInit_);
-- double* X = REAL(X_);
-- double* Y = REAL(Y_);
--
-- /////////////
-- // OUTPUTS //
-- /////////////
--
-- SEXP phi, rho, pi, LLF, S, affec, dimPhiS, dimRho;
-- PROTECT(dimPhiS = allocVector(INTSXP, 3));
-- int* pDimPhiS = INTEGER(dimPhiS);
-- pDimPhiS[0] = p; pDimPhiS[1] = m; pDimPhiS[2] = k;
-- PROTECT(dimRho = allocVector(INTSXP, 3));
-- int* pDimRho = INTEGER(dimRho);
-- pDimRho[0] = m; pDimRho[1] = m; pDimRho[2] = k;
-- PROTECT(phi = allocArray(REALSXP, dimPhiS));
-- PROTECT(rho = allocArray(REALSXP, dimRho));
-- PROTECT(pi = allocVector(REALSXP, k));
-- PROTECT(LLF = allocVector(REALSXP, maxi-mini+1));
-- PROTECT(S = allocArray(REALSXP, dimPhiS));
-- PROTECT(affec = allocVector(INTSXP, n));
-- double *pPhi=REAL(phi), *pRho=REAL(rho), *pPi=REAL(pi), *pLLF=REAL(LLF), *pS=REAL(S);
-- int *pAffec=INTEGER(affec);
--
-- ////////////////////
-- // Call to EMGLLF //
-- ////////////////////
--
-- EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau,
-- pPhi,pRho,pPi,pLLF,pS,pAffec,
-- n,p,m,k);
--
-- // Build list from OUT params and return it
-- SEXP listParams, listNames;
-- int nouts = 6;
-- PROTECT(listParams = allocVector(VECSXP, nouts));
-- char* lnames[6] = {"phi", "rho", "pi", "LLF", "S", "affec"}; //lists labels
-- PROTECT(listNames = allocVector(STRSXP,nouts));
-- for (int i=0; i<nouts; i++)
-- SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
-- setAttrib(listParams, R_NamesSymbol, listNames);
-- SET_VECTOR_ELT(listParams, 0, phi);
-- SET_VECTOR_ELT(listParams, 1, rho);
-- SET_VECTOR_ELT(listParams, 2, pi);
-- SET_VECTOR_ELT(listParams, 3, LLF);
-- SET_VECTOR_ELT(listParams, 4, S);
-- SET_VECTOR_ELT(listParams, 5, affec);
--
-- UNPROTECT(10);
-- return listParams;
--}
+++ /dev/null
--#include <R.h>
--#include <Rdefines.h>
--#include "EMGrank.h"
--
--// See comments in src/sources/EMGrank.c and R/EMGrank.R (wrapper)
--SEXP EMGrank(
-- SEXP Pi_,
-- SEXP Rho_,
-- SEXP mini_,
-- SEXP maxi_,
-- SEXP X_,
-- SEXP Y_,
-- SEXP tau_,
-- SEXP rank_
--) {
-- // Get matrices dimensions
-- SEXP dimX = getAttrib(X_, R_DimSymbol);
-- int n = INTEGER(dimX)[0];
-- int p = INTEGER(dimX)[1];
-- SEXP dimRho = getAttrib(Rho_, R_DimSymbol);
-- int m = INTEGER(dimRho)[0];
-- int k = INTEGER(dimRho)[2];
--
-- ////////////
-- // INPUTS //
-- ////////////
--
-- // get scalar parameters
-- int mini = INTEGER_VALUE(mini_);
-- int maxi = INTEGER_VALUE(maxi_);
-- double tau = NUMERIC_VALUE(tau_);
--
-- // Get pointers from SEXP arrays ; WARNING: by columns !
-- double* Pi = REAL(Pi_);
-- double* Rho = REAL(Rho_);
-- double* X = REAL(X_);
-- double* Y = REAL(Y_);
-- int* rank = INTEGER(rank_);
--
-- /////////////
-- // OUTPUTS //
-- /////////////
--
-- SEXP phi, LLF, dimPhi;
-- PROTECT(dimPhi = allocVector(INTSXP, 3));
-- int* pDimPhi = INTEGER(dimPhi);
-- pDimPhi[0] = p; pDimPhi[1] = m; pDimPhi[2] = k;
-- PROTECT(phi = allocArray(REALSXP, dimPhi));
-- PROTECT(LLF = allocVector(REALSXP, 1));
-- double *pPhi=REAL(phi), *pLLF=REAL(LLF);
--
-- /////////////////////
-- // Call to EMGrank //
-- /////////////////////
--
-- EMGrank_core(Pi, Rho, mini, maxi, X, Y, tau, rank,
-- pPhi,pLLF,
-- n,p,m,k);
--
-- // Build list from OUT params and return it
-- SEXP listParams, listNames;
-- PROTECT(listParams = allocVector(VECSXP, 2));
-- char* lnames[2] = {"phi", "LLF"}; //lists labels
-- PROTECT(listNames = allocVector(STRSXP,2));
-- for (int i=0; i<2; i++)
-- SET_STRING_ELT(listNames,i,mkChar(lnames[i]));
-- setAttrib(listParams, R_NamesSymbol, listNames);
-- SET_VECTOR_ELT(listParams, 0, phi);
-- SET_VECTOR_ELT(listParams, 1, LLF);
--
-- UNPROTECT(5);
-- return listParams;
--}
+++ /dev/null
--#include "utils.h"
--#include <stdlib.h>
--#include <math.h>
--#include <gsl/gsl_linalg.h>
--
--// TODO: don't recompute indexes ai(...) and mi(...) when possible
--void EMGLLF_core(
-- // IN parameters
--	const Real* phiInit, // initial renormalized mean parameter
--	const Real* rhoInit, // initial renormalized variance parameter
--	const Real* piInit, // initial proportions parameter
--	const Real* gamInit, // initial a posteriori probabilities of each sample
--	int mini, // minimum number of iterations in the EM algorithm
--	int maxi, // maximum number of iterations in the EM algorithm
--	Real gamma, // power of the proportions in the penalty, for an adaptive Lasso
--	Real lambda, // value of the Lasso regularization parameter
--	const Real* X, // covariates
--	const Real* Y, // responses
--	Real tau, // threshold to declare convergence
--	// OUT parameters (all pointers, to be modified)
--	Real* phi, // renormalized mean parameter, estimated by the EM
--	Real* rho, // renormalized variance parameter, estimated by the EM
--	Real* pi, // renormalized proportions parameter, estimated by the EM
--	Real* llh, // (last) log-likelihood of the sample,
--	           // at the estimated parameter values
--	Real* S,
--	int* affec,
--	// additional size parameters
--	int n, // number of samples
--	int p, // number of covariates
--	int m, // size of Y (multivariate response)
--	int k) // number of mixture components
--{
-- //Initialize outputs
-- copyArray(phiInit, phi, p*m*k);
-- copyArray(rhoInit, rho, m*m*k);
-- copyArray(piInit, pi, k);
-- //S is already allocated, and doesn't need to be 'zeroed'
--
-- //Other local variables: same as in R
-- Real* gam = (Real*)malloc(n*k*sizeof(Real));
-- copyArray(gamInit, gam, n*k);
-- Real* Gram2 = (Real*)malloc(p*p*k*sizeof(Real));
-- Real* ps2 = (Real*)malloc(p*m*k*sizeof(Real));
-- Real* b = (Real*)malloc(k*sizeof(Real));
-- Real* X2 = (Real*)malloc(n*p*k*sizeof(Real));
-- Real* Y2 = (Real*)malloc(n*m*k*sizeof(Real));
-- *llh = -INFINITY;
-- Real* pi2 = (Real*)malloc(k*sizeof(Real));
-- const Real EPS = 1e-15;
-- // Additional (not at this place, in R file)
-- Real* gam2 = (Real*)malloc(k*sizeof(Real));
-- Real* sqNorm2 = (Real*)malloc(k*sizeof(Real));
-- Real* detRho = (Real*)malloc(k*sizeof(Real));
-- gsl_matrix* matrix = gsl_matrix_alloc(m, m);
-- gsl_permutation* permutation = gsl_permutation_alloc(m);
-- Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
-- Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
-- const Real gaussConstM = pow(2.*M_PI,m/2.);
-- Real* Phi = (Real*)malloc(p*m*k*sizeof(Real));
-- Real* Rho = (Real*)malloc(m*m*k*sizeof(Real));
-- Real* Pi = (Real*)malloc(k*sizeof(Real));
--
-- for (int ite=1; ite<=maxi; ite++)
-- {
-- copyArray(phi, Phi, p*m*k);
-- copyArray(rho, Rho, m*m*k);
-- copyArray(pi, Pi, k);
--
--		// Computations associated with Y and X
-- for (int r=0; r<k; r++)
-- {
-- for (int mm=0; mm<m; mm++)
-- {
-- //Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
-- for (int u=0; u<n; u++)
-- Y2[ai(u,mm,r,n,m,k)] = sqrt(gam[mi(u,r,n,k)]) * Y[mi(u,mm,n,m)];
-- }
-- for (int i=0; i<n; i++)
-- {
-- //X2[i,,r] = sqrt(gam[i,r]) * X[i,]
-- for (int u=0; u<p; u++)
-- X2[ai(i,u,r,n,p,k)] = sqrt(gam[mi(i,r,n,k)]) * X[mi(i,u,n,p)];
-- }
-- for (int mm=0; mm<m; mm++)
-- {
-- //ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
-- for (int u=0; u<p; u++)
-- {
-- Real dotProduct = 0.;
-- for (int v=0; v<n; v++)
-- dotProduct += X2[ai(v,u,r,n,p,k)] * Y2[ai(v,mm,r,n,m,k)];
-- ps2[ai(u,mm,r,p,m,k)] = dotProduct;
-- }
-- }
-- for (int j=0; j<p; j++)
-- {
-- for (int s=0; s<p; s++)
-- {
-- //Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
-- Real dotProduct = 0.;
-- for (int u=0; u<n; u++)
-- dotProduct += X2[ai(u,j,r,n,p,k)] * X2[ai(u,s,r,n,p,k)];
-- Gram2[ai(j,s,r,p,p,k)] = dotProduct;
-- }
-- }
-- }
--
--		/////////////
--		// M step  //
--		/////////////
--
--		// For pi
-- for (int r=0; r<k; r++)
-- {
-- //b[r] = sum(abs(phi[,,r]))
-- Real sumAbsPhi = 0.;
-- for (int u=0; u<p; u++)
-- for (int v=0; v<m; v++)
-- sumAbsPhi += fabs(phi[ai(u,v,r,p,m,k)]);
-- b[r] = sumAbsPhi;
-- }
-- //gam2 = colSums(gam)
-- for (int u=0; u<k; u++)
-- {
-- Real sumOnColumn = 0.;
-- for (int v=0; v<n; v++)
-- sumOnColumn += gam[mi(v,u,n,k)];
-- gam2[u] = sumOnColumn;
-- }
-- //a = sum(gam %*% log(pi))
-- Real a = 0.;
-- for (int u=0; u<n; u++)
-- {
-- Real dotProduct = 0.;
-- for (int v=0; v<k; v++)
-- dotProduct += gam[mi(u,v,n,k)] * log(pi[v]);
-- a += dotProduct;
-- }
--
--		// While some proportions are negative
-- int kk = 0,
-- pi2AllPositive = 0;
-- Real invN = 1./n;
-- while (!pi2AllPositive)
-- {
-- //pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
-- Real pow_01_kk = pow(0.1,kk);
-- for (int r=0; r<k; r++)
-- pi2[r] = pi[r] + pow_01_kk * (invN*gam2[r] - pi[r]);
-- //pi2AllPositive = all(pi2 >= 0)
-- pi2AllPositive = 1;
-- for (int r=0; r<k; r++)
-- {
-- if (pi2[r] < 0)
-- {
-- pi2AllPositive = 0;
-- break;
-- }
-- }
-- kk++;
-- }
--
-- //sum(pi^gamma * b)
-- Real piPowGammaDotB = 0.;
-- for (int v=0; v<k; v++)
-- piPowGammaDotB += pow(pi[v],gamma) * b[v];
-- //sum(pi2^gamma * b)
-- Real pi2PowGammaDotB = 0.;
-- for (int v=0; v<k; v++)
-- pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
-- //sum(gam2 * log(pi2))
-- Real gam2DotLogPi2 = 0.;
-- for (int v=0; v<k; v++)
-- gam2DotLogPi2 += gam2[v] * log(pi2[v]);
--
--		// t is the largest value in the grid 0.1^kk such that the objective is nonincreasing
-- while (-invN*a + lambda*piPowGammaDotB < -invN*gam2DotLogPi2 + lambda*pi2PowGammaDotB
-- && kk<1000)
-- {
-- Real pow_01_kk = pow(0.1,kk);
-- //pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
-- for (int v=0; v<k; v++)
-- pi2[v] = pi[v] + pow_01_kk * (invN*gam2[v] - pi[v]);
-- //pi2 was updated, so we recompute pi2PowGammaDotB and gam2DotLogPi2
-- pi2PowGammaDotB = 0.;
-- for (int v=0; v<k; v++)
-- pi2PowGammaDotB += pow(pi2[v],gamma) * b[v];
-- gam2DotLogPi2 = 0.;
-- for (int v=0; v<k; v++)
-- gam2DotLogPi2 += gam2[v] * log(pi2[v]);
-- kk++;
-- }
-- Real t = pow(0.1,kk);
-- //sum(pi + t*(pi2-pi))
-- Real sumPiPlusTbyDiff = 0.;
-- for (int v=0; v<k; v++)
-- sumPiPlusTbyDiff += (pi[v] + t*(pi2[v] - pi[v]));
-- //pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-- for (int v=0; v<k; v++)
-- pi[v] = (pi[v] + t*(pi2[v] - pi[v])) / sumPiPlusTbyDiff;
--
--		// For phi and rho
-- for (int r=0; r<k; r++)
-- {
-- for (int mm=0; mm<m; mm++)
-- {
-- Real ps = 0.,
-- nY2 = 0.;
-- // Compute ps, and nY2 = sum(Y2[,mm,r]^2)
-- for (int i=0; i<n; i++)
-- {
-- //< X2[i,,r] , phi[,mm,r] >
-- Real dotProduct = 0.;
-- for (int u=0; u<p; u++)
-- dotProduct += X2[ai(i,u,r,n,p,k)] * phi[ai(u,mm,r,p,m,k)];
-- //ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
-- ps += Y2[ai(i,mm,r,n,m,k)] * dotProduct;
-- nY2 += Y2[ai(i,mm,r,n,m,k)] * Y2[ai(i,mm,r,n,m,k)];
-- }
-- //rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
-- rho[ai(mm,mm,r,m,m,k)] = (ps + sqrt(ps*ps + 4*nY2 * gam2[r])) / (2*nY2);
-- }
-- }
--
-- for (int r=0; r<k; r++)
-- {
-- for (int j=0; j<p; j++)
-- {
-- for (int mm=0; mm<m; mm++)
-- {
-- //sum(phi[-j,mm,r] * Gram2[j,-j,r])
-- Real phiDotGram2 = 0.;
-- for (int u=0; u<p; u++)
-- {
-- if (u != j)
-- phiDotGram2 += phi[ai(u,mm,r,p,m,k)] * Gram2[ai(j,u,r,p,p,k)];
-- }
-- //S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
-- S[ai(j,mm,r,p,m,k)] = -rho[ai(mm,mm,r,m,m,k)] * ps2[ai(j,mm,r,p,m,k)]
-- + phiDotGram2;
-- Real pirPowGamma = pow(pi[r],gamma);
-- if (fabs(S[ai(j,mm,r,p,m,k)]) <= n*lambda*pirPowGamma)
-- phi[ai(j,mm,r,p,m,k)] = 0.;
-- else if (S[ai(j,mm,r,p,m,k)] > n*lambda*pirPowGamma)
-- {
-- phi[ai(j,mm,r,p,m,k)] = (n*lambda*pirPowGamma - S[ai(j,mm,r,p,m,k)])
-- / Gram2[ai(j,j,r,p,p,k)];
-- }
-- else
-- {
-- phi[ai(j,mm,r,p,m,k)] = -(n*lambda*pirPowGamma + S[ai(j,mm,r,p,m,k)])
-- / Gram2[ai(j,j,r,p,p,k)];
-- }
-- }
-- }
-- }
--
--		/////////////
--		// E step  //
--		/////////////
--
-- // Precompute det(rho[,,r]) for r in 1...k
-- int signum;
-- for (int r=0; r<k; r++)
-- {
-- for (int u=0; u<m; u++)
-- {
-- for (int v=0; v<m; v++)
-- matrix->data[u*m+v] = rho[ai(u,v,r,m,m,k)];
-- }
-- gsl_linalg_LU_decomp(matrix, permutation, &signum);
-- detRho[r] = gsl_linalg_LU_det(matrix, signum);
-- }
--
-- Real sumLogLLH = 0.;
-- for (int i=0; i<n; i++)
-- {
-- for (int r=0; r<k; r++)
-- {
-- //compute Y[i,]%*%rho[,,r]
-- for (int u=0; u<m; u++)
-- {
-- YiRhoR[u] = 0.;
-- for (int v=0; v<m; v++)
-- YiRhoR[u] += Y[mi(i,v,n,m)] * rho[ai(v,u,r,m,m,k)];
-- }
--
-- //compute X[i,]%*%phi[,,r]
-- for (int u=0; u<m; u++)
-- {
-- XiPhiR[u] = 0.;
-- for (int v=0; v<p; v++)
-- XiPhiR[u] += X[mi(i,v,n,p)] * phi[ai(v,u,r,p,m,k)];
-- }
--
-- //compute sq norm || Y(:,i)*rho(:,:,r)-X(i,:)*phi(:,:,r) ||_2^2
-- sqNorm2[r] = 0.;
-- for (int u=0; u<m; u++)
-- sqNorm2[r] += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
-- }
--
-- Real sumGamI = 0.;
-- for (int r=0; r<k; r++)
-- {
-- gam[mi(i,r,n,k)] = pi[r] * exp(-.5*sqNorm2[r]) * detRho[r];
-- sumGamI += gam[mi(i,r,n,k)];
-- }
--
-- sumLogLLH += log(sumGamI) - log(gaussConstM);
-- if (sumGamI > EPS) //else: gam[i,] is already ~=0
-- {
-- for (int r=0; r<k; r++)
-- gam[mi(i,r,n,k)] /= sumGamI;
-- }
-- }
--
-- //sumPen = sum(pi^gamma * b)
-- Real sumPen = 0.;
-- for (int r=0; r<k; r++)
-- sumPen += pow(pi[r],gamma) * b[r];
-- Real last_llh = *llh;
-- //llh = -sumLogLLH/n + lambda*sumPen
-- *llh = -invN * sumLogLLH + lambda * sumPen;
-- Real dist = ite==1 ? *llh : (*llh - last_llh) / (1. + fabs(*llh));
--
-- //Dist1 = max( abs(phi-Phi) / (1+abs(phi)) )
-- Real Dist1 = 0.;
-- for (int u=0; u<p; u++)
-- {
-- for (int v=0; v<m; v++)
-- {
-- for (int w=0; w<k; w++)
-- {
-- Real tmpDist = fabs(phi[ai(u,v,w,p,m,k)]-Phi[ai(u,v,w,p,m,k)])
-- / (1.+fabs(phi[ai(u,v,w,p,m,k)]));
-- if (tmpDist > Dist1)
-- Dist1 = tmpDist;
-- }
-- }
-- }
-- //Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
-- Real Dist2 = 0.;
-- for (int u=0; u<m; u++)
-- {
-- for (int v=0; v<m; v++)
-- {
-- for (int w=0; w<k; w++)
-- {
-- Real tmpDist = fabs(rho[ai(u,v,w,m,m,k)]-Rho[ai(u,v,w,m,m,k)])
-- / (1.+fabs(rho[ai(u,v,w,m,m,k)]));
-- if (tmpDist > Dist2)
-- Dist2 = tmpDist;
-- }
-- }
-- }
--		//Dist3 = max( (abs(pi-Pi)) / (1+abs(pi)) )
--		Real Dist3 = 0.;
--		for (int v=0; v<k; v++)
--		{
--			Real tmpDist = fabs(pi[v]-Pi[v]) / (1.+fabs(pi[v]));
--			if (tmpDist > Dist3)
--				Dist3 = tmpDist;
--		}
-- //dist2=max([max(Dist1),max(Dist2),max(Dist3)]);
-- Real dist2 = Dist1;
-- if (Dist2 > dist2)
-- dist2 = Dist2;
-- if (Dist3 > dist2)
-- dist2 = Dist3;
--
-- if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
-- break;
-- }
--
-- //affec = apply(gam, 1, which.max)
-- for (int i=0; i<n; i++)
-- {
-- Real rowMax = 0.;
-- affec[i] = 0;
-- for (int j=0; j<k; j++)
-- {
-- if (gam[mi(i,j,n,k)] > rowMax)
-- {
-- affec[i] = j+1; //R indices start at 1
-- rowMax = gam[mi(i,j,n,k)];
-- }
-- }
-- }
--
-- //free memory
-- free(b);
-- free(gam);
-- free(Phi);
-- free(Rho);
-- free(Pi);
-- free(Gram2);
-- free(ps2);
-- free(detRho);
-- gsl_matrix_free(matrix);
-- gsl_permutation_free(permutation);
-- free(XiPhiR);
-- free(YiRhoR);
-- free(gam2);
-- free(pi2);
-- free(X2);
-- free(Y2);
-- free(sqNorm2);
--}
+++ /dev/null
--#ifndef valse_EMGLLF_H
--#define valse_EMGLLF_H
--
--#include "utils.h"
--
--void EMGLLF_core(
-- // IN parameters
-- const Real* phiInit,
-- const Real* rhoInit,
-- const Real* piInit,
-- const Real* gamInit,
-- int mini,
-- int maxi,
-- Real gamma,
-- Real lambda,
-- const Real* X,
-- const Real* Y,
-- Real tau,
-- // OUT parameters
-- Real* phi,
-- Real* rho,
-- Real* pi,
-- Real* LLF,
-- Real* S,
-- int* affec,
-- // additional size parameters
-- int n,
-- int p,
-- int m,
-- int k);
--
--#endif
+++ /dev/null
--#include <stdlib.h>
--#include <gsl/gsl_linalg.h>
--#include "utils.h"
--
--// Compute pseudo-inverse of a square matrix
--static Real* pinv(const Real* matrix, int dim)
--{
-- gsl_matrix* U = gsl_matrix_alloc(dim,dim);
-- gsl_matrix* V = gsl_matrix_alloc(dim,dim);
-- gsl_vector* S = gsl_vector_alloc(dim);
-- gsl_vector* work = gsl_vector_alloc(dim);
-- Real EPS = 1e-10; //threshold for singular value "== 0"
--
-- //copy matrix into U
-- copyArray(matrix, U->data, dim*dim);
--
-- //U,S,V = SVD of matrix
-- gsl_linalg_SV_decomp(U, V, S, work);
-- gsl_vector_free(work);
--
-- // Obtain pseudo-inverse by V*S^{-1}*t(U)
-- Real* inverse = (Real*)malloc(dim*dim*sizeof(Real));
-- for (int i=0; i<dim; i++)
-- {
-- for (int ii=0; ii<dim; ii++)
-- {
-- Real dotProduct = 0.0;
-- for (int j=0; j<dim; j++)
-- dotProduct += V->data[i*dim+j] * (S->data[j] > EPS ? 1.0/S->data[j] : 0.0) * U->data[ii*dim+j];
-- inverse[i*dim+ii] = dotProduct;
-- }
-- }
--
-- gsl_matrix_free(U);
-- gsl_matrix_free(V);
-- gsl_vector_free(S);
-- return inverse;
--}
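--
--/* Sanity-check sketch for pinv() (illustrative): for an invertible matrix the
-- result coincides with the ordinary inverse, e.g.
--
-- Real A[4] = {2., 0., 0., 4.}; //2x2 diagonal, column-major
-- Real* Ainv = pinv(A, 2); //expect diag(0.5, 0.25)
-- free(Ainv);
--
-- For a singular matrix, singular values below EPS are zeroed, which yields
-- the Moore-Penrose pseudo-inverse instead of failing. */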
--
--// EMGrank: EM algorithm to estimate a mixture of multivariate regression
--// models under a low-rank constraint on the regression matrices
--// (workhorse of the Lasso-Rank procedure)
--void EMGrank_core(
-- // IN parameters
-- const Real* Pi, // mixture proportions
-- const Real* Rho, // initial rescaled variance parameter
-- int mini, // minimum number of iterations of the EM algorithm
-- int maxi, // maximum number of iterations of the EM algorithm
-- const Real* X, // regressors
-- const Real* Y, // responses
-- Real tau, // threshold to accept convergence
-- const int* rank, // vector of possible ranks
-- // OUT parameters
-- Real* phi, // rescaled mean parameter, estimated by EM
-- Real* LLF, // log-likelihood of the sample at the estimated parameter values
-- // additional size parameters
-- int n, // sample size
-- int p, // number of covariates
-- int m, // size of Y (multivariate response)
-- int k) // number of mixture components
--{
-- // Allocations, initializations
-- Real* Phi = (Real*)calloc(p*m*k,sizeof(Real));
-- Real* hatBetaR = (Real*)malloc(p*m*sizeof(Real));
-- int signum;
-- Real invN = 1.0/n;
-- int deltaPhiBufferSize = 20;
-- Real* deltaPhi = (Real*)malloc(deltaPhiBufferSize*sizeof(Real));
-- int ite = 0;
-- Real sumDeltaPhi = 0.0;
-- Real* YiRhoR = (Real*)malloc(m*sizeof(Real));
-- Real* XiPhiR = (Real*)malloc(m*sizeof(Real));
-- Real* Xr = (Real*)malloc(n*p*sizeof(Real));
-- Real* Yr = (Real*)malloc(n*m*sizeof(Real));
-- Real* tXrXr = (Real*)malloc(p*p*sizeof(Real));
-- Real* tXrYr = (Real*)malloc(p*m*sizeof(Real));
-- gsl_matrix* matrixM = gsl_matrix_alloc(p, m);
-- gsl_matrix* matrixE = gsl_matrix_alloc(m, m);
-- gsl_permutation* permutation = gsl_permutation_alloc(m);
-- gsl_matrix* V = gsl_matrix_alloc(m,m);
-- gsl_vector* S = gsl_vector_alloc(m);
-- gsl_vector* work = gsl_vector_alloc(m);
--
-- //Initialize class memberships (all elements in class 0; TODO: randomize ?)
-- int* Z = (int*)calloc(n, sizeof(int));
--
-- //Initialize phi to zero: the M step skips empty classes, leaving phi(:,:,r) untouched
-- zeroArray(phi, p*m*k);
--
-- while (ite<mini || (ite<maxi && sumDeltaPhi>tau))
-- {
-- ////////////
-- // M step //
-- ////////////
--
-- //Update Beta (and hence phi) for each class
-- for (int r=0; r<k; r++)
-- {
-- //Compute Xr = X(Z==r,:) and Yr = Y(Z==r,:)
-- int cardClustR=0;
-- for (int i=0; i<n; i++)
-- {
-- if (Z[i] == r)
-- {
-- for (int j=0; j<p; j++)
-- Xr[mi(cardClustR,j,n,p)] = X[mi(i,j,n,p)];
-- for (int j=0; j<m; j++)
-- Yr[mi(cardClustR,j,n,m)] = Y[mi(i,j,n,m)];
-- cardClustR++;
-- }
-- }
-- if (cardClustR == 0)
-- continue;
--
-- //Compute tXrXr = t(Xr) * Xr
-- for (int j=0; j<p; j++)
-- {
-- for (int jj=0; jj<p; jj++)
-- {
-- Real dotProduct = 0.0;
-- for (int u=0; u<cardClustR; u++)
-- dotProduct += Xr[mi(u,j,n,p)] * Xr[mi(u,jj,n,p)];
-- tXrXr[mi(j,jj,p,p)] = dotProduct;
-- }
-- }
--
-- //Get pseudo inverse = (t(Xr)*Xr)^{-1}
-- Real* invTXrXr = pinv(tXrXr, p);
--
-- // Compute tXrYr = t(Xr) * Yr
-- for (int j=0; j<p; j++)
-- {
-- for (int jj=0; jj<m; jj++)
-- {
-- Real dotProduct = 0.0;
-- for (int u=0; u<cardClustR; u++)
-- dotProduct += Xr[mi(u,j,n,p)] * Yr[mi(u,jj,n,m)];
-- tXrYr[mi(j,jj,p,m)] = dotProduct;
-- }
-- }
--
-- //Fill matrixM with inverse * tXrYr = (t(Xr)*Xr)^{-1} * t(Xr) * Yr
-- for (int j=0; j<p; j++)
-- {
-- for (int jj=0; jj<m; jj++)
-- {
-- Real dotProduct = 0.0;
-- for (int u=0; u<p; u++)
-- dotProduct += invTXrXr[mi(j,u,p,p)] * tXrYr[mi(u,jj,p,m)];
-- matrixM->data[j*m+jj] = dotProduct;
-- }
-- }
-- free(invTXrXr);
--
-- //U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-- gsl_linalg_SV_decomp(matrixM, V, S, work);
--
-- //Set m-rank(r) singular values to zero, and recompose
-- //best rank(r) approximation of the initial product
-- for (int j=rank[r]; j<m; j++)
-- S->data[j] = 0.0;
--
-- //[intermediate step] Compute hatBetaR = U * S * t(V)
-- double* U = matrixM->data; //GSL requires double precision
-- for (int j=0; j<p; j++)
-- {
-- for (int jj=0; jj<m; jj++)
-- {
-- Real dotProduct = 0.0;
-- for (int u=0; u<m; u++)
-- dotProduct += U[j*m+u] * S->data[u] * V->data[jj*m+u];
-- hatBetaR[mi(j,jj,p,m)] = dotProduct;
-- }
-- }
--
-- //Compute phi(:,:,r) = hatBetaR * Rho(:,:,r)
-- for (int j=0; j<p; j++)
-- {
-- for (int jj=0; jj<m; jj++)
-- {
-- Real dotProduct=0.0;
-- for (int u=0; u<m; u++)
-- dotProduct += hatBetaR[mi(j,u,p,m)] * Rho[ai(u,jj,r,m,m,k)];
-- phi[ai(j,jj,r,p,m,k)] = dotProduct;
-- }
-- }
-- }
--
-- ////////////
-- // E step //
-- ////////////
--
-- Real sumLogLLF2 = 0.0;
-- for (int i=0; i<n; i++)
-- {
-- Real sumLLF1 = 0.0;
-- Real maxLogGamIR = -INFINITY;
-- for (int r=0; r<k; r++)
-- {
-- //Compute
-- //Gam(i,r) = Pi(r) * det(Rho(:,:,r)) * exp( -1/2 * (Y(i,:)*Rho(:,:,r) - X(i,:)...
-- //*phi(:,:,r)) * transpose( Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ) );
-- //split in several sub-steps
--
-- //compute det(Rho(:,:,r)) [TODO: hoist out of the loop over i; it does not depend on i]
-- for (int j=0; j<m; j++)
-- {
-- for (int jj=0; jj<m; jj++)
-- matrixE->data[j*m+jj] = Rho[ai(j,jj,r,m,m,k)];
-- }
-- gsl_linalg_LU_decomp(matrixE, permutation, &signum);
-- Real detRhoR = gsl_linalg_LU_det(matrixE, signum);
--
-- //compute Y(i,:)*Rho(:,:,r)
-- for (int j=0; j<m; j++)
-- {
-- YiRhoR[j] = 0.0;
-- for (int u=0; u<m; u++)
-- YiRhoR[j] += Y[mi(i,u,n,m)] * Rho[ai(u,j,r,m,m,k)];
-- }
--
-- //compute X(i,:)*phi(:,:,r)
-- for (int j=0; j<m; j++)
-- {
-- XiPhiR[j] = 0.0;
-- for (int u=0; u<p; u++)
-- XiPhiR[j] += X[mi(i,u,n,p)] * phi[ai(u,j,r,p,m,k)];
-- }
--
-- //compute dotProduct = || Y(i,:)*Rho(:,:,r) - X(i,:)*phi(:,:,r) ||_2^2
-- Real dotProduct = 0.0;
-- for (int u=0; u<m; u++)
-- dotProduct += (YiRhoR[u]-XiPhiR[u]) * (YiRhoR[u]-XiPhiR[u]);
-- Real logGamIR = log(Pi[r]) + log(detRhoR) - 0.5*dotProduct;
--
-- //Z(i) = index of max (gam(i,:))
-- if (logGamIR > maxLogGamIR)
-- {
-- Z[i] = r;
-- maxLogGamIR = logGamIR;
-- }
-- sumLLF1 += exp(logGamIR) / pow(2*M_PI,m/2.0);
-- }
--
-- sumLogLLF2 += log(sumLLF1);
-- }
--
-- // Assign output variable LLF
-- *LLF = -invN * sumLogLLF2;
--
-- //newDeltaPhi = max(max((abs(phi-Phi))./(1+abs(phi))));
-- Real newDeltaPhi = 0.0;
-- for (int j=0; j<p; j++)
-- {
-- for (int jj=0; jj<m; jj++)
-- {
-- for (int r=0; r<k; r++)
-- {
-- Real tmpDist = fabs(phi[ai(j,jj,r,p,m,k)]-Phi[ai(j,jj,r,p,m,k)])
-- / (1.0+fabs(phi[ai(j,jj,r,p,m,k)]));
-- if (tmpDist > newDeltaPhi)
-- newDeltaPhi = tmpDist;
-- }
-- }
-- }
--
-- //update distance parameter to check algorithm convergence (delta(phi, Phi))
-- //TODO: use a circular buffer instead of shifting deltaPhi each iteration (see sketch after this function)
-- if (ite < deltaPhiBufferSize)
-- deltaPhi[ite] = newDeltaPhi;
-- else
-- {
-- sumDeltaPhi -= deltaPhi[0];
-- for (int u=0; u<deltaPhiBufferSize-1; u++)
-- deltaPhi[u] = deltaPhi[u+1];
-- deltaPhi[deltaPhiBufferSize-1] = newDeltaPhi;
-- }
-- sumDeltaPhi += newDeltaPhi;
--
-- // save the current phi into Phi for the next convergence check
-- for (int j=0; j<m; j++)
-- {
-- for (int jj=0; jj<p; jj++)
-- {
-- for (int r=0; r<k; r++)
-- Phi[ai(jj,j,r,p,m,k)] = phi[ai(jj,j,r,p,m,k)];
-- }
-- }
-- ite++;
-- }
--
-- //free memory
-- free(hatBetaR);
-- free(deltaPhi);
-- free(Phi);
-- gsl_matrix_free(matrixE);
-- gsl_matrix_free(matrixM);
-- gsl_permutation_free(permutation);
-- gsl_vector_free(work);
-- gsl_matrix_free(V);
-- gsl_vector_free(S);
-- free(XiPhiR);
-- free(YiRhoR);
-- free(Xr);
-- free(Yr);
-- free(tXrXr);
-- free(tXrYr);
-- free(Z);
--}
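--
--/* Sketch of the circular-buffer variant suggested by the TODO in the E step
-- (names are illustrative, not package code): keep a write index instead of
-- shifting the whole deltaPhi buffer at each iteration.
--
-- int head = 0, filled = 0;
-- //per iteration, once newDeltaPhi has been computed:
-- if (filled == deltaPhiBufferSize)
-- sumDeltaPhi -= deltaPhi[head]; //drop the oldest value
-- else
-- filled++;
-- deltaPhi[head] = newDeltaPhi;
-- head = (head + 1) % deltaPhiBufferSize;
-- sumDeltaPhi += newDeltaPhi;
--
-- This replaces the O(deltaPhiBufferSize) shift with O(1) work per iteration
-- while leaving sumDeltaPhi identical. */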
+++ /dev/null
--#ifndef valse_EMGrank_H
--#define valse_EMGrank_H
--
--#include "utils.h"
--
--void EMGrank_core(
-- // IN parameters
-- const Real* Pi,
-- const Real* Rho,
-- int mini,
-- int maxi,
-- const Real* X,
-- const Real* Y,
-- Real tau,
-- const int* rank,
-- // OUT parameters
-- Real* phi,
-- Real* LLF,
-- // additional size parameters
-- int n,
-- int p,
-- int m,
-- int k);
--
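--/* Calling sketch (illustrative only): Pi has k entries, Rho is m*m*k, X is
-- n*p, Y is n*m, rank holds one candidate rank per component; the estimated
-- phi (p*m*k) and the log-likelihood LLF are written on output. Pi, Rho, X
-- and Y are assumed to be filled beforehand.
--
-- int n = 50, p = 4, m = 3, k = 2;
-- int rank[2] = {1, 2};
-- Real *phi = calloc(p*m*k, sizeof(Real));
-- Real LLF = 0.;
-- EMGrank_core(Pi, Rho, 10, 100, X, Y, 1e-6, rank,
-- phi, &LLF, n, p, m, k);
--*/
--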
--#endif
+++ /dev/null
--#ifndef valse_utils_H
--#define valse_utils_H
--
--//#include <stdint.h>
--
--/********
-- * Types
-- *******/
--
--typedef double Real;
--//typedef uint32_t UInt;
--//typedef int32_t Int;
--
--/*******************************
-- * Matrix and arrays indexation
-- *******************************/
--
--// Matrix index, column-major (ncol unused, kept for symmetry)
--#define mi(i,j,nrow,ncol)\
-- ((j)*(nrow) + (i))
--
--// 3D array index, column-major (d3 unused, kept for symmetry)
--#define ai(i,j,k,d1,d2,d3)\
-- ((k)*(d1)*(d2) + (j)*(d1) + (i))
--
--// 4D array index, column-major (d4 unused, kept for symmetry)
--#define ai4(i,j,k,m,d1,d2,d3,d4)\
-- ((m)*(d1)*(d2)*(d3) + (k)*(d1)*(d2) + (j)*(d1) + (i))
--
--/*************************
-- * Array copy & "zeroing"
-- ************************/
--
--// Fill an array with zeros
--#define zeroArray(array, size)\
--{\
-- for (int u=0; u<(size); u++)\
-- (array)[u] = 0;\
--}
--
--// Copy a 1D array
--#define copyArray(array, copy, size)\
--{\
-- for (int u=0; u<(size); u++)\
-- (copy)[u] = (array)[u];\
--}
--
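--/* Example (illustrative): the macros use column-major (R/Fortran) order, so a
-- matrix passed from R as a flat vector is addressed without transposition.
-- For a 3x2 matrix, the element at row 2, column 1 (0-based) lives at
--
-- mi(2,1,3,2) == 1*3 + 2 == 5
--
-- and for a p x m x k array, ai(i,j,r,p,m,k) == r*p*m + j*p + i. */
--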
--#endif
+++ /dev/null
--library(testthat)
--library(valse) # or load_all()
--
--test_check("valse")
+++ /dev/null
--# Potential helpers for context 1
--help <- function()
--{
-- #...
--}
+++ /dev/null
--context("Context1")
--
--test_that("function 1...",
--{
-- #expect_lte( ..., ...)
--})
--
--test_that("function 2...",
--{
-- #expect_equal(..., ...)
--})
+++ /dev/null
--# Ignore Jupyter-generated files (ipynb, HTML)
--*.html
--*.ipynb
--
--# ... and assorted (pdf)LaTeX build files, just in case
--*.tex
--*.pdf
--*.aux
--*.dvi
--*.log
--*.out
--*.toc
--*.synctex.gz
--/figure/