#' @export
EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
X, Y, eps, fast = TRUE)
- {
+{
if (!fast)
{
# Function in R
return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
X, Y, eps))
}
-
+
# Function in C
n <- nrow(X) #nombre d'echantillons
p <- ncol(X) #nombre de covariables
# R version - slow but easy to read
.EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
X2, Y, eps)
- {
+{
# Matrix dimensions
n <- dim(Y)[1]
- if (length(dim(phiInit)) == 2)
- {
+ if (length(dim(phiInit)) == 2) {
p <- 1
m <- dim(phiInit)[1]
k <- dim(phiInit)[2]
- } else
- {
+ } else {
p <- dim(phiInit)[1]
m <- dim(phiInit)[2]
k <- dim(phiInit)[3]
}
X <- matrix(nrow = n, ncol = p)
X[1:n, 1:p] <- X2
+
# Outputs
phi <- array(NA, dim = c(p, m, k))
phi[1:p, , ] <- phiInit
pi <- piInit
llh <- -Inf
S <- array(0, dim = c(p, m, k))
-
+
# Algorithm variables
gam <- gamInit
Gram2 <- array(0, dim = c(p, p, k))
X2 <- array(0, dim = c(n, p, k))
Y2 <- array(0, dim = c(n, m, k))
EPS <- 1e-15
-
+
for (ite in 1:maxi)
{
# Remember last pi,rho,phi values for exit condition in the end of loop
Phi <- phi
Rho <- rho
Pi <- pi
-
+
# Computations associated to X and Y
for (r in 1:k)
{
- for (mm in 1:m) Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
- for (i in 1:n) X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
- for (mm in 1:m) ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
+ for (mm in 1:m)
+ Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
+ for (i in 1:n)
+ X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
+ for (mm in 1:m)
+ ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
for (j in 1:p)
{
- for (s in 1:p) Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
+ for (s in 1:p)
+ Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
}
}
-
- ######### M step #
-
+
+ ## M step
+
# For pi
b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
gam2 <- colSums(gam)
a <- sum(gam %*% log(pi))
-
+
# While the proportions are nonpositive
kk <- 0
pi2AllPositive <- FALSE
pi2AllPositive <- all(pi2 >= 0)
kk <- kk + 1
}
-
+
# t(m) is the largest value in the grid O.1^k such that it is nonincreasing
- while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < -sum(gam2 * log(pi2))/n +
- lambda * sum(pi2^gamma * b))
- {
+ while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+ -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b))
+ {
pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
kk <- kk + 1
}
t <- 0.1^kk
pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
-
+
# For phi and rho
for (r in 1:k)
{
for (mm in 1:m)
{
ps <- 0
- for (i in 1:n) ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm,
- r])
+ for (i in 1:n)
+ ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
nY2 <- sum(Y2[, mm, r]^2)
rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
}
}
-
+
for (r in 1:k)
{
for (j in 1:p)
{
for (mm in 1:m)
{
- S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + sum(phi[-j, mm,
- r] * Gram2[j, -j, r])
- if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma))
- {
- phi[j, mm, r] <- 0
- } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma))
- {
- phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j,
- j, r]
- } else
- {
- phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j,
- j, r]
+ S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r]
+ + sum(phi[-j, mm, r] * Gram2[j, -j, r])
+ if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) {
+ phi[j, mm, r] <- 0
+ } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) {
+ phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
+ } else {
+ phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
}
}
}
}
-
+
######## E step#
-
+
# Precompute det(rho[,,r]) for r in 1...k
detRho <- sapply(1:k, function(r) det(rho[, , r]))
gam1 <- matrix(0, nrow = n, ncol = k)
for (i in 1:n)
{
# Update gam[,]
- for (r in 1:k) gam1[i, r] <- pi[r] * exp(-0.5 * sum((Y[i, ] %*% rho[,
- , r] - X[i, ] %*% phi[, , r])^2)) * detRho[r]
+ for (r in 1:k)
+ {
+ gam1[i, r] <- pi[r] * exp(-0.5
+ * sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)) * detRho[r]
+ }
}
- gam <- gam1/rowSums(gam1)
+ gam <- gam1 / rowSums(gam1)
sumLogLLH <- sum(log(rowSums(gam)) - log((2 * base::pi)^(m/2)))
sumPen <- sum(pi^gamma * b)
last_llh <- llh
Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
dist2 <- max(Dist1, Dist2, Dist3)
-
+
if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps)))
break
}
-
+
list(phi = phi, rho = rho, pi = pi, llh = llh, S = S)
}
-#' EMGrank
+#' EMGrank
#'
#' Description de EMGrank
#'
# Function in R
return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
}
-
+
# Function in C
n <- nrow(X) #nombre d'echantillons
p <- ncol(X) #nombre de covariables
p <- dim(X)[2]
m <- dim(Rho)[2]
k <- dim(Rho)[3]
-
+
# init outputs
phi <- array(0, dim = c(p, m, k))
Z <- rep(1, n)
LLF <- 0
-
+
# local variables
Phi <- array(0, dim = c(p, m, k))
deltaPhi <- c()
# M step: update for Beta ( and then phi)
for (r in 1:k)
{
- Z_indice <- seq_len(n)[Z == r] #indices where Z == r
+ Z_indice <- seq_len(n)[Z == r] #indices where Z == r
if (length(Z_indice) == 0)
next
# U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
- s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% crossprod(matricize(X[Z_indice,
- ]), matricize(Y[Z_indice, ])))
+ s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ])))
+ %*% crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
S <- s$d
# Set m-rank(r) singular values to zero, and recompose best rank(r) approximation
# of the initial product
S[(rank[r] + 1):length(S)] <- 0
phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
}
-
+
# Step E and computation of the loglikelihood
sumLogLLF2 <- 0
for (i in seq_len(n))
maxLogGamIR <- -Inf
for (r in seq_len(k))
{
- dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[,
- , r])
+ dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
logGamIR <- log(Pi[r]) + log(det(Rho[, , r])) - 0.5 * dotProduct
# Z[i] = index of max (gam[i,])
if (logGamIR > maxLogGamIR)
}
sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
}
-
+
LLF <- -1/n * sumLogLLF2
-
+
# update distance parameter to check algorithm convergence (delta(phi, Phi))
- deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
+ deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
if (length(deltaPhi) > deltaPhiBufferSize)
deltaPhi <- deltaPhi[2:length(deltaPhi)]
sumDeltaPhi <- sum(abs(deltaPhi))
-
+
# update other local variables
Phi <- phi
ite <- ite + 1
#' @export
computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini,
maxi, tau, fast = TRUE)
- {
+{
n <- nrow(X)
p <- dim(phiInit)[1]
m <- dim(phiInit)[2]
k <- dim(phiInit)[3]
-
+
list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0,
X, Y, tau, fast)
grid <- array(0, dim = c(p, m, k))
for (i in 1:p)
{
- for (j in 1:m) grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma)
+ for (j in 1:m)
+ grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma)
}
sort(unique(grid))
}
#' @export
constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
maxi, gamma, X, Y, eps, S, ncores = 3, fast = TRUE, verbose = FALSE)
- {
+{
if (ncores > 1)
{
cl <- parallel::makeCluster(ncores, outfile = "")
"rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S",
"ncores", "fast", "verbose"))
}
-
+
# Individual model computation
computeAtLambda <- function(lambda)
{
if (ncores > 1)
require("valse") #nodes start with an empty environment
-
+
if (verbose)
print(paste("Computations for lambda=", lambda))
-
+
n <- dim(X)[1]
p <- dim(phiInit)[1]
m <- dim(phiInit)[2]
col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars
if (length(col.sel) == 0)
return(NULL)
-
+
# lambda == 0 because we compute the EMV: no penalization here
res <- EMGLLF(phiInit[col.sel, , ], rhoInit, piInit, gamInit, mini, maxi,
gamma, 0, X[, col.sel], Y, eps, fast)
-
+
# Eval dimension from the result + selected
phiLambda2 <- res$phi
rhoLambda <- res$rho
piLambda <- res$pi
phiLambda <- array(0, dim = c(p, m, k))
- for (j in seq_along(col.sel)) phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j,
- sel.lambda[[j]], ]
+ for (j in seq_along(col.sel))
+ phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
dimension <- length(unlist(sel.lambda))
-
+
# Computation of the loglikelihood
densite <- vector("double", n)
for (r in 1:k)
{
if (length(col.sel) == 1)
{
- delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel,
- , r])))
- } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel,
- , r]))
- densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m *
- exp(-diag(tcrossprod(delta))/2)
+ delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r])))
+ } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r]))
+ densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m
+ * exp(-diag(tcrossprod(delta))/2)
}
llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1)
list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda)
}
-
+
# For each lambda, computation of the parameters
- out <- if (ncores > 1)
- parLapply(cl, 1:length(S), computeAtLambda) else lapply(1:length(S), computeAtLambda)
-
+ out <-
+ if (ncores > 1) {
+ parLapply(cl, 1:length(S), computeAtLambda)
+ } else {
+ lapply(1:length(S), computeAtLambda)
+ }
+
if (ncores > 1)
parallel::stopCluster(cl)
-
+
out
}
#' @export
constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max,
ncores, fast = TRUE, verbose = FALSE)
- {
+{
n <- dim(X)[1]
p <- dim(X)[2]
m <- dim(Y)[2]
L <- length(S)
-
+
# Possible interesting ranks
deltaRank <- rank.max - rank.min + 1
Size <- deltaRank^k
each = deltaRank^(k - r)), each = L)
}
RankLambda[, k + 1] <- rep(1:L, times = Size)
-
+
if (ncores > 1)
{
cl <- parallel::makeCluster(ncores, outfile = "")
"Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores",
"verbose"))
}
-
+
computeAtLambda <- function(index)
{
lambdaIndex <- RankLambda[index, k + 1]
rankIndex <- RankLambda[index, 1:k]
if (ncores > 1)
require("valse") #workers start with an empty environment
-
+
# 'relevant' will be the set of relevant columns
selected <- S[[lambdaIndex]]$selected
relevant <- c()
for (j in 1:p)
{
if (length(selected[[j]]) > 0)
- {
relevant <- c(relevant, j)
- }
}
if (max(rankIndex) < length(relevant))
{
{
res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi,
X[, relevant], Y, eps, rankIndex, fast)
- llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex +
- m)))
+ llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
phi[relevant, , ] <- res$phi
}
list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho)
}
}
-
+
# For each lambda in the grid we compute the estimators
- out <- if (ncores > 1)
- {
- parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
- } else
- {
- lapply(seq_len(length(S) * Size), computeAtLambda)
- }
-
+ out <-
+ if (ncores > 1) {
+ parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
+ } else {
+ lapply(seq_len(length(S) * Size), computeAtLambda)
+ }
+
if (ncores > 1)
parallel::stopCluster(cl)
-
+
out
}
p <- dim(covX)[1]
m <- dim(covY)[1]
k <- dim(covY)[3]
-
+
X <- matrix(nrow = 0, ncol = p)
Y <- matrix(nrow = 0, ncol = m)
-
+
# random generation of the size of each population in X~Y (unordered)
sizePop <- rmultinom(1, n, π)
- class <- c() #map i in 1:n --> index of class in 1:k
-
+ class <- c() #map i in 1:n --> index of class in 1:k
+
for (i in 1:k)
{
class <- c(class, rep(i, sizePop[i]))
Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*%
β[, , i], covY[, , i]))))
}
-
+
shuffle <- sample(n)
list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
}
piInit1 <- matrix(0, nIte, k)
gamInit1 <- array(0, dim = c(n, k, nIte))
LLFinit1 <- list()
-
+
# require(MASS) #Moore-Penrose generalized inverse of matrix
for (repet in 1:nIte)
{
distance_clus <- dist(cbind(X, Y))
tree_hier <- hclust(distance_clus)
Zinit1[, repet] <- cutree(tree_hier, k)
-
+
for (r in 1:k)
{
Z <- Zinit1[, repet]
Z_indice <- seq_len(n)[Z == r] #renvoit les indices où Z==r
- if (length(Z_indice) == 1)
- {
+ if (length(Z_indice) == 1) {
betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*%
crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
- } else
- {
+ } else {
betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*%
crossprod(X[Z_indice, ], Y[Z_indice, ])
}
rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
piInit1[repet, r] <- mean(Z == r)
}
-
+
for (i in 1:n)
{
for (r in 1:k)
{
- dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] - X[i,
- ] %*% phiInit1[, , r, repet])
- Gam[i, r] <- piInit1[repet, r] * det(rhoInit1[, , r, repet]) * exp(-0.5 *
- dotProduct)
+ dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
+ - X[i, ] %*% phiInit1[, , r, repet])
+ Gam[i, r] <- piInit1[repet, r]
+ * det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
}
sumGamI <- sum(Gam[i, ])
gamInit1[i, , repet] <- Gam[i, ]/sumGamI
}
-
+
miniInit <- 10
maxiInit <- 11
-
- init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet,
- ], gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X,
- Y, eps = 1e-04, fast)
+
+ init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
+ gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
+ eps = 1e-04, fast)
LLFEessai <- init_EMG$LLF
LLFinit1[repet] <- LLFEessai[length(LLFEessai)]
}
rhoInit <- rhoInit1[, , , b]
piInit <- piInit1[b, ]
gamInit <- gamInit1[, , b]
-
+
return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
}
maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1,
ncores_inner = 1, thresh = 1e-08, size_coll_mod = 10, fast = TRUE, verbose = FALSE,
plot = TRUE)
- {
+{
p <- dim(X)[2]
m <- dim(Y)[2]
n <- dim(X)[1]
-
+
if (verbose)
print("main loop: over all k and all lambda")
-
- if (ncores_outer > 1)
- {
+
+ if (ncores_outer > 1) {
cl <- parallel::makeCluster(ncores_outer, outfile = "")
parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X",
"Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin",
"kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh",
"size_coll_mod", "verbose", "p", "m"))
}
-
+
# Compute models with k components
computeModels <- function(k)
{
if (ncores_outer > 1)
require("valse") #nodes start with an empty environment
-
+
if (verbose)
print(paste("Parameters initialization for k =", k))
# smallEM initializes parameters by k-means and regression model in each
X, Y, gamma, mini, maxi, eps, fast)
if (length(grid_lambda) > size_coll_mod)
grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
-
+
if (verbose)
print("Compute relevant parameters")
# select variables according to each regularization parameter from the grid:
# S$selected corresponding to selected variables
S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi,
gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
-
- if (procedure == "LassoMLE")
- {
+
+ if (procedure == "LassoMLE") {
if (verbose)
print("run the procedure Lasso-MLE")
# compute parameter estimations, with the Maximum Likelihood Estimator,
# restricted on selected variables.
models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit,
P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
-
- } else
- {
+ } else {
if (verbose)
print("run the procedure Lasso-Rank")
# compute parameter estimations, with the Low Rank Estimator, restricted on
models <- models[sapply(models, function(cell) !is.null(cell))]
models
}
-
+
# List (index k) of lists (index lambda) of models
- models_list <- if (ncores_outer > 1)
- parLapply(cl, kmin:kmax, computeModels) else lapply(kmin:kmax, computeModels)
+ models_list <-
+ if (ncores_outer > 1) {
+ parLapply(cl, kmin:kmax, computeModels)
+ } else {
+ lapply(kmin:kmax, computeModels)
+ }
if (ncores_outer > 1)
parallel::stopCluster(cl)
-
+
if (!requireNamespace("capushe", quietly = TRUE))
{
warning("'capushe' not available: returning all models")
return(models_list)
}
-
+
# Get summary 'tableauRecap' from models
tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
{
data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n,
complexity = sumPen, contrast = -LLH)
}))
-
- print(tableauRecap)
tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
-
+
modSel <- capushe::capushe(tableauRecap, n)
indModSel <- if (selecMod == "DDSE")
as.numeric(modSel@DDSE@model) else if (selecMod == "Djump")
as.numeric(modSel@Djump@model) else if (selecMod == "BIC")
modSel@BIC_capushe$model else if (selecMod == "AIC")
modSel@AIC_capushe$model
-
+
mod <- as.character(tableauRecap[indModSel, 1])
listMod <- as.integer(unlist(strsplit(mod, "[.]")))
modelSel <- models_list[[listMod[1]]][[listMod[2]]]
-
+
## Affectations
Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n)
for (i in 1:n)
{
for (r in 1:length(modelSel$pi))
{
- sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[,
- , r])^2)
- Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[,
- , r])
+ sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2)
+ Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[, , r])
}
}
Gam <- Gam/rowSums(Gam)
modelSel$affec <- apply(Gam, 1, which.max)
modelSel$proba <- Gam
-
+
if (plot)
- {
print(plot_valse(X, Y, modelSel, n))
- }
-
+
return(modelSel)
}
require("ggplot2")
require("reshape2")
require("cowplot")
-
+
K <- length(model$pi)
## regression matrices
gReg <- list()
Melt <- melt(t((model$phi[, , r])))
gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) +
geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white",
- midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster",
- r))
+ midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
}
print(gReg)
-
+
## Differences between two clusters
if (comp)
{
if (is.na(k1) || is.na(k))
- {
print("k1 and k2 must be integers, representing the clusters you want to compare")
- }
Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
- gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + geom_tile() +
- scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
- space = "Lab") + ggtitle(paste("Difference between regression matrices in cluster",
- k1, "and", k2))
+ gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))
+ + geom_tile()
+ + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+ space = "Lab")
+ + ggtitle(paste("Difference between regression matrices in cluster",
+ k1, "and", k2))
print(gDiff)
-
}
-
+
### Covariance matrices
matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
for (r in 1:K)
- {
matCov[, r] <- diag(model$rho[, , r])
- }
MeltCov <- melt(matCov)
- gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() +
- scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
- space = "Lab") + ggtitle("Covariance matrices")
+ gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile()
+ + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0,
+ space = "Lab")
+ + ggtitle("Covariance matrices")
print(gCov)
-
+
### Proportions
gam2 <- matrix(NA, ncol = K, nrow = n)
for (i in 1:n)
- {
gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
- }
-
- bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) +
- geom_boxplot() + theme(legend.position = "none") + background_grid(major = "xy",
- minor = "none")
+
+ bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2))
+ + geom_boxplot()
+ + theme(legend.position = "none")
+ + background_grid(major = "xy", minor = "none")
print(bp)
-
+
### Mean in each cluster
XY <- cbind(X, Y)
XY_class <- list()
for (r in 1:K)
{
XY_class[[r]] <- XY[model$affec == r, ]
- if (sum(model$affec == r) == 1)
- {
+ if (sum(model$affec == r) == 1) {
meanPerClass[, r] <- XY_class[[r]]
- } else
- {
+ } else {
meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
}
}
- data <- data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K,
- each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
+ data <- data.frame(mean = as.vector(meanPerClass),
+ cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
- print(g + geom_line(aes(linetype = cluster, color = cluster)) + geom_point(aes(color = cluster)) +
- ggtitle("Mean per cluster"))
-
+ print(g + geom_line(aes(linetype = cluster, color = cluster))
+ + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
}
#'
selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma,
glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast = TRUE)
- {
- if (ncores > 1)
- {
+{
+ if (ncores > 1) {
cl <- parallel::makeCluster(ncores, outfile = "")
parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit",
"mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment())
}
-
+
# Computation for a fixed lambda
computeCoefs <- function(lambda)
{
params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
X, Y, eps, fast)
-
+
p <- dim(phiInit)[1]
m <- dim(phiInit)[2]
-
+
# selectedVariables: list where element j contains vector of selected variables
# in [1,m]
- selectedVariables <- lapply(1:p, function(j)
- {
+ selectedVariables <- lapply(1:p, function(j) {
# from boolean matrix mxk of selected variables obtain the corresponding boolean
# m-vector, and finally return the corresponding indices
seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
})
-
+
list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
}
-
+
# For each lambda in the grid, we compute the coefficients
out <- if (ncores > 1)
parLapply(cl, glambda, computeCoefs) else lapply(glambda, computeCoefs)
ind_uniq <- which(!ind_dup)
out2 <- list()
for (l in 1:length(ind_uniq))
- {
out2[[l]] <- out[[ind_uniq[l]]]
- }
out2
}