From: Benjamin Auder Date: Fri, 14 Apr 2017 16:43:47 +0000 (+0200) Subject: remove pre-commit hook; fix weird formatting from formatR package X-Git-Url: https://git.auder.net/variants/current/doc/css/pieces/cr.svg?a=commitdiff_plain;h=1b698c1619dbcf5b3a0608dc894d249945d2bce3;p=valse.git remove pre-commit hook; fix weird formatting from formatR package --- diff --git a/hooks/pre-commit b/hooks/pre-commit.bad similarity index 100% rename from hooks/pre-commit rename to hooks/pre-commit.bad diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R index ee7a4fc..0a279f0 100644 --- a/pkg/R/EMGLLF.R +++ b/pkg/R/EMGLLF.R @@ -24,14 +24,14 @@ #' @export EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, eps, fast = TRUE) - { +{ if (!fast) { # Function in R return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, eps)) } - + # Function in C n <- nrow(X) #nombre d'echantillons p <- ncol(X) #nombre de covariables @@ -46,22 +46,21 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, # R version - slow but easy to read .EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X2, Y, eps) - { +{ # Matrix dimensions n <- dim(Y)[1] - if (length(dim(phiInit)) == 2) - { + if (length(dim(phiInit)) == 2) { p <- 1 m <- dim(phiInit)[1] k <- dim(phiInit)[2] - } else - { + } else { p <- dim(phiInit)[1] m <- dim(phiInit)[2] k <- dim(phiInit)[3] } X <- matrix(nrow = n, ncol = p) X[1:n, 1:p] <- X2 + # Outputs phi <- array(NA, dim = c(p, m, k)) phi[1:p, , ] <- phiInit @@ -69,7 +68,7 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, pi <- piInit llh <- -Inf S <- array(0, dim = c(p, m, k)) - + # Algorithm variables gam <- gamInit Gram2 <- array(0, dim = c(p, p, k)) @@ -77,33 +76,37 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X2 <- array(0, dim = c(n, p, k)) Y2 <- array(0, dim = c(n, m, k)) EPS <- 1e-15 - + for (ite in 1:maxi) { # Remember last pi,rho,phi values for exit condition in the end of loop Phi <- phi Rho <- rho Pi <- pi - + # Computations associated to X and Y for (r in 1:k) { - for (mm in 1:m) Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm] - for (i in 1:n) X2[i, , r] <- sqrt(gam[i, r]) * X[i, ] - for (mm in 1:m) ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r]) + for (mm in 1:m) + Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm] + for (i in 1:n) + X2[i, , r] <- sqrt(gam[i, r]) * X[i, ] + for (mm in 1:m) + ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r]) for (j in 1:p) { - for (s in 1:p) Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r]) + for (s in 1:p) + Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r]) } } - - ######### M step # - + + ## M step + # For pi b <- sapply(1:k, function(r) sum(abs(phi[, , r]))) gam2 <- colSums(gam) a <- sum(gam %*% log(pi)) - + # While the proportions are nonpositive kk <- 0 pi2AllPositive <- FALSE @@ -113,66 +116,64 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, pi2AllPositive <- all(pi2 >= 0) kk <- kk + 1 } - + # t(m) is the largest value in the grid O.1^k such that it is nonincreasing - while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < -sum(gam2 * log(pi2))/n + - lambda * sum(pi2^gamma * b)) - { + while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b)) + { pi2 <- pi + 0.1^kk * (1/n * gam2 - pi) kk <- kk + 1 } t <- 0.1^kk pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi)) - + # For phi and rho for (r in 1:k) { for (mm in 1:m) { ps <- 0 - for (i in 1:n) ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, - r]) + for (i in 1:n) + ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r]) nY2 <- sum(Y2[, mm, r]^2) rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2) } } - + for (r in 1:k) { for (j in 1:p) { for (mm in 1:m) { - S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + sum(phi[-j, mm, - r] * Gram2[j, -j, r]) - if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) - { - phi[j, mm, r] <- 0 - } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) - { - phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, - j, r] - } else - { - phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, - j, r] + S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + + sum(phi[-j, mm, r] * Gram2[j, -j, r]) + if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- 0 + } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) { + phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r] + } else { + phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r] } } } } - + ######## E step# - + # Precompute det(rho[,,r]) for r in 1...k detRho <- sapply(1:k, function(r) det(rho[, , r])) gam1 <- matrix(0, nrow = n, ncol = k) for (i in 1:n) { # Update gam[,] - for (r in 1:k) gam1[i, r] <- pi[r] * exp(-0.5 * sum((Y[i, ] %*% rho[, - , r] - X[i, ] %*% phi[, , r])^2)) * detRho[r] + for (r in 1:k) + { + gam1[i, r] <- pi[r] * exp(-0.5 + * sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)) * detRho[r] + } } - gam <- gam1/rowSums(gam1) + gam <- gam1 / rowSums(gam1) sumLogLLH <- sum(log(rowSums(gam)) - log((2 * base::pi)^(m/2))) sumPen <- sum(pi^gamma * b) last_llh <- llh @@ -182,10 +183,10 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, Dist2 <- max((abs(rho - Rho))/(1 + abs(rho))) Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi))) dist2 <- max(Dist1, Dist2, Dist3) - + if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) break } - + list(phi = phi, rho = rho, pi = pi, llh = llh, S = S) } diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R index 436b289..db40948 100644 --- a/pkg/R/EMGrank.R +++ b/pkg/R/EMGrank.R @@ -1,4 +1,4 @@ -#' EMGrank +#' EMGrank #' #' Description de EMGrank #' @@ -23,7 +23,7 @@ EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE) # Function in R return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank)) } - + # Function in C n <- nrow(X) #nombre d'echantillons p <- ncol(X) #nombre de covariables @@ -50,12 +50,12 @@ matricize <- function(X) p <- dim(X)[2] m <- dim(Rho)[2] k <- dim(Rho)[3] - + # init outputs phi <- array(0, dim = c(p, m, k)) Z <- rep(1, n) LLF <- 0 - + # local variables Phi <- array(0, dim = c(p, m, k)) deltaPhi <- c() @@ -69,12 +69,12 @@ matricize <- function(X) # M step: update for Beta ( and then phi) for (r in 1:k) { - Z_indice <- seq_len(n)[Z == r] #indices where Z == r + Z_indice <- seq_len(n)[Z == r] #indices where Z == r if (length(Z_indice) == 0) next # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr - s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% crossprod(matricize(X[Z_indice, - ]), matricize(Y[Z_indice, ]))) + s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) + %*% crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ]))) S <- s$d # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation # of the initial product @@ -82,7 +82,7 @@ matricize <- function(X) S[(rank[r] + 1):length(S)] <- 0 phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r] } - + # Step E and computation of the loglikelihood sumLogLLF2 <- 0 for (i in seq_len(n)) @@ -91,8 +91,7 @@ matricize <- function(X) maxLogGamIR <- -Inf for (r in seq_len(k)) { - dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, - , r]) + dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r]) logGamIR <- log(Pi[r]) + log(det(Rho[, , r])) - 0.5 * dotProduct # Z[i] = index of max (gam[i,]) if (logGamIR > maxLogGamIR) @@ -104,15 +103,15 @@ matricize <- function(X) } sumLogLLF2 <- sumLogLLF2 + log(sumLLF1) } - + LLF <- -1/n * sumLogLLF2 - + # update distance parameter to check algorithm convergence (delta(phi, Phi)) - deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain? + deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain? if (length(deltaPhi) > deltaPhiBufferSize) deltaPhi <- deltaPhi[2:length(deltaPhi)] sumDeltaPhi <- sum(abs(deltaPhi)) - + # update other local variables Phi <- phi ite <- ite + 1 diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R index 4b68bcd..c34c707 100644 --- a/pkg/R/computeGridLambda.R +++ b/pkg/R/computeGridLambda.R @@ -18,18 +18,19 @@ #' @export computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau, fast = TRUE) - { +{ n <- nrow(X) p <- dim(phiInit)[1] m <- dim(phiInit)[2] k <- dim(phiInit)[3] - + list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, X, Y, tau, fast) grid <- array(0, dim = c(p, m, k)) for (i in 1:p) { - for (j in 1:m) grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma) + for (j in 1:m) + grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma) } sort(unique(grid)) } diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R index 760da40..4f23bb0 100644 --- a/pkg/R/constructionModelesLassoMLE.R +++ b/pkg/R/constructionModelesLassoMLE.R @@ -22,7 +22,7 @@ #' @export constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, X, Y, eps, S, ncores = 3, fast = TRUE, verbose = FALSE) - { +{ if (ncores > 1) { cl <- parallel::makeCluster(ncores, outfile = "") @@ -30,16 +30,16 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", "ncores", "fast", "verbose")) } - + # Individual model computation computeAtLambda <- function(lambda) { if (ncores > 1) require("valse") #nodes start with an empty environment - + if (verbose) print(paste("Computations for lambda=", lambda)) - + n <- dim(X)[1] p <- dim(phiInit)[1] m <- dim(phiInit)[2] @@ -49,43 +49,45 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, col.sel <- which(sapply(sel.lambda, length) > 0) #if list of selected vars if (length(col.sel) == 0) return(NULL) - + # lambda == 0 because we compute the EMV: no penalization here res <- EMGLLF(phiInit[col.sel, , ], rhoInit, piInit, gamInit, mini, maxi, gamma, 0, X[, col.sel], Y, eps, fast) - + # Eval dimension from the result + selected phiLambda2 <- res$phi rhoLambda <- res$rho piLambda <- res$pi phiLambda <- array(0, dim = c(p, m, k)) - for (j in seq_along(col.sel)) phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, - sel.lambda[[j]], ] + for (j in seq_along(col.sel)) + phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ] dimension <- length(unlist(sel.lambda)) - + # Computation of the loglikelihood densite <- vector("double", n) for (r in 1:k) { if (length(col.sel) == 1) { - delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, - , r]))) - } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, - , r])) - densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * - exp(-diag(tcrossprod(delta))/2) + delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r]))) + } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r])) + densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m + * exp(-diag(tcrossprod(delta))/2) } llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1) list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda) } - + # For each lambda, computation of the parameters - out <- if (ncores > 1) - parLapply(cl, 1:length(S), computeAtLambda) else lapply(1:length(S), computeAtLambda) - + out <- + if (ncores > 1) { + parLapply(cl, 1:length(S), computeAtLambda) + } else { + lapply(1:length(S), computeAtLambda) + } + if (ncores > 1) parallel::stopCluster(cl) - + out } diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R index b997303..5857a42 100644 --- a/pkg/R/constructionModelesLassoRank.R +++ b/pkg/R/constructionModelesLassoRank.R @@ -20,12 +20,12 @@ #' @export constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, ncores, fast = TRUE, verbose = FALSE) - { +{ n <- dim(X)[1] p <- dim(X)[2] m <- dim(Y)[2] L <- length(S) - + # Possible interesting ranks deltaRank <- rank.max - rank.min + 1 Size <- deltaRank^k @@ -42,7 +42,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, each = deltaRank^(k - r)), each = L) } RankLambda[, k + 1] <- rep(1:L, times = Size) - + if (ncores > 1) { cl <- parallel::makeCluster(ncores, outfile = "") @@ -50,23 +50,21 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", "verbose")) } - + computeAtLambda <- function(index) { lambdaIndex <- RankLambda[index, k + 1] rankIndex <- RankLambda[index, 1:k] if (ncores > 1) require("valse") #workers start with an empty environment - + # 'relevant' will be the set of relevant columns selected <- S[[lambdaIndex]]$selected relevant <- c() for (j in 1:p) { if (length(selected[[j]]) > 0) - { relevant <- c(relevant, j) - } } if (max(rankIndex) < length(relevant)) { @@ -75,25 +73,23 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, { res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, X[, relevant], Y, eps, rankIndex, fast) - llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + - m))) + llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m))) phi[relevant, , ] <- res$phi } list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho) } } - + # For each lambda in the grid we compute the estimators - out <- if (ncores > 1) - { - parLapply(cl, seq_len(length(S) * Size), computeAtLambda) - } else - { - lapply(seq_len(length(S) * Size), computeAtLambda) - } - + out <- + if (ncores > 1) { + parLapply(cl, seq_len(length(S) * Size), computeAtLambda) + } else { + lapply(seq_len(length(S) * Size), computeAtLambda) + } + if (ncores > 1) parallel::stopCluster(cl) - + out } diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R index fe86045..064b54b 100644 --- a/pkg/R/generateXY.R +++ b/pkg/R/generateXY.R @@ -17,14 +17,14 @@ generateXY <- function(n, π, meanX, β, covX, covY) p <- dim(covX)[1] m <- dim(covY)[1] k <- dim(covY)[3] - + X <- matrix(nrow = 0, ncol = p) Y <- matrix(nrow = 0, ncol = m) - + # random generation of the size of each population in X~Y (unordered) sizePop <- rmultinom(1, n, π) - class <- c() #map i in 1:n --> index of class in 1:k - + class <- c() #map i in 1:n --> index of class in 1:k + for (i in 1:k) { class <- c(class, rep(i, sizePop[i])) @@ -33,7 +33,7 @@ generateXY <- function(n, π, meanX, β, covX, covY) Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% β[, , i], covY[, , i])))) } - + shuffle <- sample(n) list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle]) } diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R index fafa2c4..d1ade1c 100644 --- a/pkg/R/initSmallEM.R +++ b/pkg/R/initSmallEM.R @@ -23,24 +23,22 @@ initSmallEM <- function(k, X, Y, fast = TRUE) piInit1 <- matrix(0, nIte, k) gamInit1 <- array(0, dim = c(n, k, nIte)) LLFinit1 <- list() - + # require(MASS) #Moore-Penrose generalized inverse of matrix for (repet in 1:nIte) { distance_clus <- dist(cbind(X, Y)) tree_hier <- hclust(distance_clus) Zinit1[, repet] <- cutree(tree_hier, k) - + for (r in 1:k) { Z <- Zinit1[, repet] Z_indice <- seq_len(n)[Z == r] #renvoit les indices où Z==r - if (length(Z_indice) == 1) - { + if (length(Z_indice) == 1) { betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% crossprod(t(X[Z_indice, ]), Y[Z_indice, ]) - } else - { + } else { betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% crossprod(X[Z_indice, ], Y[Z_indice, ]) } @@ -49,26 +47,26 @@ initSmallEM <- function(k, X, Y, fast = TRUE) rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet]) piInit1[repet, r] <- mean(Z == r) } - + for (i in 1:n) { for (r in 1:k) { - dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] - X[i, - ] %*% phiInit1[, , r, repet]) - Gam[i, r] <- piInit1[repet, r] * det(rhoInit1[, , r, repet]) * exp(-0.5 * - dotProduct) + dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] + - X[i, ] %*% phiInit1[, , r, repet]) + Gam[i, r] <- piInit1[repet, r] + * det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct) } sumGamI <- sum(Gam[i, ]) gamInit1[i, , repet] <- Gam[i, ]/sumGamI } - + miniInit <- 10 maxiInit <- 11 - - init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, - ], gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, - Y, eps = 1e-04, fast) + + init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ], + gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y, + eps = 1e-04, fast) LLFEessai <- init_EMG$LLF LLFinit1[repet] <- LLFEessai[length(LLFEessai)] } @@ -77,6 +75,6 @@ initSmallEM <- function(k, X, Y, fast = TRUE) rhoInit <- rhoInit1[, , , b] piInit <- piInit1[b, ] gamInit <- gamInit1[, , b] - + return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit)) } diff --git a/pkg/R/main.R b/pkg/R/main.R index 3b9620d..64e0586 100644 --- a/pkg/R/main.R +++ b/pkg/R/main.R @@ -30,29 +30,28 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, ncores_inner = 1, thresh = 1e-08, size_coll_mod = 10, fast = TRUE, verbose = FALSE, plot = TRUE) - { +{ p <- dim(X)[2] m <- dim(Y)[2] n <- dim(X)[1] - + if (verbose) print("main loop: over all k and all lambda") - - if (ncores_outer > 1) - { + + if (ncores_outer > 1) { cl <- parallel::makeCluster(ncores_outer, outfile = "") parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", "size_coll_mod", "verbose", "p", "m")) } - + # Compute models with k components computeModels <- function(k) { if (ncores_outer > 1) require("valse") #nodes start with an empty environment - + if (verbose) print(paste("Parameters initialization for k =", k)) # smallEM initializes parameters by k-means and regression model in each @@ -63,25 +62,22 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi X, Y, gamma, mini, maxi, eps, fast) if (length(grid_lambda) > size_coll_mod) grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)] - + if (verbose) print("Compute relevant parameters") # select variables according to each regularization parameter from the grid: # S$selected corresponding to selected variables S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast) - - if (procedure == "LassoMLE") - { + + if (procedure == "LassoMLE") { if (verbose) print("run the procedure Lasso-MLE") # compute parameter estimations, with the Maximum Likelihood Estimator, # restricted on selected variables. models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose) - - } else - { + } else { if (verbose) print("run the procedure Lasso-Rank") # compute parameter estimations, with the Low Rank Estimator, restricted on @@ -93,19 +89,23 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi models <- models[sapply(models, function(cell) !is.null(cell))] models } - + # List (index k) of lists (index lambda) of models - models_list <- if (ncores_outer > 1) - parLapply(cl, kmin:kmax, computeModels) else lapply(kmin:kmax, computeModels) + models_list <- + if (ncores_outer > 1) { + parLapply(cl, kmin:kmax, computeModels) + } else { + lapply(kmin:kmax, computeModels) + } if (ncores_outer > 1) parallel::stopCluster(cl) - + if (!requireNamespace("capushe", quietly = TRUE)) { warning("'capushe' not available: returning all models") return(models_list) } - + # Get summary 'tableauRecap' from models tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i) { @@ -118,41 +118,35 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, complexity = sumPen, contrast = -LLH) })) - - print(tableauRecap) tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] - + modSel <- capushe::capushe(tableauRecap, n) indModSel <- if (selecMod == "DDSE") as.numeric(modSel@DDSE@model) else if (selecMod == "Djump") as.numeric(modSel@Djump@model) else if (selecMod == "BIC") modSel@BIC_capushe$model else if (selecMod == "AIC") modSel@AIC_capushe$model - + mod <- as.character(tableauRecap[indModSel, 1]) listMod <- as.integer(unlist(strsplit(mod, "[.]"))) modelSel <- models_list[[listMod[1]]][[listMod[2]]] - + ## Affectations Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n) for (i in 1:n) { for (r in 1:length(modelSel$pi)) { - sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, - , r])^2) - Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[, - , r]) + sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2) + Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[, , r]) } } Gam <- Gam/rowSums(Gam) modelSel$affec <- apply(Gam, 1, which.max) modelSel$proba <- Gam - + if (plot) - { print(plot_valse(X, Y, modelSel, n)) - } - + return(modelSel) } diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R index 6207061..ec2302d 100644 --- a/pkg/R/plot_valse.R +++ b/pkg/R/plot_valse.R @@ -18,7 +18,7 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA) require("ggplot2") require("reshape2") require("cowplot") - + K <- length(model$pi) ## regression matrices gReg <- list() @@ -27,51 +27,47 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA) Melt <- melt(t((model$phi[, , r]))) gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", - midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", - r)) + midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r)) } print(gReg) - + ## Differences between two clusters if (comp) { if (is.na(k1) || is.na(k)) - { print("k1 and k2 must be integers, representing the clusters you want to compare") - } Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2])) - gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + - scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, - space = "Lab") + ggtitle(paste("Difference between regression matrices in cluster", - k1, "and", k2)) + gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle(paste("Difference between regression matrices in cluster", + k1, "and", k2)) print(gDiff) - } - + ### Covariance matrices matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K) for (r in 1:K) - { matCov[, r] <- diag(model$rho[, , r]) - } MeltCov <- melt(matCov) - gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + - scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, - space = "Lab") + ggtitle("Covariance matrices") + gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, + space = "Lab") + + ggtitle("Covariance matrices") print(gCov) - + ### Proportions gam2 <- matrix(NA, ncol = K, nrow = n) for (i in 1:n) - { gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i]) - } - - bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + - geom_boxplot() + theme(legend.position = "none") + background_grid(major = "xy", - minor = "none") + + bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + + geom_boxplot() + + theme(legend.position = "none") + + background_grid(major = "xy", minor = "none") print(bp) - + ### Mean in each cluster XY <- cbind(X, Y) XY_class <- list() @@ -79,18 +75,15 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA) for (r in 1:K) { XY_class[[r]] <- XY[model$affec == r, ] - if (sum(model$affec == r) == 1) - { + if (sum(model$affec == r) == 1) { meanPerClass[, r] <- XY_class[[r]] - } else - { + } else { meanPerClass[, r] <- apply(XY_class[[r]], 2, mean) } } - data <- data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K, - each = dim(XY)[2])), time = rep(1:dim(XY)[2], K)) + data <- data.frame(mean = as.vector(meanPerClass), + cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K)) g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster)) - print(g + geom_line(aes(linetype = cluster, color = cluster)) + geom_point(aes(color = cluster)) + - ggtitle("Mean per cluster")) - + print(g + geom_line(aes(linetype = cluster, color = cluster)) + + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster")) } diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R index fe0688c..f717cae 100644 --- a/pkg/R/selectVariables.R +++ b/pkg/R/selectVariables.R @@ -24,35 +24,33 @@ #' selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast = TRUE) - { - if (ncores > 1) - { +{ + if (ncores > 1) { cl <- parallel::makeCluster(ncores, outfile = "") parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment()) } - + # Computation for a fixed lambda computeCoefs <- function(lambda) { params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, eps, fast) - + p <- dim(phiInit)[1] m <- dim(phiInit)[2] - + # selectedVariables: list where element j contains vector of selected variables # in [1,m] - selectedVariables <- lapply(1:p, function(j) - { + selectedVariables <- lapply(1:p, function(j) { # from boolean matrix mxk of selected variables obtain the corresponding boolean # m-vector, and finally return the corresponding indices seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)] }) - + list(selected = selectedVariables, Rho = params$rho, Pi = params$pi) } - + # For each lambda in the grid, we compute the coefficients out <- if (ncores > 1) parLapply(cl, glambda, computeCoefs) else lapply(glambda, computeCoefs) @@ -67,8 +65,6 @@ selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma ind_uniq <- which(!ind_dup) out2 <- list() for (l in 1:length(ind_uniq)) - { out2[[l]] <- out[[ind_uniq[l]]] - } out2 }