From: emilie Date: Fri, 21 Apr 2017 13:37:19 +0000 (+0200) Subject: fix few things for the LLF X-Git-Url: https://git.auder.net/?p=valse.git;a=commitdiff_plain;h=ca277ac5ab51fef149014eb5e4610403fdb3227b fix few things for the LLF --- diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R index 6ee7ba7..f944f98 100644 --- a/pkg/R/EMGLLF.R +++ b/pkg/R/EMGLLF.R @@ -174,7 +174,7 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, sumPen <- sum(pi^gamma * b) last_llh <- llh - llh <- -sumLogLLH/n + lambda * sumPen + llh <- -sumLogLLH/n #+ lambda * sumPen dist <- ifelse(ite == 1, llh, (llh - last_llh)/(1 + abs(llh))) Dist1 <- max((abs(phi - Phi))/(1 + abs(phi))) Dist2 <- max((abs(rho - Rho))/(1 + abs(rho))) diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R index c2e9c8c..597d5c8 100644 --- a/pkg/R/computeGridLambda.R +++ b/pkg/R/computeGridLambda.R @@ -3,8 +3,8 @@ #' Construct the data-driven grid for the regularization parameters used for the Lasso estimator #' #' @param phiInit value for phi -#' @param rhoInit\tvalue for rho -#' @param piInit\tvalue for pi +#' @param rhoInit for rho +#' @param piInit for pi #' @param gamInit value for gamma #' @param X matrix of covariates (of size n*p) #' @param Y matrix of responses (of size n*m) @@ -27,10 +27,10 @@ computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mi list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, X, Y, tau, fast) grid <- array(0, dim = c(p, m, k)) - for (i in 1:p) + for (j in 1:p) { - for (j in 1:m) - grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma) + for (mm in 1:m) + grid[j, mm, ] <- abs(list_EMG$S[j, mm, ])/(n * list_EMG$pi^gamma) } sort(unique(grid)) } diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R index 90d0a2a..3967dfc 100644 --- a/pkg/R/constructionModelesLassoMLE.R +++ b/pkg/R/constructionModelesLassoMLE.R @@ -63,18 +63,36 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ] dimension <- length(unlist(sel.lambda)) - # Computation of the loglikelihood - densite <- vector("double", n) - for (r in 1:k) + ## Computation of the loglikelihood + # Precompute det(rhoLambda[,,r]) for r in 1...k + detRho <- sapply(1:k, function(r) det(rhoLambda[, , r])) + sumLogLLH <- 0 + for (i in 1:n) { - if (length(col.sel) == 1) - { - delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r]))) - } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r])) - densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * - exp(-diag(tcrossprod(delta))/2) + # Update gam[,]; use log to avoid numerical problems + logGam <- sapply(1:k, function(r) { + log(piLambda[r]) + log(detRho[r]) - 0.5 * + sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + }) + + logGam <- logGam - max(logGam) #adjust without changing proportions + gam[i, ] <- exp(logGam) + norm_fact <- sum(gam[i, ]) + gam[i, ] <- gam[i, ] / norm_fact + sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2)) } - llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1) + llhLambda <- c(sumLogLLH/n, (dimension + m + 1) * k - 1) + # densite <- vector("double", n) + # for (r in 1:k) + # { + # if (length(col.sel) == 1) + # { + # delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r]))) + # } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r])) + # densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * + # exp(-rowSums(delta^2)/2) + # } + # llhLambda <- c(mean(log(densite)), (dimension + m + 1) * k - 1) list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda) } diff --git a/pkg/R/main.R b/pkg/R/main.R index fecf519..af05061 100644 --- a/pkg/R/main.R +++ b/pkg/R/main.R @@ -17,6 +17,8 @@ #' @param ncores_outer Number of cores for the outer loop on k #' @param ncores_inner Number of cores for the inner loop on lambda #' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 +#' @param compute_grid_lambda, TRUE to compute the grid, FALSE if known (in arguments) +#' @param grid_lambda, a vector with regularization parameters if known, by default 0 #' @param size_coll_mod (Maximum) size of a collection of models #' @param fast TRUE to use compiled C code, FALSE for R code only #' @param verbose TRUE to show some execution traces @@ -28,7 +30,7 @@ #' @export valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mini = 10, maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, - ncores_inner = 1, thresh = 1e-08, size_coll_mod = 10, fast = TRUE, verbose = FALSE, + ncores_inner = 1, thresh = 1e-08, compute_grid_lambda = TRUE, grid_lambda = 0, size_coll_mod = 10, fast = TRUE, verbose = FALSE, plot = TRUE) { p <- dim(X)[2] @@ -58,8 +60,11 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi # component, doing this 20 times, and keeping the values maximizing the # likelihood after 10 iterations of the EM algorithm. P <- initSmallEM(k, X, Y, fast) - grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, - X, Y, gamma, mini, maxi, eps, fast) + if (compute_grid_lambda == TRUE) + { + grid_lambda <- computeGridLambda(P$phiInit, P$rhoInit, P$piInit, P$gamInit, + X, Y, gamma, mini, maxi, eps, fast) + } if (length(grid_lambda) > size_coll_mod) grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)] @@ -119,7 +124,10 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi complexity = sumPen, contrast = -LLH) })) tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] - + if (verbose == TRUE) + { + print(tableauRecap) + } modSel <- capushe::capushe(tableauRecap, n) indModSel <- if (selecMod == "DDSE") as.numeric(modSel@DDSE@model) else if (selecMod == "Djump") @@ -144,6 +152,7 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi Gam <- Gam/rowSums(Gam) modelSel$affec <- apply(Gam, 1, which.max) modelSel$proba <- Gam + modelSel$tableau <- tableauRecap if (plot) print(plot_valse(X, Y, modelSel, n)) diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R index bfe4042..cdc0ec0 100644 --- a/pkg/R/selectVariables.R +++ b/pkg/R/selectVariables.R @@ -4,16 +4,16 @@ #' #' @param phiInit an initial estimator for phi (size: p*m*k) #' @param rhoInit an initial estimator for rho (size: m*m*k) -#' @param piInit\tan initial estimator for pi (size : k) +#' @param piInit an initial estimator for pi (size : k) #' @param gamInit an initial estimator for gamma -#' @param mini\t\tminimum number of iterations in EM algorithm -#' @param maxi\t\tmaximum number of iterations in EM algorithm -#' @param gamma\t power in the penalty +#' @param mini minimum number of iterations in EM algorithm +#' @param maxi maximum number of iterations in EM algorithm +#' @param gamma power in the penalty #' @param glambda grid of regularization parameters -#' @param X\t\t\t matrix of regressors -#' @param Y\t\t\t matrix of responses +#' @param X matrix of regressors +#' @param Y matrix of responses #' @param thresh real, threshold to say a variable is relevant, by default = 1e-8 -#' @param eps\t\t threshold to say that EM algorithm has converged +#' @param eps threshold to say that EM algorithm has converged #' @param ncores Number or cores for parallel execution (1 to disable) #' #' @return a list of outputs, for each lambda in grid: selected,Rho,Pi diff --git a/pkg/data/data2.RData b/pkg/data/data2.RData new file mode 100644 index 0000000..80003e3 Binary files /dev/null and b/pkg/data/data2.RData differ diff --git a/pkg/data/script_data.R b/pkg/data/script_data.R new file mode 100644 index 0000000..7e5c036 --- /dev/null +++ b/pkg/data/script_data.R @@ -0,0 +1,12 @@ +m=11 +p=10 + +covY = array(0,dim = c(m,m,2)) +covY[,,1] = diag(m) +covY[,,2] = diag(m) + +Beta = array(0, dim = c(p, p, 2)) +Beta[1:4,1:4,1] = 3*diag(4) +Beta[1:4,1:4,2] = -2*diag(4) + +Data = generateXY(100, c(0.5,0.5), rep(0,p), Beta, diag(m), covY)