From: emilie Date: Wed, 5 Jul 2017 15:58:05 +0000 (+0200) Subject: fix the likelihood computation. fix some other few things X-Git-Url: https://git.auder.net/doc/current/R.css?a=commitdiff_plain;h=923ed737d0fa335b858204b813c964432488abbe;p=valse.git fix the likelihood computation. fix some other few things --- diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index 3b33e25..c28f54b 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -22,8 +22,9 @@ Imports: parallel Suggests: capushe, + methods, roxygen2, - testhat + testthat URL: http://git.auder.net/?p=valse.git License: MIT + file LICENSE RoxygenNote: 5.0.1 diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R index 16f58b6..d2a16bc 100644 --- a/pkg/R/constructionModelesLassoMLE.R +++ b/pkg/R/constructionModelesLassoMLE.R @@ -63,25 +63,39 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ] dimension <- length(unlist(sel.lambda)) - ## Computation of the loglikelihood - # Precompute det(rhoLambda[,,r]) for r in 1...k - detRho <- sapply(1:k, function(r) gdet(rhoLambda[, , r])) - sumLogLLH <- 0 + ## Affectations + Gam <- matrix(0, ncol = length(piLambda), nrow = n) for (i in 1:n) { - # Update gam[,]; use log to avoid numerical problems - logGam <- sapply(1:k, function(r) { - log(piLambda[r]) + log(detRho[r]) - 0.5 * - sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) - }) - - logGam <- logGam - max(logGam) #adjust without changing proportions - gam <- exp(logGam) - norm_fact <- sum(gam) - sumLogLLH <- sumLogLLH + log(norm_fact) - log((2 * base::pi)^(m/2)) + for (r in 1:length(piLambda)) + { + sqNorm2 <- sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + Gam[i, r] <- piLambda[r] * exp(-0.5 * sqNorm2) * det(rhoLambda[, , r]) + } } - llhLambda <- c(-sumLogLLH/n, (dimension + m + 1) * k - 1) - list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda) + Gam2 <- Gam/rowSums(Gam) + affec <- apply(Gam2, 1, which.max) + proba <- Gam2 + LLH <- c(sum(log(apply(Gam,1,sum))), (dimension + m + 1) * k - 1) + # ## Computation of the loglikelihood + # # Precompute det(rhoLambda[,,r]) for r in 1...k + # detRho <- sapply(1:k, function(r) gdet(rhoLambda[, , r])) + # sumLogLLH <- 0 + # for (i in 1:n) + # { + # # Update gam[,]; use log to avoid numerical problems + # logGam <- sapply(1:k, function(r) { + # log(piLambda[r]) + log(detRho[r]) - 0.5 * + # sum((Y[i, ] %*% rhoLambda[, , r] - X[i, ] %*% phiLambda[, , r])^2) + # }) + # + # #logGam <- logGam - max(logGam) #adjust without changing proportions -> change the LLH + # gam <- exp(logGam) + # norm_fact <- sum(gam) + # sumLogLLH <- sumLogLLH + log(norm_fact) - m/2* log(2 * base::pi) + # } + #llhLambda <- c(-sumLogLLH/n, (dimension + m + 1) * k - 1) + list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = LLH, affec = affec, proba = proba) } # For each lambda, computation of the parameters diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R index 44b4b06..179822f 100644 --- a/pkg/R/initSmallEM.R +++ b/pkg/R/initSmallEM.R @@ -55,7 +55,7 @@ initSmallEM <- function(k, X, Y, fast) dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] - X[i, ] %*% phiInit1[, , r, repet]) Gam[i, r] <- piInit1[repet, r] * - gdet(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct) + det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct) } sumGamI <- sum(Gam[i, ]) gamInit1[i, , repet] <- Gam[i, ]/sumGamI diff --git a/pkg/R/main.R b/pkg/R/main.R index 632d90b..bb1e3fe 100644 --- a/pkg/R/main.R +++ b/pkg/R/main.R @@ -123,6 +123,8 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi complexity = sumPen, contrast = -LLH) })) tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ] + + if (verbose == TRUE) print(tableauRecap) modSel <- capushe::capushe(tableauRecap, n) @@ -140,26 +142,10 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi modSel@AIC_capushe$model } - - mod <- as.character(tableauRecap[indModSel, 1]) - listMod <- as.integer(unlist(strsplit(mod, "[.]"))) + listMod <- as.integer(unlist(strsplit(as.character(indModSel), "[.]"))) modelSel <- models_list[[listMod[1]]][[listMod[2]]] - - ## Affectations - Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n) - for (i in 1:n) - { - for (r in 1:length(modelSel$pi)) - { - sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2) - Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * gdet(modelSel$rho[, , r]) - } - } - Gam <- Gam/rowSums(Gam) - modelSel$affec <- apply(Gam, 1, which.max) - modelSel$proba <- Gam modelSel$tableau <- tableauRecap - + if (plot) print(plot_valse(X, Y, modelSel, n)) diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R index 39e54d2..bab45cc 100644 --- a/pkg/R/selectVariables.R +++ b/pkg/R/selectVariables.R @@ -67,6 +67,8 @@ selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma } if (ncores > 1) parallel::stopCluster(cl) + + print(out) # Suppress models which are computed twice En fait, ca ca fait la comparaison de # tous les parametres On veut juste supprimer ceux qui ont les memes variables # sélectionnées diff --git a/pkg/data/script_data.R b/pkg/data/script_data.R index d425a38..7479674 100644 --- a/pkg/data/script_data.R +++ b/pkg/data/script_data.R @@ -1,5 +1,5 @@ -m=11 -p=10 +m=6 +p=6 covY = array(0,dim = c(m,m,2)) covY[,,1] = diag(m) @@ -9,7 +9,7 @@ Beta = array(0, dim = c(p, m, 2)) Beta[1:4,1:4,1] = 3*diag(4) Beta[1:4,1:4,2] = -2*diag(4) -#Data = generateXY(100, c(0.5,0.5), rep(0,p), Beta, diag(p), covY) - -#Res = valse(Data$X,Data$Y, fast=FALSE, plot=FALSE, verbose = TRUE, kmax=2, size_coll_mod = 100, -# selecMod = "BIC") +#Data = generateXY(200, c(0.5,0.5), rep(0,p), Beta, diag(p), covY) +# +#Res = valse(Data$X,Data$Y, fast=FALSE, plot=FALSE, verbose = TRUE, kmax=3, size_coll_mod = 50, selecMod = "DDSE", mini = 50, maxi=100) +#plot(Res$tableau[,3], -Res$tableau[,4])