+++ /dev/null
-#' EMGLLF
-#'
-#' Description de EMGLLF
-#'
-#' @param phiInit an initialization for phi
-#' @param rhoInit an initialization for rho
-#' @param piInit an initialization for pi
-#' @param gamInit initialization for the a posteriori probabilities
-#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
-#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 100
-#' @param gamma integer for the power in the penaly, by default = 1
-#' @param lambda regularization parameter in the Lasso estimation
-#' @param X matrix of covariates (of size n*p)
-#' @param Y matrix of responses (of size n*m)
-#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
-#'
-#' @return A list ... phi,rho,pi,LLF,S,affec:
-#' phi : parametre de moyenne renormalisé, calculé par l'EM
-#' rho : parametre de variance renormalisé, calculé par l'EM
-#' pi : parametre des proportions renormalisé, calculé par l'EM
-#' LLF : log vraisemblance associée à cet échantillon, pour les valeurs estimées des paramètres
-#' S : ... affec : ...
-#'
-#' @export
-EMGLLF <- function(phiInit, rhoInit, piInit, gamInit,
- mini, maxi, gamma, lambda, X, Y, eps, fast=TRUE)
-{
- if (!fast)
- {
- # Function in R
- return (.EMGLLF_R(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau))
- }
-
- # Function in C
- n = nrow(X) #nombre d'echantillons
- p = ncol(X) #nombre de covariables
- m = ncol(Y) #taille de Y (multivarié)
- k = length(piInit) #nombre de composantes dans le mélange
- .Call("EMGLLF",
- phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, X, Y, tau,
- phi=double(p*m*k), rho=double(m*m*k), pi=double(k), LLF=double(maxi),
- S=double(p*m*k), affec=integer(n),
- n, p, m, k,
- PACKAGE="valse")
-}
-
-# R version - slow but easy to read
-.EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X2,Y,tau)
-{
- # Matrix dimensions
- n = dim(Y)[1]
- if (length(dim(phiInit)) == 2){
- p = 1
- m = dim(phiInit)[1]
- k = dim(phiInit)[2]
- } else {
- p = dim(phiInit)[1]
- m = dim(phiInit)[2]
- k = dim(phiInit)[3]
- }
- X = matrix(nrow = n, ncol = p)
- X[1:n,1:p] = X2
- # Outputs
- phi = array(NA, dim = c(p,m,k))
- phi[1:p,,] = phiInit
- rho = rhoInit
- pi = piInit
- llh = -Inf
- S = array(0, dim=c(p,m,k))
-
- # Algorithm variables
- gam = gamInit
- Gram2 = array(0, dim=c(p,p,k))
- ps2 = array(0, dim=c(p,m,k))
- X2 = array(0, dim=c(n,p,k))
- Y2 = array(0, dim=c(n,m,k))
- EPS = 1e-15
-
- for (ite in 1:maxi)
- {
- # Remember last pi,rho,phi values for exit condition in the end of loop
- Phi = phi
- Rho = rho
- Pi = pi
-
- # Computations associated to X and Y
- for (r in 1:k)
- {
- for (mm in 1:m)
- Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm]
- for (i in 1:n)
- X2[i,,r] = sqrt(gam[i,r]) * X[i,]
- for (mm in 1:m)
- ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r])
- for (j in 1:p)
- {
- for (s in 1:p)
- Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r])
- }
- }
-
- #########
- #M step #
- #########
-
- # For pi
- b = sapply( 1:k, function(r) sum(abs(phi[,,r])) )
- gam2 = colSums(gam)
- a = sum(gam %*% log(pi))
-
- # While the proportions are nonpositive
- kk = 0
- pi2AllPositive = FALSE
- while (!pi2AllPositive)
- {
- pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi)
- pi2AllPositive = all(pi2 >= 0)
- kk = kk+1
- }
-
- # t(m) is the largest value in the grid O.1^k such that it is nonincreasing
- while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
- -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) )
- {
- pi2 = pi + 0.1^kk * (1/n*gam2 - pi)
- kk = kk + 1
- }
- t = 0.1^kk
- pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi))
-
- #For phi and rho
- for (r in 1:k)
- {
- for (mm in 1:m)
- {
- ps = 0
- for (i in 1:n)
- ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r])
- nY2 = sum(Y2[,mm,r]^2)
- rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2)
- }
- }
-
- for (r in 1:k)
- {
- for (j in 1:p)
- {
- for (mm in 1:m)
- {
- S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r])
- if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma))
- phi[j,mm,r]=0
- else if(S[j,mm,r] > n*lambda*(pi[r]^gamma))
- phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r]
- else
- phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r]
- }
- }
- }
-
- ########
- #E step#
- ########
-
- # Precompute det(rho[,,r]) for r in 1...k
- detRho = sapply(1:k, function(r) det(rho[,,r]))
- gam1 = matrix(0, nrow = n, ncol = k)
- for (i in 1:n)
- {
- # Update gam[,]
- for (r in 1:k)
- {
- gam1[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r]
- }
- }
- gam = gam1 / rowSums(gam1)
- sumLogLLH = sum(log(rowSums(gam)) - log((2*base::pi)^(m/2)))
- sumPen = sum(pi^gamma * b)
- last_llh = llh
- llh = -sumLogLLH/n + lambda*sumPen
- dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) )
- Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) )
- Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) )
- Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) )
- dist2 = max(Dist1,Dist2,Dist3)
-
- if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau)))
- break
- }
-
- list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S)
-}