[valse.git] / pkg / R / constructionModelesLassoMLE.R

constructionModelesLassoMLE = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,
                                       X,Y,seuil,tau,selected, parallel = FALSE)
{
  if (parallel) {
    #TODO: parameter ncores (chaque tâche peut aussi demander du parallélisme...)
    cl = parallel::makeCluster( parallel::detectCores() / 4 )
    parallel::clusterExport(cl=cl,
                            varlist=c("phiInit","rhoInit","gamInit","mini","maxi","X","Y","seuil","tau"),
                            envir=environment())
    #Pour chaque lambda de la grille, on calcule les coefficients
    out = parLapply( seq_along(glambda), function(lambda)
    {
      n = dim(X)[1]
      p = dim(phiInit)[1]
      m = dim(phiInit)[2]
      k = dim(phiInit)[3]
      
      #TODO: phiInit[selected] et X[selected] sont bien sûr faux; par quoi remplacer ?
      #lambda == 0 c'est normal ? -> ED : oui, ici on calcule le maximum de vraisembance, donc on ne pénalise plus
      res = EMGLLF(phiInit[selected],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[selected],Y,tau)
      
      #comment évaluer la dimension à partir du résultat et de [not]selected ?
      #dimension = ...
      
      #on veut calculer la vraisemblance avec toutes nos estimations
      densite = vector("double",n)
      for (r in 1:k)
      {
        delta = Y%*%rho[,,r] - (X[selected]%*%res$phi[selected,,r])
        densite = densite + pi[r] *
          det(rho[,,r])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0)
      }
      llh = c( sum(log(densite[,lambda])), (dimension+m+1)*k-1 )
      list("phi"=res$phi, "rho"=res$rho, "pi"=res$pi, "llh" = llh)
    })
    parallel::stopCluster(cl)
    out
  }
  else {
    #Pour chaque lambda de la grille, on calcule les coefficients
    n = dim(X)[1]
    p = dim(phiInit)[1]
    m = dim(phiInit)[2]
    k = dim(phiInit)[3]
    L = length(selected)
    phi = list()
    phiLambda = array(0, dim = c(p,m,k))
    rho = list()
    pi = list()
    llh = list()
    
    out = lapply( seq_along(selected), function(lambda)
    {
      print(lambda)
      sel.lambda = selected[[lambda]]
      col.sel = which(colSums(sel.lambda)!=0)
      if (length(col.sel)>0){
        res_EM = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[,col.sel],Y,tau)
        phiLambda2 = res_EM$phi
        rhoLambda = res_EM$rho
        piLambda = res_EM$pi
        for (j in 1:length(col.sel)){
          phiLambda[col.sel[j],,] = phiLambda2[j,,]
        }
        
        dimension = 0
        for (j in 1:p){
          b = setdiff(1:m, sel.lambda[,j])
          if (length(b) > 0){
            phiLambda[j,b,] = 0.0
          }
          dimension = dimension + sum(sel.lambda[,j]!=0)
        }
        
        #on veut calculer la vraisemblance avec toutes nos estimations
        densite = vector("double",n)
        for (r in 1:k)
        {
          delta = Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r])
          densite = densite + piLambda[r] *
            det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0)
        }
        llhLambda = c( sum(log(densite)), (dimension+m+1)*k-1 )
        list("phi"= phiLambda, "rho"= rhoLambda, "pi"= piLambda, "llh" = llhLambda)
      }
    }
    )
    return(out)
  }
}
Commit	Line	Data
	1	constructionModelesLassoMLE = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,
	2	X,Y,seuil,tau,selected, parallel = FALSE)
	3	{
	4	if (parallel) {
	5	#TODO: parameter ncores (chaque tâche peut aussi demander du parallélisme...)
	6	cl = parallel::makeCluster( parallel::detectCores() / 4 )
	7	parallel::clusterExport(cl=cl,
	8	varlist=c("phiInit","rhoInit","gamInit","mini","maxi","X","Y","seuil","tau"),
	9	envir=environment())
	10	#Pour chaque lambda de la grille, on calcule les coefficients
	11	out = parLapply( seq_along(glambda), function(lambda)
	12	{
	13	n = dim(X)[1]
	14	p = dim(phiInit)[1]
	15	m = dim(phiInit)[2]
	16	k = dim(phiInit)[3]
	17
	18	#TODO: phiInit[selected] et X[selected] sont bien sûr faux; par quoi remplacer ?
	19	#lambda == 0 c'est normal ? -> ED : oui, ici on calcule le maximum de vraisembance, donc on ne pénalise plus
	20	res = EMGLLF(phiInit[selected],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[selected],Y,tau)
	21
	22	#comment évaluer la dimension à partir du résultat et de [not]selected ?
	23	#dimension = ...
	24
	25	#on veut calculer la vraisemblance avec toutes nos estimations
	26	densite = vector("double",n)
	27	for (r in 1:k)
	28	{
	29	delta = Y%%rho[,,r] - (X[selected]%%res$phi[selected,,r])
	30	densite = densite + pi[r] *
	31	det(rho[,,r])/(sqrt(2base::pi))^m exp(-tcrossprod(delta)/2.0)
	32	}
	33	llh = c( sum(log(densite[,lambda])), (dimension+m+1)*k-1 )
	34	list("phi"=res$phi, "rho"=res$rho, "pi"=res$pi, "llh" = llh)
	35	})
	36	parallel::stopCluster(cl)
	37	out
	38	}
	39	else {
	40	#Pour chaque lambda de la grille, on calcule les coefficients
	41	n = dim(X)[1]
	42	p = dim(phiInit)[1]
	43	m = dim(phiInit)[2]
	44	k = dim(phiInit)[3]
	45	L = length(selected)
	46	phi = list()
	47	phiLambda = array(0, dim = c(p,m,k))
	48	rho = list()
	49	pi = list()
	50	llh = list()
	51
	52	out = lapply( seq_along(selected), function(lambda)
	53	{
	54	print(lambda)
	55	sel.lambda = selected[[lambda]]
	56	col.sel = which(colSums(sel.lambda)!=0)
	57	if (length(col.sel)>0){
	58	res_EM = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0.,X[,col.sel],Y,tau)
	59	phiLambda2 = res_EM$phi
	60	rhoLambda = res_EM$rho
	61	piLambda = res_EM$pi
	62	for (j in 1:length(col.sel)){
	63	phiLambda[col.sel[j],,] = phiLambda2[j,,]
	64	}
	65
	66	dimension = 0
	67	for (j in 1:p){
	68	b = setdiff(1:m, sel.lambda[,j])
	69	if (length(b) > 0){
	70	phiLambda[j,b,] = 0.0
	71	}
	72	dimension = dimension + sum(sel.lambda[,j]!=0)
	73	}
	74
	75	#on veut calculer la vraisemblance avec toutes nos estimations
	76	densite = vector("double",n)
	77	for (r in 1:k)
	78	{
	79	delta = Y%%rhoLambda[,,r] - (X[, col.sel]%%phiLambda[col.sel,,r])
	80	densite = densite + piLambda[r] *
	81	det(rhoLambda[,,r])/(sqrt(2base::pi))^m exp(-tcrossprod(delta)/2.0)
	82	}
	83	llhLambda = c( sum(log(densite)), (dimension+m+1)*k-1 )
	84	list("phi"= phiLambda, "rho"= rhoLambda, "pi"= piLambda, "llh" = llhLambda)
	85	}
	86	}
	87	)
	88	return(out)
	89	}
	90	}