[valse.git] / pkg / R / valse.R

#' Main function
#'
#' For every number of clusters k in kmin:kmax, initializes the EM parameters,
#' builds a grid of regularization parameters, selects variables along that
#' grid and fits one model per selection with the chosen procedure. A single
#' model is then picked among all the fitted ones with the capushe package.
#'
#' @param X matrix of covariates (of size n*p)
#' @param Y matrix of responses (of size n*m)
#' @param procedure among 'LassoMLE' or 'LassoRank' (any value other than
#'   'LassoMLE' runs the Lasso-Rank branch)
#' @param selecMod method to select a model among 'DDSE', 'DJump', 'BIC' or 'AIC'
#'   (the spelling 'Djump' is accepted as well)
#' @param gamma integer for the power in the penalty, by default = 1
#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 50
#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
#' @param kmin integer, minimum number of clusters, by default = 2
#' @param kmax integer, maximum number of clusters, by default = 2
#' @param rang.min integer, minimum rank in the low rank procedure, by default = 1
#' @param rang.max integer, maximum rank in the low rank procedure, by default = 10
#' @param thresh real, threshold forwarded to the variable selection and
#'   Lasso-MLE steps, by default = 1e-8
#' @return a list with estimators of parameters (the selected element of the
#'   collection of fitted models)
#' @export
#-----------------------------------------------------------------------
valse = function(X,Y,procedure = 'LassoMLE',selecMod = 'DDSE',gamma = 1,mini = 10,
                 maxi = 50,eps = 1e-4,kmin = 2,kmax = 2,
                 rang.min = 1,rang.max = 10,thresh = 1e-8) {
  ##################################
  #core workflow: compute all models
  ##################################

  # Reject an unknown criterion up front instead of failing with
  # "object 'indModSel' not found" once all models have been computed.
  if (length(selecMod) != 1 || !(selecMod %in% c('DDSE', 'DJump', 'Djump', 'BIC', 'AIC'))) {
    stop("selecMod must be one of 'DDSE', 'DJump', 'BIC' or 'AIC'", call. = FALSE)
  }

  p = ncol(X)
  m = ncol(Y)
  n = nrow(X)

  model = list()
  # One 4-column matrix per k: log-likelihood, dimension, k, index in model[[k]].
  # Collected in a list so that the number of models is not capped.
  recap = list()
  print("main loop: over all k and all lambda")

  for (k in kmin:kmax){
    print(k)
    print("Parameters initialization")
    #smallEM initializes parameters by k-means and regression model in each component,
    #doing this 20 times, and keeping the values maximizing the likelihood after 10
    #iterations of the EM algorithm.
    init = initSmallEM(k, X, Y)
    # Kept local: these values are only consumed inside this function.
    phiInit = init$phiInit
    rhoInit = init$rhoInit
    piInit = init$piInit
    gamInit = init$gamInit
    # Fallback for interactive use only; skipped when gridLambda is already
    # available (e.g. when the package is loaded).
    if (!exists("gridLambda", mode = "function")) {
      source('~/valse/pkg/R/gridLambda.R')
    }
    # Local assignment: a global write followed by a local one would let the
    # grid of a previous k shadow the grid computed for the current k.
    grid_lambda = gridLambda(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, eps)

    # Keep at most 100 regularization parameters, spread over the grid.
    if (length(grid_lambda)>100){
      grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = 100)]
    }
    print("Compute relevant parameters")
    #select variables according to each regularization parameter
    #from the grid: A1 corresponding to selected variables, and
    #A2 corresponding to unselected variables.

    params = selectiontotale(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,grid_lambda,X,Y,thresh,eps)
    #params2 = selectVariables(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,grid_lambda[seq(1,length(grid_lambda), by=3)],X,Y,1e-8,eps)
    ## strange: params and params2 are different ...
    selected = params$selected
    Rho = params$Rho
    Pi = params$Pi

    if (procedure == 'LassoMLE') {
      print('run the procedure Lasso-MLE')
      #compute parameter estimations, with the Maximum Likelihood
      #Estimator, restricted on selected variables.
      model[[k]] = constructionModelesLassoMLE(phiInit, rhoInit,piInit,gamInit,mini,maxi,gamma,X,Y,thresh,eps,selected)
      # Stack the llh entry of every model: column 1 is used as the
      # log-likelihood and column 2 as the dimension below.
      llh = do.call(rbind, lapply(model[[k]], function(mod) mod$llh))
      if (is.null(llh)) {
        llh = matrix(numeric(0), ncol = 2)
      }
      LLH = llh[, 1]
      D = llh[, 2]
    } else {
      print('run the procedure Lasso-Rank')
      #compute parameter estimations, with the Low Rank
      #Estimator, restricted on selected variables.
      # NOTE(review): A1 is not defined in this function; the comment above
      # suggests it is the set of selected variables (possibly `selected`) -
      # confirm before relying on this branch.
      model[[k]] = constructionModelesLassoRank(Pi, Rho, mini, maxi, X, Y, eps,
                                                A1, rang.min, rang.max)

      ################################################
      ### TODO: review what follows
      # NOTE(review): runProcedure2 and Phi are not defined in this function and
      # must come from an enclosing environment; Phi is still written there.
      phi = runProcedure2()$phi
      Phi2 = Phi
      if (dim(Phi2)[1] == 0)
      {
        Phi[, , 1:k,] <<- phi
      } else
      {
        Phi <<- array(0, dim = c(p, m, kmax, dim(Phi2)[4] + dim(phi)[4]))
        Phi[, , 1:(dim(Phi2)[3]), 1:(dim(Phi2)[4])] <<- Phi2
        Phi[, , 1:k,-(1:(dim(Phi2)[4]))] <<- phi
      }
      # NOTE(review): LLH and D are only computed in the Lasso-MLE branch, so
      # the summary row below cannot be built here unless they exist elsewhere.
    }
    nbModels = length(model[[k]])
    recap[[length(recap) + 1]] = matrix(c(LLH, D, rep(k, nbModels), seq_len(nbModels)), ncol = 4)
  }
  print('Model selection')
  tableauRecap = do.call(rbind, recap)
  # drop = FALSE keeps a matrix even when a single model is left.
  tableauRecap = tableauRecap[tableauRecap[, 1] != Inf, , drop = FALSE]
  if (nrow(tableauRecap) == 0) {
    stop("no candidate model left for model selection", call. = FALSE)
  }
  if (!requireNamespace("capushe", quietly = TRUE)) {
    stop("package 'capushe' is required for model selection", call. = FALSE)
  }
  # Columns handed to capushe: model index, dimension (twice), log-likelihood.
  capusheData = cbind(seq_len(nrow(tableauRecap)), tableauRecap[,2], tableauRecap[,2], tableauRecap[,1])
  modSel = capushe::capushe(capusheData, n)
  indModSel = switch(selecMod,
    'DDSE' = as.numeric(modSel@DDSE@model),
    'DJump' = ,
    'Djump' = as.numeric(modSel@Djump@model),
    'BIC' = modSel@BIC_capushe$model,
    'AIC' = modSel@AIC_capushe$model)
  # Columns 3 and 4 hold k and the position of the model inside model[[k]].
  return(model[[tableauRecap[indModSel,3]]][[tableauRecap[indModSel,4]]])
}
Commit	Line	Data
	1	#' Main function
	2	#'
	3	#' @param X matrix of covariates (of size n*p)
	4	#' @param Y matrix of responses (of size n*m)
	5	#' @param procedure among 'LassoMLE' or 'LassoRank'
	6	#' @param selecMod method to select a model among 'DDSE', 'Djump', 'BIC' or 'AIC'
	7	#' @param gamma integer for the power in the penalty, by default = 1
	8	#' @param mini integer, minimum number of iterations in the EM algorithm, by default = 10
	9	#' @param maxi integer, maximum number of iterations in the EM algorithm, by default = 50
	10	#' @param eps real, threshold to say the EM algorithm converges, by default = 1e-4
	11	#' @param kmin integer, minimum number of clusters, by default = 2
	12	#' @param kmax integer, maximum number of clusters, by default = 2
	13	#' @param rang.min integer, minimum rank in the low rank procedure, by default = 1
	14	#' @param rang.max integer, maximum rank in the low rank procedure, by default = 10
	15	#' @return a list with estimators of parameters
	16	#' @export
	17	#-----------------------------------------------------------------------
	18	valse = function(X,Y,procedure = 'LassoMLE',selecMod = 'DDSE',gamma = 1,mini = 10,
	19	maxi = 50,eps = 1e-4,kmin = 2,kmax = 2,
	20	rang.min = 1,rang.max = 10) {
	21	##################################
	22	#core workflow: compute all models
	23	##################################
	24
	25	p = dim(X)[2]
	26	m = dim(Y)[2]
	27	n = dim(X)[1]
	28
	29	model = list()
	30	tableauRecap = array(0, dim=c(1000,4))
	31	cpt = 0
	32	print("main loop: over all k and all lambda")
	33
	34	for (k in kmin:kmax){
	35	print(k)
	36	print("Parameters initialization")
	37	#smallEM initializes parameters by k-means and regression model in each component,
	38	#doing this 20 times, and keeping the values maximizing the likelihood after 10
	39	#iterations of the EM algorithm.
	40	init = initSmallEM(k, X, Y)
	41	phiInit <<- init$phiInit
	42	rhoInit <<- init$rhoInit
	43	piInit <<- init$piInit
	44	gamInit <<- init$gamInit
	45	source('~/valse/pkg/R/gridLambda.R')
	46	grid_lambda <<- gridLambda(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, eps)
	47
	48	if (length(grid_lambda)>100){
	49	grid_lambda = grid_lambda[seq(1, length(grid_lambda), length.out = 100)]
	50	}
	51	print("Compute relevant parameters")
	52	#select variables according to each regularization parameter
	53	#from the grid: A1 corresponding to selected variables, and
	54	#A2 corresponding to unselected variables.
	55
	56	params = selectiontotale(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,grid_lambda,X,Y,1e-8,eps)
	57	#params2 = selectVariables(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,grid_lambda[seq(1,length(grid_lambda), by=3)],X,Y,1e-8,eps)
	58	## etrange : params et params 2 sont différents ...
	59	selected <<- params$selected
	60	Rho <<- params$Rho
	61	Pi <<- params$Pi
	62
	63	if (procedure == 'LassoMLE') {
	64	print('run the procedure Lasso-MLE')
	65	#compute parameter estimations, with the Maximum Likelihood
	66	#Estimator, restricted on selected variables.
	67	model[[k]] = constructionModelesLassoMLE(phiInit, rhoInit,piInit,gamInit,mini,maxi,gamma,X,Y,thresh,eps,selected)
	68	llh = matrix(ncol = 2)
	69	for (l in seq_along(model[[k]])){
	70	llh = rbind(llh, model[[k]][[l]]$llh)
	71	}
	72	LLH = llh[-1,1]
	73	D = llh[-1,2]
	74	} else {
	75	print('run the procedure Lasso-Rank')
	76	#compute parameter estimations, with the Low Rank
	77	#Estimator, restricted on selected variables.
	78	model = constructionModelesLassoRank(Pi, Rho, mini, maxi, X, Y, eps,
	79	A1, rank.min, rank.max)
	80
	81	################################################
	82	### Regarder la SUITE
	83	phi = runProcedure2()$phi
	84	Phi2 = Phi
	85	if (dim(Phi2)[1] == 0)
	86	{
	87	Phi[, , 1:k,] <<- phi
	88	} else
	89	{
	90	Phi <<- array(0, dim = c(p, m, kmax, dim(Phi2)[4] + dim(phi)[4]))
	91	Phi[, , 1:(dim(Phi2)[3]), 1:(dim(Phi2)[4])] <<- Phi2
	92	Phi[, , 1:k,-(1:(dim(Phi2)[4]))] <<- phi
	93	}
	94	}
	95	tableauRecap[(cpt+1):(cpt+length(model[[k]])), ] = matrix(c(LLH, D, rep(k, length(model[[k]])), 1:length(model[[k]])), ncol = 4)
	96	cpt = cpt+length(model[[k]])
	97	}
	98	print('Model selection')
	99	tableauRecap = tableauRecap[rowSums(tableauRecap[, 2:4])!=0,]
	100	tableauRecap = tableauRecap[(tableauRecap[,1])!=Inf,]
	101	data = cbind(1:dim(tableauRecap)[1], tableauRecap[,2], tableauRecap[,2], tableauRecap[,1])
	102	require(capushe)
	103	modSel = capushe(data, n)
	104	if (selecMod == 'DDSE') {
	105	indModSel = as.numeric(modSel@DDSE@model)
	106	} else if (selecMod == 'Djump') {
	107	indModSel = as.numeric(modSel@Djump@model)
	108	} else if (selecMod == 'BIC') {
	109	indModSel = modSel@BIC_capushe$model
	110	} else if (selecMod == 'AIC') {
	111	indModSel = modSel@AIC_capushe$model
	112	}
	113	return(model[[tableauRecap[indModSel,3]]][[tableauRecap[indModSel,4]]])
	114	}