[valse.git] / pkg / R / selectVariables.R

#' selectVariables
#'
#' Build, for every regularization parameter lambda of a grid, the sets of relevant variables.
#'
#' For each lambda, EMGLLF is run from the given initial estimators; a coefficient of the
#' resulting phi is considered non-zero when its absolute value exceeds \code{thresh}.
#'
#' @param phiInit an initial estimator for phi (size: p*m*k)
#' @param rhoInit an initial estimator for rho (size: m*m*k)
#' @param piInit	an initial estimator for pi (size : k)
#' @param gamInit an initial estimator for gamma
#' @param mini		minimum number of iterations in EM algorithm
#' @param maxi		maximum number of iterations in EM algorithm
#' @param gamma	 power in the penalty
#' @param glambda grid of regularization parameters
#' @param X			 matrix of regressors
#' @param Y			 matrix of responses
#' @param thresh	 threshold to consider a coefficient to be equal to 0
#' @param tau		 threshold to say that EM algorithm has converged
#' @param ncores	 number of cores to use; 1 (the default) means no parallelism
#'
#' @return a list with one element per lambda in \code{glambda} (same order), each being a
#'   list with fields \code{selected}, \code{Rho} and \code{Pi}. \code{selected} has one
#'   column per row j of phi (m x p matrix, or a length-p vector when m == 1): the indices
#'   in [1,m] selected for j, padded with zeros up to length m.
#'
#' @examples
#' # TODO: add a runnable example
#'
#' @export
#'
selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
	X,Y,thresh,tau, ncores=1) #ncores==1 ==> no //
{
	if (ncores > 1)
	{
		cl <- parallel::makeCluster(ncores)
		# Shut the workers down on every exit path, including errors raised by a worker
		on.exit(parallel::stopCluster(cl), add=TRUE)
		# Export everything computeCoefs() reads (piInit and gamma included)
		parallel::clusterExport(cl=cl,
			varlist=c("phiInit","rhoInit","piInit","gamInit","mini","maxi","gamma","glambda",
				"X","Y","thresh","tau"),
			envir=environment())
	}

	# Computation for a single lambda
	computeCoefs <- function(lambda)
	{
		params <- EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)

		p <- dim(phiInit)[1]
		m <- dim(phiInit)[2]

		# selectedVariables: column j holds the selected variables in [1,m] for row j of phi,
		# padded with zeros to length m (a fixed-size result is easier to handle than a list
		# of lists).
		selectedVariables <- vapply(seq_len(p), function(j) {
			# Force a m x k matrix: phi[j,,] collapses to a plain vector when m == 1 or
			# k == 1, which apply() cannot handle.
			phiJ <- matrix(params$phi[j,,], nrow=m)
			# From the boolean m x k matrix of selected coefficients obtain the boolean
			# m-vector (any component selects the variable), then the corresponding indices.
			keep <- seq_len(m)[ apply(abs(phiJ) > thresh, 1, any) ]
			c(keep, rep(0, m-length(keep)))
		}, numeric(m))

		list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
	}

	# For each lambda of the grid, compute the coefficients. The lambda VALUES are mapped
	# over (not their positions in the grid).
	if (ncores > 1)
	{
		parallel::parLapply(cl, glambda, computeCoefs)
	}
	else
	{
		lapply(glambda, computeCoefs)
	}
}
Commit	Line	Data
	1	#' selectVariables
	2	#'
	3	#' It is a function which construct, for a given lambda, the sets of relevant variables.
	4	#'
	5	#' @param phiInit an initial estimator for phi (size: p*m*k)
	6	#' @param rhoInit an initial estimator for rho (size: m*m*k)
	7	#' @param piInit an initial estimator for pi (size : k)
	8	#' @param gamInit an initial estimator for gamma
	9	#' @param mini minimum number of iterations in EM algorithm
	10	#' @param maxi maximum number of iterations in EM algorithm
	11	#' @param gamma power in the penalty
	12	#' @param glambda grid of regularization parameters
	13	#' @param X matrix of regressors
	14	#' @param Y matrix of responses
	15	#' @param thresh threshold to consider a coefficient to be equal to 0
	16	#' @param tau threshold to say that EM algorithm has converged
	17	#'
	18	#' @return a list of outputs, for each lambda in grid: selected,Rho,Pi
	19	#'
	20	#' @examples TODO
	21	#'
	22	#' @export
	23	#'
	24	selectVariables = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,
	25	X,Y,thresh,tau, ncores=1) #ncores==1 ==> no //
	26	{
	27	if (ncores > 1)
	28	{
	29	cl = parallel::makeCluster(ncores)
	30	parallel::clusterExport(cl=cl,
	31	varlist=c("phiInit","rhoInit","gamInit","mini","maxi","glambda","X","Y","thresh","tau"),
	32	envir=environment())
	33	}
	34
	35	# Calcul pour un lambda
	36	computeCoefs <-function(lambda)
	37	{
	38	params = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)
	39
	40	p = dim(phiInit)[1]
	41	m = dim(phiInit)[2]
	42
	43	#selectedVariables: list where element j contains vector of selected variables in [1,m]
	44	selectedVariables = sapply(1:p, function(j) { ## je me suis permise de changer le type,
	45	##une liste de liste ca devenait compliqué je trouve pour choper ce qui nous intéresse
	46	#from boolean matrix mxk of selected variables obtain the corresponding boolean m-vector,
	47	#and finally return the corresponding indices
	48	#seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ]
	49	c(seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ],
	50	rep(0, m-length(seq_len(m)[ apply( abs(params$phi[j,,]) > thresh, 1, any ) ] ) ))
	51	})
	52
	53	list("selected"=selectedVariables,"Rho"=params$rho,"Pi"=params$pi)
	54	}
	55
	56	# Pour chaque lambda de la grille, on calcule les coefficients
	57	out <-
	58	if (ncores > 1){
	59	parLapply(cl, seq_along(glambda, computeCoefs))}
	60	else lapply(seq_along(glambda), computeCoefs)
	61	if (ncores > 1){
	62	parallel::stopCluster(cl)}
	63	out
	64	}