[valse.git] / pkg / R / main.R

#' @useDynLib valse

Valse = setRefClass(
	Class = "Valse",

	fields = c(
		# User defined

		# regression data (size n*p, where n is the number of observations,
		# and p is the number of regressors)
		X = "matrix",
		# response data (size n*m, where n is the number of observations,
		# and m is the number of responses)
		Y = "matrix",

		# Optionally user defined (some default values)

		# power in the penalty
		gamma = "numeric",
		# minimum number of iterations for EM algorithm
		mini = "integer",
		# maximum number of iterations for EM algorithm
		maxi = "integer",
		# threshold for stopping EM algorithm
		eps = "numeric",
		# minimum number of components in the mixture
		kmin = "integer",
		# maximum number of components in the mixture
		kmax = "integer",
		# ranks for the Lasso-Rank procedure
		rank.min = "integer",
		rank.max = "integer",

		# Computed through the workflow

		# initialisation for the reparametrized conditional mean parameter
		phiInit = "numeric",
		# initialisation for the reparametrized variance parameter
		rhoInit = "numeric",
		# initialisation for the proportions
		piInit = "numeric",
		# initialisation for the allocations probabilities in each component
		tauInit = "numeric",
		# values for the regularization parameter grid
		gridLambda = "numeric",
		# je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
		# une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
		A1 = "integer",
		A2 = "integer",
		# collection of estimations for the reparametrized conditional mean parameters
		Phi = "numeric",
		# collection of estimations for the reparametrized variance parameters
		Rho = "numeric",
		# collection of estimations for the proportions parameters
		Pi = "numeric",

		#immutable (TODO:?)
		thresh = "numeric"
	),

	methods = list(
		#######################
		#initialize main object
		#######################
		initialize = function(X,Y,...)
		{
			"Initialize Valse object"

			callSuper(...)

			X <<- X
			Y <<- Y
			gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
			mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
			maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
			eps <<- ifelse (hasArg("eps"), eps, 1e-6)
			kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
			kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
			rank.min <<- ifelse (hasArg("rank.min"), rank.min, as.integer(2))
			rank.max <<- ifelse (hasArg("rank.max"), rank.max, as.integer(3))
			thresh <<- 1e-15 #immutable (TODO:?)
		},

		##################################
		#core workflow: compute all models
		##################################

		initParameters = function(k)
		{
			"Parameters initialization"

			#smallEM initializes parameters by k-means and regression model in each component,
			#doing this 20 times, and keeping the values maximizing the likelihood after 10
			#iterations of the EM algorithm.
			init = initSmallEM(k,X,Y)
			phiInit <<- init$phi0
			rhoInit <<- init$rho0
			piInit	<<- init$pi0
			tauInit <<- init$tau0
		},

		computeGridLambda = function()
		{
			"computation of the regularization grid"
			#(according to explicit formula given by EM algorithm)

			gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
		},

		computeRelevantParameters = function()
		{
			"Compute relevant parameters"

			#select variables according to each regularization parameter
			#from the grid: A1 corresponding to selected variables, and
			#A2 corresponding to unselected variables.
			params = selectiontotale(
				phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps)
			A1 <<- params$A1
			A2 <<- params$A2
			Rho <<- params$Rho
			Pi <<- params$Pi
		},

		runProcedure1 = function()
		{
			"Run procedure 1 [EMGLLF]"

			#compute parameter estimations, with the Maximum Likelihood
			#Estimator, restricted on selected variables.
			return ( constructionModelesLassoMLE(
				phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps,A1,A2) )
		},

		runProcedure2 = function()
		{
			"Run procedure 2 [EMGrank]"

			#compute parameter estimations, with the Low Rank
			#Estimator, restricted on selected variables.
			return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
				A1,rank.min,rank.max) )
		},

		run = function()
		{
			"main loop: over all k and all lambda"

			# Run the whole procedure, 1 with the
			#maximum likelihood refitting, and 2 with the Low Rank refitting.
			p = dim(phiInit)[1]
			m = dim(phiInit)[2]
			for (k in kmin:kmax)
			{
				print(k)
				initParameters(k)
				computeGridLambda()
				computeRelevantParameters()
				if (procedure == 1)
				{
					r1 = runProcedure1()
					Phi2 = Phi
					Rho2 = Rho
					Pi2 = Pi
					p = ncol(X)
					m = ncol(Y)
					if (is.null(dim(Phi2))) #test was: size(Phi2) == 0
					{
						Phi[,,1:k] <<- r1$phi
						Rho[,,1:k] <<- r1$rho
						Pi[1:k,] <<- r1$pi
					} else
					{
						Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
						Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
						Phi[,,1:k,dim(Phi2)[4]+1] <<- r1$phi
						Rho <<- array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
						Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] <<- Rho2
						Rho[,,1:k,dim(Rho2)[4]+1] <<- r1$rho
						Pi <<- array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
						Pi[1:nrow(Pi2),1:ncol(Pi2)] <<- Pi2
						Pi[1:k,ncol(Pi2)+1] <<- r1$pi
					}
				} else
				{
					phi = runProcedure2()$phi
					Phi2 = Phi
					if (dim(Phi2)[1] == 0)
					{
						Phi[,,1:k,] <<- phi
					} else
					{
						Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(phi)[4]))
						Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
						Phi[,,1:k,-(1:(dim(Phi2)[4]))] <<- phi
					}
				}
			}
		}

		##################################################
		#TODO: pruning: select only one (or a few best ?!) model
		##################################################
		#
		# 		function[model] selectModel(
		# 			#TODO
		# 			#model = odel(...)
		# 		end
		# Give at least the slope heuristic and BIC, and AIC ?

		)
)
Commit	Line	Data
cad71b2c BA	1	#' @useDynLib valse
cad71b2c BA	2
8e92c49c BA	3	Valse = setRefClass(
8e92c49c BA	4	Class = "Valse",
39046da6 BA	5
	6	fields = c(
	7	# User defined
	8
	9	# regression data (size n*p, where n is the number of observations,
	10	# and p is the number of regressors)
aa8df014	11	X = "matrix",
8e92c49c	12	# response data (size n*m, where n is the number of observations,
39046da6	13	# and m is the number of responses)
aa8df014	14	Y = "matrix",
39046da6 BA	15
	16	# Optionally user defined (some default values)
	17
	18	# power in the penalty
09ab3c16	19	gamma = "numeric",
39046da6 BA	20	# minimum number of iterations for EM algorithm
	21	mini = "integer",
	22	# maximum number of iterations for EM algorithm
	23	maxi = "integer",
	24	# threshold for stopping EM algorithm
09ab3c16	25	eps = "numeric",
39046da6 BA	26	# minimum number of components in the mixture
	27	kmin = "integer",
	28	# maximum number of components in the mixture
	29	kmax = "integer",
22d21a22	30	# ranks for the Lasso-Rank procedure
	31	rank.min = "integer",
	32	rank.max = "integer",
	33
39046da6 BA	34	# Computed through the workflow
	35
	36	# initialisation for the reparametrized conditional mean parameter
09ab3c16	37	phiInit = "numeric",
39046da6	38	# initialisation for the reparametrized variance parameter
09ab3c16	39	rhoInit = "numeric",
39046da6	40	# initialisation for the proportions
09ab3c16	41	piInit = "numeric",
39046da6	42	# initialisation for the allocations probabilities in each component
09ab3c16	43	tauInit = "numeric",
39046da6	44	# values for the regularization parameter grid
09ab3c16	45	gridLambda = "numeric",
39046da6 BA	46	# je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
39046da6 BA	47	# une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
09ab3c16 BA	48	A1 = "integer",
09ab3c16 BA	49	A2 = "integer",
39046da6	50	# collection of estimations for the reparametrized conditional mean parameters
09ab3c16	51	Phi = "numeric",
39046da6	52	# collection of estimations for the reparametrized variance parameters
09ab3c16	53	Rho = "numeric",
39046da6	54	# collection of estimations for the proportions parameters
09ab3c16	55	Pi = "numeric",
39046da6	56
09ab3c16	57	#immutable (TODO:?)
22d21a22	58	thresh = "numeric"
39046da6 BA	59	),
	60
	61	methods = list(
	62	#######################
	63	#initialize main object
	64	#######################
	65	initialize = function(X,Y,...)
	66	{
8e92c49c	67	"Initialize Valse object"
39046da6 BA	68
	69	callSuper(...)
	70
8e92c49c BA	71	X <<- X
8e92c49c BA	72	Y <<- Y
39046da6 BA	73	gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
	74	mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
	75	maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
	76	eps <<- ifelse (hasArg("eps"), eps, 1e-6)
	77	kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
	78	kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
22d21a22	79	rank.min <<- ifelse (hasArg("rank.min"), rank.min, as.integer(2))
	80	rank.max <<- ifelse (hasArg("rank.max"), rank.max, as.integer(3))
	81	thresh <<- 1e-15 #immutable (TODO:?)
39046da6 BA	82	},
	83
	84	##################################
	85	#core workflow: compute all models
	86	##################################
	87
	88	initParameters = function(k)
	89	{
	90	"Parameters initialization"
	91
	92	#smallEM initializes parameters by k-means and regression model in each component,
	93	#doing this 20 times, and keeping the values maximizing the likelihood after 10
	94	#iterations of the EM algorithm.
e3f2fe8a	95	init = initSmallEM(k,X,Y)
8e92c49c BA	96	phiInit <<- init$phi0
	97	rhoInit <<- init$rho0
	98	piInit <<- init$pi0
	99	tauInit <<- init$tau0
39046da6 BA	100	},
	101
	102	computeGridLambda = function()
	103	{
	104	"computation of the regularization grid"
	105	#(according to explicit formula given by EM algorithm)
	106
8e92c49c	107	gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
39046da6 BA	108	},
	109
	110	computeRelevantParameters = function()
	111	{
	112	"Compute relevant parameters"
	113
	114	#select variables according to each regularization parameter
8e92c49c BA	115	#from the grid: A1 corresponding to selected variables, and
	116	#A2 corresponding to unselected variables.
	117	params = selectiontotale(
22d21a22	118	phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps)
39046da6 BA	119	A1 <<- params$A1
	120	A2 <<- params$A2
	121	Rho <<- params$Rho
	122	Pi <<- params$Pi
	123	},
	124
	125	runProcedure1 = function()
	126	{
	127	"Run procedure 1 [EMGLLF]"
	128
	129	#compute parameter estimations, with the Maximum Likelihood
	130	#Estimator, restricted on selected variables.
8e92c49c	131	return ( constructionModelesLassoMLE(
22d21a22	132	phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,thresh,eps,A1,A2) )
39046da6 BA	133	},
	134
	135	runProcedure2 = function()
	136	{
	137	"Run procedure 2 [EMGrank]"
	138
	139	#compute parameter estimations, with the Low Rank
	140	#Estimator, restricted on selected variables.
8e92c49c	141	return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
22d21a22	142	A1,rank.min,rank.max) )
39046da6 BA	143	},
39046da6 BA	144
8e92c49c	145	run = function()
39046da6 BA	146	{
	147	"main loop: over all k and all lambda"
	148
22d21a22	149	# Run the whole procedure, 1 with the
39046da6	150	#maximum likelihood refitting, and 2 with the Low Rank refitting.
8e92c49c BA	151	p = dim(phiInit)[1]
	152	m = dim(phiInit)[2]
	153	for (k in kmin:kmax)
	154	{
	155	print(k)
	156	initParameters(k)
	157	computeGridLambda()
	158	computeRelevantParameters()
	159	if (procedure == 1)
39046da6	160	{
8e92c49c BA	161	r1 = runProcedure1()
	162	Phi2 = Phi
	163	Rho2 = Rho
	164	Pi2 = Pi
	165	p = ncol(X)
	166	m = ncol(Y)
09ab3c16	167	if (is.null(dim(Phi2))) #test was: size(Phi2) == 0
8e92c49c	168	{
09ab3c16 BA	169	Phi[,,1:k] <<- r1$phi
	170	Rho[,,1:k] <<- r1$rho
	171	Pi[1:k,] <<- r1$pi
8e92c49c BA	172	} else
8e92c49c BA	173	{
09ab3c16 BA	174	Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
	175	Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
	176	Phi[,,1:k,dim(Phi2)[4]+1] <<- r1$phi
	177	Rho <<- array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
	178	Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] <<- Rho2
	179	Rho[,,1:k,dim(Rho2)[4]+1] <<- r1$rho
	180	Pi <<- array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
	181	Pi[1:nrow(Pi2),1:ncol(Pi2)] <<- Pi2
	182	Pi[1:k,ncol(Pi2)+1] <<- r1$pi
8e92c49c BA	183	}
	184	} else
	185	{
	186	phi = runProcedure2()$phi
	187	Phi2 = Phi
	188	if (dim(Phi2)[1] == 0)
	189	{
09ab3c16	190	Phi[,,1:k,] <<- phi
8e92c49c	191	} else
39046da6	192	{
09ab3c16 BA	193	Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(phi)[4]))
	194	Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
	195	Phi[,,1:k,-(1:(dim(Phi2)[4]))] <<- phi
8e92c49c BA	196	}
	197	}
	198	}
	199	}
	200
39046da6	201	##################################################
8e92c49c	202	#TODO: pruning: select only one (or a few best ?!) model
39046da6 BA	203	##################################################
39046da6 BA	204	#
8e92c49c	205	# function[model] selectModel(
39046da6	206	# #TODO
8e92c49c	207	# #model = odel(...)
39046da6	208	# end
22d21a22	209	# Give at least the slope heuristic and BIC, and AIC ?
8e92c49c	210
39046da6 BA	211	)
39046da6 BA	212	)