[valse.git] / R / main.R

#' @useDynLib valse

Valse = setRefClass(
	Class = "Valse",

	fields = c(
		# User defined

		# regression data (size n*p, where n is the number of observations,
		# and p is the number of regressors)
		X = "matrix",
		# response data (size n*m, where n is the number of observations,
		# and m is the number of responses)
		Y = "matrix",

		# Optionally user defined (some default values)

		# power in the penalty
		gamma = "numeric",
		# minimum number of iterations for EM algorithm
		mini = "integer",
		# maximum number of iterations for EM algorithm
		maxi = "integer",
		# threshold for stopping EM algorithm
		eps = "numeric",
		# minimum number of components in the mixture
		kmin = "integer",
		# maximum number of components in the mixture
		kmax = "integer",
		rangmin = "integer",
		rangmax = "integer",
		
		# Computed through the workflow

		# initialisation for the reparametrized conditional mean parameter
		phiInit = "numeric",
		# initialisation for the reparametrized variance parameter
		rhoInit = "numeric",
		# initialisation for the proportions
		piInit = "numeric",
		# initialisation for the allocations probabilities in each component
		tauInit = "numeric",
		# values for the regularization parameter grid
		gridLambda = "numeric",
		# je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
		# une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
		A1 = "integer",
		A2 = "integer",
		# collection of estimations for the reparametrized conditional mean parameters
		Phi = "numeric",
		# collection of estimations for the reparametrized variance parameters
		Rho = "numeric",
		# collection of estimations for the proportions parameters
		Pi = "numeric",

		#immutable (TODO:?)
		seuil = "numeric"
	),

	methods = list(
		#######################
		#initialize main object
		#######################
		initialize = function(X,Y,...)
		{
			"Initialize Valse object"

			callSuper(...)

			X <<- X
			Y <<- Y
			gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
			mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
			maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
			eps <<- ifelse (hasArg("eps"), eps, 1e-6)
			kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
			kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
			rangmin <<- ifelse (hasArg("rangmin"), rangmin, as.integer(2))
			rangmax <<- ifelse (hasArg("rangmax"), rangmax, as.integer(3))
			seuil <<- 1e-15 #immutable (TODO:?)
		},

		##################################
		#core workflow: compute all models
		##################################

		initParameters = function(k)
		{
			"Parameters initialization"

			#smallEM initializes parameters by k-means and regression model in each component,
			#doing this 20 times, and keeping the values maximizing the likelihood after 10
			#iterations of the EM algorithm.
			init = initSmallEM(k,X,Y,eps)
			phiInit <<- init$phi0
			rhoInit <<- init$rho0
			piInit	<<- init$pi0
			tauInit <<- init$tau0
		},

		computeGridLambda = function()
		{
			"computation of the regularization grid"
			#(according to explicit formula given by EM algorithm)

			gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
		},

		computeRelevantParameters = function()
		{
			"Compute relevant parameters"

			#select variables according to each regularization parameter
			#from the grid: A1 corresponding to selected variables, and
			#A2 corresponding to unselected variables.
			params = selectiontotale(
				phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,seuil,eps)
			A1 <<- params$A1
			A2 <<- params$A2
			Rho <<- params$Rho
			Pi <<- params$Pi
		},

		runProcedure1 = function()
		{
			"Run procedure 1 [EMGLLF]"

			#compute parameter estimations, with the Maximum Likelihood
			#Estimator, restricted on selected variables.
			return ( constructionModelesLassoMLE(
				phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,seuil,eps,A1,A2) )
		},

		runProcedure2 = function()
		{
			"Run procedure 2 [EMGrank]"

			#compute parameter estimations, with the Low Rank
			#Estimator, restricted on selected variables.
			return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
				A1,rangmin,rangmax) )
		},

		run = function()
		{
			"main loop: over all k and all lambda"

			# Run the all procedure, 1 with the
			#maximum likelihood refitting, and 2 with the Low Rank refitting.
			p = dim(phiInit)[1]
			m = dim(phiInit)[2]
			for (k in kmin:kmax)
			{
				print(k)
				initParameters(k)
				computeGridLambda()
				computeRelevantParameters()
				if (procedure == 1)
				{
					r1 = runProcedure1()
					Phi2 = Phi
					Rho2 = Rho
					Pi2 = Pi
					p = ncol(X)
					m = ncol(Y)
					if (is.null(dim(Phi2))) #test was: size(Phi2) == 0
					{
						Phi[,,1:k] <<- r1$phi
						Rho[,,1:k] <<- r1$rho
						Pi[1:k,] <<- r1$pi
					} else
					{
						Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
						Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
						Phi[,,1:k,dim(Phi2)[4]+1] <<- r1$phi
						Rho <<- array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
						Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] <<- Rho2
						Rho[,,1:k,dim(Rho2)[4]+1] <<- r1$rho
						Pi <<- array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
						Pi[1:nrow(Pi2),1:ncol(Pi2)] <<- Pi2
						Pi[1:k,ncol(Pi2)+1] <<- r1$pi
					}
				} else
				{
					phi = runProcedure2()$phi
					Phi2 = Phi
					if (dim(Phi2)[1] == 0)
					{
						Phi[,,1:k,] <<- phi
					} else
					{
						Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(phi)[4]))
						Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
						Phi[,,1:k,-(1:(dim(Phi2)[4]))] <<- phi
					}
				}
			}
		}

		##################################################
		#TODO: pruning: select only one (or a few best ?!) model
		##################################################
		#
		# 		function[model] selectModel(
		# 			#TODO
		# 			#model = odel(...)
		# 		end

		)
)
Commit	Line	Data
cad71b2c BA	1	#' @useDynLib valse
cad71b2c BA	2
8e92c49c BA	3	Valse = setRefClass(
8e92c49c BA	4	Class = "Valse",
39046da6 BA	5
	6	fields = c(
	7	# User defined
	8
	9	# regression data (size n*p, where n is the number of observations,
	10	# and p is the number of regressors)
aa8df014	11	X = "matrix",
8e92c49c	12	# response data (size n*m, where n is the number of observations,
39046da6	13	# and m is the number of responses)
aa8df014	14	Y = "matrix",
39046da6 BA	15
	16	# Optionally user defined (some default values)
	17
	18	# power in the penalty
09ab3c16	19	gamma = "numeric",
39046da6 BA	20	# minimum number of iterations for EM algorithm
	21	mini = "integer",
	22	# maximum number of iterations for EM algorithm
	23	maxi = "integer",
	24	# threshold for stopping EM algorithm
09ab3c16	25	eps = "numeric",
39046da6 BA	26	# minimum number of components in the mixture
	27	kmin = "integer",
	28	# maximum number of components in the mixture
	29	kmax = "integer",
	30	rangmin = "integer",
	31	rangmax = "integer",
	32
	33	# Computed through the workflow
	34
	35	# initialisation for the reparametrized conditional mean parameter
09ab3c16	36	phiInit = "numeric",
39046da6	37	# initialisation for the reparametrized variance parameter
09ab3c16	38	rhoInit = "numeric",
39046da6	39	# initialisation for the proportions
09ab3c16	40	piInit = "numeric",
39046da6	41	# initialisation for the allocations probabilities in each component
09ab3c16	42	tauInit = "numeric",
39046da6	43	# values for the regularization parameter grid
09ab3c16	44	gridLambda = "numeric",
39046da6 BA	45	# je ne crois pas vraiment qu'il faille les mettre en sortie, d'autant plus qu'on construit
39046da6 BA	46	# une matrice A1 et A2 pour chaque k, et elles sont grandes, donc ca coute un peu cher ...
09ab3c16 BA	47	A1 = "integer",
09ab3c16 BA	48	A2 = "integer",
39046da6	49	# collection of estimations for the reparametrized conditional mean parameters
09ab3c16	50	Phi = "numeric",
39046da6	51	# collection of estimations for the reparametrized variance parameters
09ab3c16	52	Rho = "numeric",
39046da6	53	# collection of estimations for the proportions parameters
09ab3c16	54	Pi = "numeric",
39046da6	55
09ab3c16 BA	56	#immutable (TODO:?)
09ab3c16 BA	57	seuil = "numeric"
39046da6 BA	58	),
	59
	60	methods = list(
	61	#######################
	62	#initialize main object
	63	#######################
	64	initialize = function(X,Y,...)
	65	{
8e92c49c	66	"Initialize Valse object"
39046da6 BA	67
	68	callSuper(...)
	69
8e92c49c BA	70	X <<- X
8e92c49c BA	71	Y <<- Y
39046da6 BA	72	gamma <<- ifelse (hasArg("gamma"), gamma, 1.)
	73	mini <<- ifelse (hasArg("mini"), mini, as.integer(5))
	74	maxi <<- ifelse (hasArg("maxi"), maxi, as.integer(10))
	75	eps <<- ifelse (hasArg("eps"), eps, 1e-6)
	76	kmin <<- ifelse (hasArg("kmin"), kmin, as.integer(2))
	77	kmax <<- ifelse (hasArg("kmax"), kmax, as.integer(3))
	78	rangmin <<- ifelse (hasArg("rangmin"), rangmin, as.integer(2))
	79	rangmax <<- ifelse (hasArg("rangmax"), rangmax, as.integer(3))
09ab3c16	80	seuil <<- 1e-15 #immutable (TODO:?)
39046da6 BA	81	},
	82
	83	##################################
	84	#core workflow: compute all models
	85	##################################
	86
	87	initParameters = function(k)
	88	{
	89	"Parameters initialization"
	90
	91	#smallEM initializes parameters by k-means and regression model in each component,
	92	#doing this 20 times, and keeping the values maximizing the likelihood after 10
	93	#iterations of the EM algorithm.
8e92c49c BA	94	init = initSmallEM(k,X,Y,eps)
	95	phiInit <<- init$phi0
	96	rhoInit <<- init$rho0
	97	piInit <<- init$pi0
	98	tauInit <<- init$tau0
39046da6 BA	99	},
	100
	101	computeGridLambda = function()
	102	{
	103	"computation of the regularization grid"
	104	#(according to explicit formula given by EM algorithm)
	105
8e92c49c	106	gridLambda <<- gridLambda(phiInit,rhoInit,piInit,tauInit,X,Y,gamma,mini,maxi,eps)
39046da6 BA	107	},
	108
	109	computeRelevantParameters = function()
	110	{
	111	"Compute relevant parameters"
	112
	113	#select variables according to each regularization parameter
8e92c49c BA	114	#from the grid: A1 corresponding to selected variables, and
	115	#A2 corresponding to unselected variables.
	116	params = selectiontotale(
	117	phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,seuil,eps)
39046da6 BA	118	A1 <<- params$A1
	119	A2 <<- params$A2
	120	Rho <<- params$Rho
	121	Pi <<- params$Pi
	122	},
	123
	124	runProcedure1 = function()
	125	{
	126	"Run procedure 1 [EMGLLF]"
	127
	128	#compute parameter estimations, with the Maximum Likelihood
	129	#Estimator, restricted on selected variables.
8e92c49c BA	130	return ( constructionModelesLassoMLE(
8e92c49c BA	131	phiInit,rhoInit,piInit,tauInit,mini,maxi,gamma,gridLambda,X,Y,seuil,eps,A1,A2) )
39046da6 BA	132	},
	133
	134	runProcedure2 = function()
	135	{
	136	"Run procedure 2 [EMGrank]"
	137
	138	#compute parameter estimations, with the Low Rank
	139	#Estimator, restricted on selected variables.
8e92c49c BA	140	return ( constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,eps,
8e92c49c BA	141	A1,rangmin,rangmax) )
39046da6 BA	142	},
39046da6 BA	143
8e92c49c	144	run = function()
39046da6 BA	145	{
	146	"main loop: over all k and all lambda"
	147
	148	# Run the all procedure, 1 with the
	149	#maximum likelihood refitting, and 2 with the Low Rank refitting.
8e92c49c BA	150	p = dim(phiInit)[1]
	151	m = dim(phiInit)[2]
	152	for (k in kmin:kmax)
	153	{
	154	print(k)
	155	initParameters(k)
	156	computeGridLambda()
	157	computeRelevantParameters()
	158	if (procedure == 1)
39046da6	159	{
8e92c49c BA	160	r1 = runProcedure1()
	161	Phi2 = Phi
	162	Rho2 = Rho
	163	Pi2 = Pi
	164	p = ncol(X)
	165	m = ncol(Y)
09ab3c16	166	if (is.null(dim(Phi2))) #test was: size(Phi2) == 0
8e92c49c	167	{
09ab3c16 BA	168	Phi[,,1:k] <<- r1$phi
	169	Rho[,,1:k] <<- r1$rho
	170	Pi[1:k,] <<- r1$pi
8e92c49c BA	171	} else
8e92c49c BA	172	{
09ab3c16 BA	173	Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(r1$phi)[4]))
	174	Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
	175	Phi[,,1:k,dim(Phi2)[4]+1] <<- r1$phi
	176	Rho <<- array(0., dim=c(m,m,kmax,dim(Rho2)[4]+dim(r1$rho)[4]))
	177	Rho[,,1:(dim(Rho2)[3]),1:(dim(Rho2)[4])] <<- Rho2
	178	Rho[,,1:k,dim(Rho2)[4]+1] <<- r1$rho
	179	Pi <<- array(0., dim=c(kmax,dim(Pi2)[2]+dim(r1$pi)[2]))
	180	Pi[1:nrow(Pi2),1:ncol(Pi2)] <<- Pi2
	181	Pi[1:k,ncol(Pi2)+1] <<- r1$pi
8e92c49c BA	182	}
	183	} else
	184	{
	185	phi = runProcedure2()$phi
	186	Phi2 = Phi
	187	if (dim(Phi2)[1] == 0)
	188	{
09ab3c16	189	Phi[,,1:k,] <<- phi
8e92c49c	190	} else
39046da6	191	{
09ab3c16 BA	192	Phi <<- array(0., dim=c(p,m,kmax,dim(Phi2)[4]+dim(phi)[4]))
	193	Phi[,,1:(dim(Phi2)[3]),1:(dim(Phi2)[4])] <<- Phi2
	194	Phi[,,1:k,-(1:(dim(Phi2)[4]))] <<- phi
8e92c49c BA	195	}
	196	}
	197	}
	198	}
	199
39046da6	200	##################################################
8e92c49c	201	#TODO: pruning: select only one (or a few best ?!) model
39046da6 BA	202	##################################################
39046da6 BA	203	#
8e92c49c	204	# function[model] selectModel(
39046da6	205	# #TODO
8e92c49c	206	# #model = odel(...)
39046da6	207	# end
8e92c49c	208
39046da6 BA	209	)
39046da6 BA	210	)