[valse.git] / src / test / generate_test_data / helpers / EMGrank.R

# Helper guaranteeing a matrix argument: a matrix is returned untouched, while
# anything else goes through as.matrix() and is then transposed (so a plain
# vector of length L comes back as a 1 x L row matrix).
# (TODO: put this elsewhere? improve?)
matricize <- function(X)
{
  if (is.matrix(X))
    return (X)
  t(as.matrix(X))
}

require(MASS)
# EM-like procedure with a rank constraint on each component's regression
# coefficients.
#
# Arguments:
#   Pi    vector indexed by component r; only log(Pi[r]) is used
#         (presumably the mixture proportions -- confirm with callers)
#   Rho   array whose 2nd dimension is m and 3rd dimension is k; each slice
#         Rho[,,r] is right-multiplied to rows of Y and passed to det()
#   mini  minimum number of iterations of the main loop
#   maxi  maximum number of iterations of the main loop
#   X     n x p matrix (regressors)
#   Y     matrix with n rows, conformable with Rho[,,r] (responses)
#   tau   threshold on the sum of the last (at most 20) relative changes of
#         phi; iterations beyond 'mini' continue only while the sum exceeds it
#   rank  vector indexed by component r: number of singular values kept
#
# Returns list(phi, LLF): phi is a p x m x k array, LLF the value computed
# during the last iteration (0 if the loop never ran). LLF is the result of
# matrix arithmetic on a tcrossprod() output, i.e. a 1 x 1 matrix.
#
# Single-column X or Y (p == 1 or m == 1) and single-row clusters are handled
# by never letting matrix slices drop their dimensions.
EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
{
  # matrix dimensions
  n <- dim(X)[1]
  p <- dim(X)[2]
  m <- dim(Rho)[2]
  k <- dim(Rho)[3]

  # init outputs
  phi <- array(0, dim = c(p, m, k))
  Z <- rep(1, n)
  LLF <- 0

  # local variables
  Phi <- array(0, dim = c(p, m, k))
  deltaPhi <- c()
  sumDeltaPhi <- 0
  deltaPhiBufferSize <- 20

  # Loop invariants: Rho slices kept as genuine matrices (Rho[,,r] would drop
  # to a scalar when m == 1, which det() rejects), and the part of the log
  # weight that does not depend on the observation.
  RhoMat <- vector("list", k)
  logWeight <- numeric(k)
  for (r in seq_len(k)) {
    RhoMat[[r]] <- matrix(Rho[, , r], nrow = dim(Rho)[1], ncol = m)
    logWeight[r] <- log(Pi[r]) + log(det(RhoMat[[r]]))
  }

  # main loop
  ite <- 1
  while (ite <= mini || (ite <= maxi && sumDeltaPhi > tau)) {
    # M step: update Beta (and hence phi)
    for (r in seq_len(k)) {
      Z_indice <- which(Z == r) # indices where Z == r
      if (length(Z_indice) == 0)
        next
      # drop = FALSE keeps the row/column layout even for one row or column
      Xr <- X[Z_indice, , drop = FALSE]
      Yr <- Y[Z_indice, , drop = FALSE]
      # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
      s <- svd(MASS::ginv(crossprod(Xr)) %*% crossprod(Xr, Yr))
      S <- s$d
      # Zero all but the first rank[r] singular values, then recompose the
      # best rank-rank[r] approximation of the initial product
      if (rank[r] < length(S))
        S[(rank[r] + 1):length(S)] <- 0
      # diag(S) alone would build an identity matrix when length(S) == 1
      phi[, , r] <- s$u %*% diag(S, nrow = length(S)) %*% t(s$v) %*% RhoMat[[r]]
    }

    # phi slices as explicit p x m matrices for the E step
    phiMat <- lapply(seq_len(k),
                     function(r) matrix(phi[, , r], nrow = p, ncol = m))

    # E step and computation of LLF
    sumLogLLF2 <- 0
    for (i in seq_len(n)) {
      sumLLF1 <- 0
      maxLogGamIR <- -Inf
      for (r in seq_len(k)) {
        # squared norm of the residual row; tcrossprod yields a 1 x 1 matrix
        dotProduct <- tcrossprod(Y[i, ] %*% RhoMat[[r]] - X[i, ] %*% phiMat[[r]])
        logGamIR <- logWeight[r] - 0.5 * dotProduct
        # Z[i] = index of max (gam[i,])
        if (logGamIR > maxLogGamIR) {
          Z[i] <- r
          maxLogGamIR <- logGamIR
        }
        sumLLF1 <- sumLLF1 + exp(logGamIR) / (2 * pi)^(m / 2)
      }
      sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
    }

    LLF <- -1 / n * sumLogLLF2

    # update distance parameter to check algorithm convergence
    # (delta(phi, Phi)): largest relative entrywise change of phi, kept in a
    # sliding window of at most deltaPhiBufferSize values
    deltaPhi <- c(deltaPhi, max(abs(phi - Phi) / (1 + abs(phi))))
    if (length(deltaPhi) > deltaPhiBufferSize)
      deltaPhi <- deltaPhi[2:length(deltaPhi)]
    sumDeltaPhi <- sum(abs(deltaPhi))

    # update other local variables
    Phi <- phi
    ite <- ite + 1
  }
  return(list("phi" = phi, "LLF" = LLF))
}
Commit	Line	Data
	1	#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
	2	matricize <- function(X)
	3	{
	4	if (!is.matrix(X))
	5	return (t(as.matrix(X)))
	6	return (X)
	7	}
	8
	9	require(MASS)
	10	EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
	11	{
	12	#matrix dimensions
	13	n = dim(X)[1]
	14	p = dim(X)[2]
	15	m = dim(Rho)[2]
	16	k = dim(Rho)[3]
	17
	18	#init outputs
	19	phi = array(0, dim=c(p,m,k))
	20	Z = rep(1, n)
	21	LLF = 0
	22
	23	#local variables
	24	Phi = array(0, dim=c(p,m,k))
	25	deltaPhi = c()
	26	sumDeltaPhi = 0.
	27	deltaPhiBufferSize = 20
	28
	29	#main loop
	30	ite = 1
	31	while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
	32	{
	33	#M step: Mise à jour de Beta (et donc phi)
	34	for(r in 1:k)
	35	{
	36	Z_indice = seq_len(n)[Z==r] #indices où Z == r
	37	if (length(Z_indice) == 0)
	38	next
	39	#U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
	40	s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
	41	crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
	42	S = s$d
	43	#Set m-rank(r) singular values to zero, and recompose
	44	#best rank(r) approximation of the initial product
	45	if(rank[r] < length(S))
	46	S[(rank[r]+1):length(S)] = 0
	47	phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
	48	}
	49
	50	#Etape E et calcul de LLF
	51	sumLogLLF2 = 0
	52	for(i in seq_len(n))
	53	{
	54	sumLLF1 = 0
	55	maxLogGamIR = -Inf
	56	for (r in seq_len(k))
	57	{
	58	dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
	59	logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
	60	#Z[i] = index of max (gam[i,])
	61	if(logGamIR > maxLogGamIR)
	62	{
	63	Z[i] = r
	64	maxLogGamIR = logGamIR
	65	}
	66	sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
	67	}
	68	sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
	69	}
	70
	71	LLF = -1/n * sumLogLLF2
	72
	73	#update distance parameter to check algorithm convergence (delta(phi, Phi))
	74	deltaPhi = c( deltaPhi, max( (abs(phi-Phi)) / (1+abs(phi)) ) ) #TODO: explain?
	75	if (length(deltaPhi) > deltaPhiBufferSize)
	76	deltaPhi = deltaPhi[2:length(deltaPhi)]
	77	sumDeltaPhi = sum(abs(deltaPhi))
	78
	79	#update other local variables
	80	Phi = phi
	81	ite = ite+1
	82	}
	83	return(list("phi"=phi, "LLF"=LLF))
	84	}