[valse.git] / src / test / generate_test_data / helpers / EMGrank.R

# Helper guaranteeing a matrix argument: a matrix is handed back as is, while
# anything else goes through as.matrix() and is then transposed (so a plain
# vector becomes a single-row matrix).
# (TODO: put this elsewhere? improve?)
matricize <- function(X)
{
	if (is.matrix(X))
		return (X)
	# as.matrix() turns a plain vector into a single column; transpose it
	columnForm <- as.matrix(X)
	t(columnForm)
}

require(MASS)
# EM-style iteration with a hard (arg-max) assignment of observations to
# components and a rank-constrained regression update per component.
#
# Arguments:
#   Pi    vector indexed by component r; log(Pi[r]) enters the E step
#         (presumably the mixture proportions -- confirm with callers)
#   Rho   array of dimension (m, m, k); Rho[,,r] is used both in the linear
#         predictor and through log(det(Rho[,,r]))
#   mini  number of iterations that are always performed
#   maxi  upper bound on the number of iterations
#   X     n x p matrix of regressors
#   Y     n x m matrix of responses
#   tau   threshold: past 'mini' iterations the loop goes on only while the
#         sum of the buffered relative changes of phi exceeds tau
#   rank  vector indexed by component r: number of singular values kept when
#         truncating the least-squares coefficient matrix of component r
#
# Returns list(phi, LLF):
#   phi   array of dimension (p, m, k)
#   LLF   minus the average log-likelihood over the n observations, computed
#         in the last iteration (0 when no iteration is run)
EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){
  #matrix dimensions
  n = dim(X)[1]
  p = dim(X)[2]
  m = dim(Rho)[2]
  k = dim(Rho)[3]

  #init outputs
  phi = array(0, dim=c(p,m,k))
  Z = rep(1, n) #every observation starts in component 1
  LLF = 0

  #local variables
  Phi = array(0, dim=c(p,m,k)) #phi from the previous iteration
  deltaPhi = c(0)
  sumDeltaPhi = 0
  deltaPhiBufferSize = 20

  #Slice r of Rho as a genuine m x m matrix: Rho[,,r] alone drops to a bare
  #scalar when m == 1, which det() rejects.
  rhoSlice = function(r) matrix(Rho[,,r], nrow=m, ncol=m)

  #log(det(Rho_r)) does not change across iterations: compute it once
  logDetRho = vapply(seq_len(k), function(r) log(det(rhoSlice(r))), numeric(1))

  #main loop
  ite = 1
  while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
  {
    #M step: update Beta (and therefore phi)
    for(r in seq_len(k))
    {
      Z_indice = which(Z==r) #indices where Z == r
      if (length(Z_indice) == 0)
        next #empty component: keep its previous phi
      #drop=FALSE keeps sub-matrices two-dimensional, also for a single
      #selected row or when p == 1 / m == 1
      Xr = X[Z_indice, , drop=FALSE]
      Yr = Y[Z_indice, , drop=FALSE]
      #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
      s = svd( MASS::ginv(crossprod(Xr)) %*% crossprod(Xr, Yr) )
      S = s$d
      #Zero all but the first rank[r] singular values, then recompose the
      #best rank[r] approximation of the initial product
      if(rank[r] < length(S))
        S[(rank[r]+1):length(S)] = 0
      #diag(S, nrow=...) so that a single singular value yields a 1x1 matrix;
      #diag(<scalar>) would build an identity matrix of that size instead
      phi[,,r] = s$u %*% diag(S, nrow=length(S)) %*% t(s$v) %*% rhoSlice(r)
    }

    #E step and computation of LLF
    sumLogLLF2 = 0
    for(i in seq_len(n)){
      sumLLF1 = 0
      maxLogGamIR = -Inf
      for(r in seq_len(k)){
        phi_r = matrix(phi[,,r], nrow=p, ncol=m)
        #squared norm of Y[i,]*Rho_r - X[i,]*phi_r (kept as a 1x1 matrix)
        dotProduct = tcrossprod(Y[i,]%*%rhoSlice(r) - X[i,]%*%phi_r)
        logGamIR = log(Pi[r]) + logDetRho[r] - 0.5*dotProduct
        #Z[i] = index of max (gam[i,]); ties keep the smallest index
        if(logGamIR > maxLogGamIR){
          Z[i] = r
          maxLogGamIR = logGamIR
        }
        sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
      }
      sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
    }

    LLF = -1/n * sumLogLLF2

    #update distance parameter to check algorithm convergence (delta(phi, Phi))
    deltaPhi = c(deltaPhi, max(abs(phi-Phi) / (1+abs(phi))))
    #sliding window: keep only the last deltaPhiBufferSize values
    if(length(deltaPhi) > deltaPhiBufferSize)
      deltaPhi = deltaPhi[-1]
    sumDeltaPhi = sum(abs(deltaPhi))

    #update other local variables
    Phi = phi
    ite = ite+1
  }
  return(list(phi=phi, LLF=LLF))
}
Commit	Line	Data
	1	#helper to always have matrices as arg (TODO: put this elsewhere? improve?)
	2	matricize <- function(X)
	3	{
	4	if (!is.matrix(X))
	5	return (t(as.matrix(X)))
	6	return (X)
	7	}
	8
	9	require(MASS)
	10	EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){
	11	#matrix dimensions
	12	n = dim(X)[1]
	13	p = dim(X)[2]
	14	m = dim(Rho)[2]
	15	k = dim(Rho)[3]
	16
	17	#init outputs
	18	phi = array(0, dim=c(p,m,k))
	19	Z = rep(1, n)
	20	# Pi = piInit
	21	LLF = 0
	22
	23	#local variables
	24	Phi = array(0, dim=c(p,m,k))
	25	deltaPhi = c(0)
	26	sumDeltaPhi = 0
	27	deltaPhiBufferSize = 20
	28
	29	#main loop
	30	ite = 1
	31	while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
	32	{
	33	#M step: Mise à jour de Beta (et donc phi)
	34	for(r in 1:k)
	35	{
	36	Z_indice = seq_len(n)[Z==r] #indices où Z == r
	37	if (length(Z_indice) == 0)
	38	next
	39	#U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
	40	s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
	41	crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) )
	42	S = s$d
	43	U = s$u
	44	V = s$v
	45	#Set m-rank(r) singular values to zero, and recompose
	46	#best rank(r) approximation of the initial product
	47	if(rank[r] < length(S))
	48	S[(rank[r]+1):length(S)] = 0
	49	phi[,,r] = U %*% diag(S) %*% t(V) %*% Rho[,,r]
	50	}
	51
	52	#Etape E et calcul de LLF
	53	sumLogLLF2 = 0
	54	for(i in 1:n){
	55	sumLLF1 = 0
	56	maxLogGamIR = -Inf
	57	for(r in 1:k){
	58	dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r])
	59	logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
	60	#Z[i] = index of max (gam[i,])
	61	if(logGamIR > maxLogGamIR){
	62	Z[i] = r
	63	maxLogGamIR = logGamIR
	64	}
	65	sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
	66	}
	67	sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
	68	}
	69
	70	LLF = -1/n * sumLogLLF2
	71
	72	#update distance parameter to check algorithm convergence (delta(phi, Phi))
	73	deltaPhi = c(deltaPhi, max(max(max((abs(phi-Phi))/(1+abs(phi))))) )
	74	if(length(deltaPhi) > deltaPhiBufferSize){
	75	l_1 = c(2:length(deltaPhi))
	76	deltaPhi = deltaPhi[l_1]
	77	}
	78	sumDeltaPhi = sum(abs(deltaPhi))
	79
	80	#update other local variables
	81	Phi = phi
	82	ite = ite+1
	83	}
	84	return(list(phi=phi, LLF=LLF))
	85	}