X-Git-Url: https://git.auder.net/?p=valse.git;a=blobdiff_plain;f=pkg%2FR%2FEMGrank.R;h=0e68cb4ecdf4dd9f9270557823d76a3875a7f72e;hp=71f7c5be52474f0896424cc3ea1a17ab14843be4;hb=a3105972158da4773b33d41e1ead65a942c15f80;hpb=4fed76cce5e3347a8f82457603d77bf38a02b619

diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R
index 71f7c5b..0e68cb4 100644
--- a/pkg/R/EMGrank.R
+++ b/pkg/R/EMGrank.R
@@ -2,15 +2,123 @@
 #'
 #' Description of EMGrank
 #'
-#' @param Pi ...
+#' @param Pi Proportion parameter of the mixture
+#' @param Rho Initial renormalized variance parameter
+#' @param mini Minimum number of iterations in the EM algorithm
+#' @param maxi Maximum number of iterations in the EM algorithm
+#' @param X Regressors
+#' @param Y Response
+#' @param tau Threshold to accept convergence
+#' @param rank Vector of possible ranks
+#' @param fast TRUE to use the C implementation, FALSE for the R version
 #'
-#' @return ...
+#' @return A list with:
+#'   phi: renormalized mean parameter, estimated by the EM algorithm
+#'   LLF: log-likelihood of this sample, for the estimated parameter values
 #'
-#' @examples
-#' ...
-#' ...
 #' @export
-EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast=TRUE)
 {
-  .Call("EMGrank", Pi, Rho, mini, maxi, X, Y, tau, rank, PACKAGE="valse")
+  if (!fast)
+  {
+    # Function in R
+    return (.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
+  }
+
+  # Function in C
+  n = nrow(X) # number of samples
+  p = ncol(X) # number of covariates
+  m = ncol(Y) # size of Y (multivariate response)
+  k = length(Pi) # number of mixture components
+  .Call("EMGrank",
+    Pi, Rho, mini, maxi, X, Y, tau, rank,
+    phi=double(p*m*k), LLF=double(1),
+    n, p, m, k,
+    PACKAGE="valse")
+}
+
+# Helper to always have matrices as arg (TODO: put this elsewhere? improve?)
+# --> Yes, we should use by-columns storage everywhere... [later!]
+matricize <- function(X)
+{
+  if (!is.matrix(X))
+    return (t(as.matrix(X)))
+  return (X)
+}
+
+# R version - slow but easy to read
+.EMGrank_R = function(Pi, Rho, mini, maxi, X, Y, tau, rank)
+{
+  # matrix dimensions
+  n = dim(X)[1]
+  p = dim(X)[2]
+  m = dim(Rho)[2]
+  k = dim(Rho)[3]
+
+  # init outputs
+  phi = array(0, dim=c(p,m,k))
+  Z = rep(1, n)
+  LLF = 0
+
+  # local variables
+  Phi = array(0, dim=c(p,m,k))
+  deltaPhi = c()
+  sumDeltaPhi = 0.
+  deltaPhiBufferSize = 20
+
+  # main loop
+  ite = 1
+  while (ite<=mini || (ite<=maxi && sumDeltaPhi>tau))
+  {
+    # M step: update Beta (and hence phi)
+    for (r in 1:k)
+    {
+      Z_indice = seq_len(n)[Z==r] # indices where Z == r
+      if (length(Z_indice) == 0)
+        next
+      # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr; ginv() is the pseudo-inverse (MASS)
+      s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*%
+               crossprod(matricize(X[Z_indice,]), matricize(Y[Z_indice,])) )
+      S = s$d
+      # Set all but the rank[r] largest singular values to zero, and recompose
+      # the best rank(r) approximation of the initial product
+      if (rank[r] < length(S))
+        S[(rank[r]+1):length(S)] = 0
+      phi[,,r] = s$u %*% diag(S) %*% t(s$v) %*% Rho[,,r]
+    }
+
+    # E step, and computation of the LLF
+    sumLogLLF2 = 0
+    for (i in seq_len(n))
+    {
+      sumLLF1 = 0
+      maxLogGamIR = -Inf
+      for (r in seq_len(k))
+      {
+        dotProduct = tcrossprod(Y[i,]%*%Rho[,,r] - X[i,]%*%phi[,,r])
+        logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct
+        # Z[i] = index of max (gam[i,])
+        if (logGamIR > maxLogGamIR)
+        {
+          Z[i] = r
+          maxLogGamIR = logGamIR
+        }
+        sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2)
+      }
+      sumLogLLF2 = sumLogLLF2 + log(sumLLF1)
+    }
+
+    LLF = -1/n * sumLogLLF2
+
+    # update distance parameter to check algorithm convergence (delta(phi, Phi))
+    deltaPhi = c(deltaPhi, max((abs(phi-Phi)) / (1+abs(phi)))) # max relative change in phi
+    if (length(deltaPhi) > deltaPhiBufferSize)
+      deltaPhi = deltaPhi[2:length(deltaPhi)]
+    sumDeltaPhi = sum(abs(deltaPhi))
+
+    # update other local variables
+    Phi = phi
+    ite = ite+1
+  }
+  return(list("phi"=phi, "LLF"=LLF))
 }
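
A remark on the M step in .EMGrank_R: zeroing all but the rank[r] largest singular values and recomposing U diag(S) t(V) gives the best rank(r) approximation of the matrix in Frobenius norm (Eckart-Young theorem), which is why the code truncates S rather than re-fitting. A standalone sketch of that truncation, on an illustrative matrix of our own choosing:

# Best rank-1 approximation via truncated SVD (Eckart-Young)
A <- matrix(c(3, 1, 1, 3, 2, 0), nrow=2)  # any 2x3 matrix, purely illustrative
s <- svd(A)
d <- s$d
d[2:length(d)] <- 0                       # keep only the largest singular value
A1 <- s$u %*% diag(d) %*% t(s$v)          # rank-1 matrix closest to A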
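
Since the diff drops the (empty) @examples section, a minimal usage sketch of the new fast=FALSE path may help. The shapes follow what .EMGrank_R implies (Pi of length k, Rho an m x m x k array, rank of length k); the simulated data and hyperparameter values below are illustrative only, and ginv() comes from the MASS package, which must be available:

library(MASS)  # provides ginv(), called by .EMGrank_R

set.seed(1)
n <- 100; p <- 3; m <- 2; k <- 2
X <- matrix(rnorm(n*p), nrow=n)      # regressors
Y <- matrix(rnorm(n*m), nrow=n)      # multivariate response
Pi <- rep(1/k, k)                    # equal mixture proportions
Rho <- array(diag(m), dim=c(m,m,k))  # identity variance parameter per component
rank <- c(1, 2)                      # one candidate rank per component

res <- EMGrank(Pi, Rho, mini=5, maxi=50, X, Y, tau=1e-6, rank, fast=FALSE)
str(res$phi)  # p x m x k array of mean parameters
res$LLF       # log-likelihood value at the estimated parameters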