X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=src%2Ftest%2Fgenerate_test_data%2Fhelpers%2FEMGrank.R;h=d4198138a177b8417d5ceddb9109cbd1794d2937;hb=c3bc47052f3ccb659659c59a82e9a99ea842398d;hp=0ee2d1fa808991d73948ab54a189802a934e0e02;hpb=c202886908a6eff2cd704f910a0bde06be5a0875;p=valse.git diff --git a/src/test/generate_test_data/helpers/EMGrank.R b/src/test/generate_test_data/helpers/EMGrank.R index 0ee2d1f..d419813 100644 --- a/src/test/generate_test_data/helpers/EMGrank.R +++ b/src/test/generate_test_data/helpers/EMGrank.R @@ -1,4 +1,13 @@ -EMGLLF = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ +#helper to always have matrices as arg (TODO: put this elsewhere? improve?) +matricize <- function(X) +{ + if (!is.matrix(X)) + return (t(as.matrix(X))) + return (X) +} + +require(MASS) +EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #matrix dimensions n = dim(X)[1] p = dim(X)[2] @@ -8,7 +17,7 @@ EMGLLF = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #init outputs phi = array(0, dim=c(p,m,k)) Z = rep(1, n) - Pi = piInit +# Pi = piInit LLF = 0 #local variables @@ -19,54 +28,58 @@ EMGLLF = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #main loop ite = 1 - while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau)){ + while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau)) + { #M step: Mise à jour de Beta (et donc phi) - for(r in 1:k){ - Z_bin = vec_bin(Z,r) - Z_vec = Z_bin$vec #vecteur 0 et 1 aux endroits o? Z==r - Z_indice = Z_bin$indice - if(sum(Z_indice) == 0){ + for(r in 1:k) + { + Z_indice = seq_len(n)[Z==r] #indices où Z == r + if (length(Z_indice) == 0) next - } #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr - [U,S,V] = svd(ginv(crossprod(X[Z_indice,]))%*% (X[Z_indice,])%*%Y[Z_indice,] ) + s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*% + crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) ) + S = s$d + U = s$u + V = s$v #Set m-rank(r) singular values to zero, and recompose #best rank(r) approximation of the initial product - S[rank(r)+1:end,] = 0 - phi[,,r] = U %*%S%*%t(V)%*%Rho[,,r] + if(rank[r] < length(S)) + S[(rank[r]+1):length(S)] = 0 + phi[,,r] = U %*% diag(S) %*% t(V) %*% Rho[,,r] } - #Etape E et calcul de LLF - sumLogLLF2 = 0 - for(i in 1:n){ - sumLLF1 = 0 - maxLogGamIR = -Inf - for(r in 1:k){ - dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r]) - logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct - #Z[i] = index of max (gam[i,]) - if(logGamIR > maxLogGamIR){ - Z[i] = r - maxLogGamIR = logGamIR - } - sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) - } - sumLogLLF2 = sumLogLLF2 + log(sumLLF1) - } + #Etape E et calcul de LLF + sumLogLLF2 = 0 + for(i in 1:n){ + sumLLF1 = 0 + maxLogGamIR = -Inf + for(r in 1:k){ + dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r]) + logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct + #Z[i] = index of max (gam[i,]) + if(logGamIR > maxLogGamIR){ + Z[i] = r + maxLogGamIR = logGamIR + } + sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) + } + sumLogLLF2 = sumLogLLF2 + log(sumLLF1) + } - LLF = -1/n * sumLogLLF2 - - #update distance parameter to check algorithm convergence (delta(phi, Phi)) - deltaPhi = c(deltaPhi, max(max(max((abs(phi-Phi))/(1+abs(phi))))) ) - if(length(deltaPhi) > deltaPhiBufferSize){ - deltaPhi = deltaPhi[2:length(deltaPhi)] - } - sumDeltaPhi = sum(abs(deltaPhi)) + LLF = -1/n * sumLogLLF2 - #update other local variables - Phi = phi - ite = ite+1 + #update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi = c(deltaPhi, max(max(max((abs(phi-Phi))/(1+abs(phi))))) ) + if(length(deltaPhi) > deltaPhiBufferSize){ + l_1 = c(2:length(deltaPhi)) + deltaPhi = deltaPhi[l_1] + } + sumDeltaPhi = sum(abs(deltaPhi)) + #update other local variables + Phi = phi + ite = ite+1 } return(list(phi=phi, LLF=LLF)) -} \ No newline at end of file +}