X-Git-Url: https://git.auder.net/?p=valse.git;a=blobdiff_plain;f=src%2Ftest%2Fgenerate_test_data%2Fhelpers%2FEMGrank.R;h=d4198138a177b8417d5ceddb9109cbd1794d2937;hp=a1e3df7df5bf178f398faa4ede76ffa2f9e9c565;hb=c3bc47052f3ccb659659c59a82e9a99ea842398d;hpb=e39bc178cf5de02489ea2dce3869ba6323e18492 diff --git a/src/test/generate_test_data/helpers/EMGrank.R b/src/test/generate_test_data/helpers/EMGrank.R index a1e3df7..d419813 100644 --- a/src/test/generate_test_data/helpers/EMGrank.R +++ b/src/test/generate_test_data/helpers/EMGrank.R @@ -1,3 +1,11 @@ +#helper to always have matrices as arg (TODO: put this elsewhere? improve?) +matricize <- function(X) +{ + if (!is.matrix(X)) + return (t(as.matrix(X))) + return (X) +} + require(MASS) EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #matrix dimensions @@ -9,7 +17,7 @@ EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #init outputs phi = array(0, dim=c(p,m,k)) Z = rep(1, n) - Pi = piInit +# Pi = piInit LLF = 0 #local variables @@ -20,65 +28,58 @@ EMGrank = function(Pi, Rho, mini, maxi, X, Y, tau, rank){ #main loop ite = 1 - while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau)){ + while(ite<=mini || (ite<=maxi && sumDeltaPhi>tau)) + { #M step: Mise à jour de Beta (et donc phi) - for(r in 1:k){ - Z_bin = valse:::vec_bin(Z,r) - Z_vec = Z_bin$vec #vecteur 0 et 1 aux endroits o? Z==r - Z_indice = Z_bin$indice - if(sum(Z_indice) == 0){ + for(r in 1:k) + { + Z_indice = seq_len(n)[Z==r] #indices où Z == r + if (length(Z_indice) == 0) next - } #U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr - sv = svd(ginv( crossprod(X[Z_indice,]) ) %*% crossprod(X[Z_indice,], Y[Z_indice,]) ) - S = diag(sv$d) - U = sv$u - V = sv$v + s = svd( ginv(crossprod(matricize(X[Z_indice,]))) %*% + crossprod(matricize(X[Z_indice,]),matricize(Y[Z_indice,])) ) + S = s$d + U = s$u + V = s$v #Set m-rank(r) singular values to zero, and recompose #best rank(r) approximation of the initial product - if(r==k){ - j_r_1 = length(S) - } - else{ - j_r_1 = c(rank[r]+1:length(S)) - } - S[j_r_1] = 0 - S = diag(S, nrow = ncol(U)) - phi[,,r] = U %*% S %*% t(V) %*% Rho[,,r] + if(rank[r] < length(S)) + S[(rank[r]+1):length(S)] = 0 + phi[,,r] = U %*% diag(S) %*% t(V) %*% Rho[,,r] } - #Etape E et calcul de LLF - sumLogLLF2 = 0 - for(i in 1:n){ - sumLLF1 = 0 - maxLogGamIR = -Inf - for(r in 1:k){ - dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r]) - logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct - #Z[i] = index of max (gam[i,]) - if(logGamIR > maxLogGamIR){ - Z[i] = r - maxLogGamIR = logGamIR - } - sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) - } - sumLogLLF2 = sumLogLLF2 + log(sumLLF1) - } + #Etape E et calcul de LLF + sumLogLLF2 = 0 + for(i in 1:n){ + sumLLF1 = 0 + maxLogGamIR = -Inf + for(r in 1:k){ + dotProduct = tcrossprod(Y[i,]%*%Rho[,,r]-X[i,]%*%phi[,,r]) + logGamIR = log(Pi[r]) + log(det(Rho[,,r])) - 0.5*dotProduct + #Z[i] = index of max (gam[i,]) + if(logGamIR > maxLogGamIR){ + Z[i] = r + maxLogGamIR = logGamIR + } + sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2) + } + sumLogLLF2 = sumLogLLF2 + log(sumLLF1) + } - LLF = -1/n * sumLogLLF2 - - #update distance parameter to check algorithm convergence (delta(phi, Phi)) - deltaPhi = c(deltaPhi, max(max(max((abs(phi-Phi))/(1+abs(phi))))) ) - if(length(deltaPhi) > deltaPhiBufferSize){ - l_1 = c(2:length(deltaPhi)) - deltaPhi = deltaPhi[l_1] - } - sumDeltaPhi = sum(abs(deltaPhi)) + LLF = -1/n * sumLogLLF2 - #update other local variables - Phi = phi - ite = ite+1 + #update distance parameter to check algorithm convergence (delta(phi, Phi)) + deltaPhi = c(deltaPhi, max(max(max((abs(phi-Phi))/(1+abs(phi))))) ) + if(length(deltaPhi) > deltaPhiBufferSize){ + l_1 = c(2:length(deltaPhi)) + deltaPhi = deltaPhi[l_1] + } + sumDeltaPhi = sum(abs(deltaPhi)) + #update other local variables + Phi = phi + ite = ite+1 } return(list(phi=phi, LLF=LLF)) }