From: Benjamin Auder Date: Tue, 11 Apr 2017 09:57:11 +0000 (+0200) Subject: fix test; EMGLLF.c != EMGLLF.R now... X-Git-Url: https://git.auder.net/js/pieces/cp.svg?a=commitdiff_plain;h=321e13a991a5a0e6c97225fdca436870e5e805d1;p=valse.git fix test; EMGLLF.c != EMGLLF.R now... --- diff --git a/CCC.R b/CCC.R deleted file mode 100644 index 9a17c08..0000000 --- a/CCC.R +++ /dev/null @@ -1,86 +0,0 @@ -#' constructionModelesLassoMLE -#' -#' TODO: description -#' -#' @param ... -#' -#' @return ... -#' -#' export -constructionModelesLassoMLE = function(phiInit, rhoInit, piInit, gamInit, mini, maxi, - gamma, X, Y, seuil, tau, selected, ncores=3, verbose=FALSE) -{ - if (ncores > 1) - { - cl = parallel::makeCluster(ncores) - parallel::clusterExport( cl, envir=environment(), - varlist=c("phiInit","rhoInit","gamInit","mini","maxi","gamma","X","Y","seuil", - "tau","selected","ncores","verbose") ) - } - - # Individual model computation - computeAtLambda <- function(lambda) - { - if (ncores > 1) - require("valse") #// nodes start with an ampty environment - - if (verbose) - print(paste("Computations for lambda=",lambda)) - - n = dim(X)[1] - p = dim(phiInit)[1] - m = dim(phiInit)[2] - k = dim(phiInit)[3] - - sel.lambda = selected[[lambda]] -# col.sel = which(colSums(sel.lambda)!=0) #if boolean matrix - col.sel <- which( sapply(sel.lambda,length) > 0 ) #if list of selected vars - - if (length(col.sel) == 0) - return (NULL) - - # lambda == 0 because we compute the EMV: no penalization here - res = EMGLLF(phiInit[col.sel,,],rhoInit,piInit,gamInit,mini,maxi,gamma,0, - X[,col.sel],Y,tau) - - # Eval dimension from the result + selected - phiLambda2 = res_EM$phi - rhoLambda = res_EM$rho - piLambda = res_EM$pi - phiLambda = array(0, dim = c(p,m,k)) - for (j in seq_along(col.sel)) - phiLambda[col.sel[j],,] = phiLambda2[j,,] - - dimension = 0 - for (j in 1:p) - { - b = setdiff(1:m, sel.lambda[,j]) - if (length(b) > 0) - phiLambda[j,b,] = 0.0 - dimension = dimension + sum(sel.lambda[,j]!=0) - } - - # on veut calculer la vraisemblance avec toutes nos estimations - densite = vector("double",n) - for (r in 1:k) - { - delta = Y%*%rhoLambda[,,r] - (X[, col.sel]%*%phiLambda[col.sel,,r]) - densite = densite + piLambda[r] * - det(rhoLambda[,,r])/(sqrt(2*base::pi))^m * exp(-tcrossprod(delta)/2.0) - } - llhLambda = c( sum(log(densite)), (dimension+m+1)*k-1 ) - list("phi"= phiLambda, "rho"= rhoLambda, "pi"= piLambda, "llh" = llhLambda) - } - - #Pour chaque lambda de la grille, on calcule les coefficients - out = - if (ncores > 1) - parLapply(cl, glambda, computeAtLambda) - else - lapply(glambda, computeAtLambda) - - if (ncores > 1) - parallel::stopCluster(cl) - - out -} diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R index 496d4e5..069c470 100644 --- a/pkg/R/generateXY.R +++ b/pkg/R/generateXY.R @@ -17,12 +17,12 @@ generateXY = function(n, π, meanX, β, covX, covY) p <- dim(covX)[1] m <- dim(covY)[1] k <- dim(covY)[3] - + X <- matrix(nrow=0, ncol=p) Y <- matrix(nrow=0, ncol=m) #random generation of the size of each population in X~Y (unordered) - sizePop <- rmultinom(1, n, pi) + sizePop <- rmultinom(1, n, π) class <- c() #map i in 1:n --> index of class in 1:k for (i in 1:k) @@ -30,8 +30,8 @@ generateXY = function(n, π, meanX, β, covX, covY) class <- c(class, rep(i, sizePop[i])) newBlockX <- MASS::mvrnorm(sizePop[i], meanX, covX) X <- rbind( X, newBlockX ) - Y <- rbind( Y, apply( newBlockX, 1, function(row) - mvrnorm(1, row %*% beta[,,i], covY[,,i]) ) ) + Y <- rbind( Y, t(apply( newBlockX, 1, function(row) + MASS::mvrnorm(1, row %*% β[,,i], covY[,,i]) )) ) } shuffle = sample(n) diff --git a/test/generate_test_data/EMGLLF.R b/test/generate_test_data/EMGLLF.R index 039e291..09ae2e3 100644 --- a/test/generate_test_data/EMGLLF.R +++ b/test/generate_test_data/EMGLLF.R @@ -1,156 +1,143 @@ EMGLLF_R = function(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau) { - #matrix dimensions - n = dim(X)[1] - p = dim(phiInit)[1] - m = dim(phiInit)[2] - k = dim(phiInit)[3] - - #init outputs - phi = phiInit - rho = rhoInit - pi = piInit - LLF = rep(0, maxi) - S = array(0, dim=c(p,m,k)) - - gam = gamInit - Gram2 = array(0, dim=c(p,p,k)) - ps2 = array(0, dim=c(p,m,k)) - b = rep(0, k) - X2 = array(0, dim=c(n,p,k)) - Y2 = array(0, dim=c(n,m,k)) - dist = 0 - dist2 = 0 - ite = 1 - pi2 = rep(0, k) - ps = matrix(0, m,k) - nY2 = matrix(0, m,k) - ps1 = array(0, dim=c(n,m,k)) - Gam = matrix(0, n,k) - EPS = 1E-15 - - while(ite <= mini || (ite<= maxi && (dist>= tau || dist2 >= sqrt(tau)))) + # Matrix dimensions + n = dim(X)[1] + p = dim(phiInit)[1] + m = dim(phiInit)[2] + k = dim(phiInit)[3] + + # Outputs + phi = phiInit + rho = rhoInit + pi = piInit + llh = -Inf + S = array(0, dim=c(p,m,k)) + + # Algorithm variables + gam = gamInit + Gram2 = array(0, dim=c(p,p,k)) + ps2 = array(0, dim=c(p,m,k)) + X2 = array(0, dim=c(n,p,k)) + Y2 = array(0, dim=c(n,m,k)) + EPS = 1e-15 + + for (ite in 1:maxi) { - Phi = phi - Rho = rho - Pi = pi + # Remember last pi,rho,phi values for exit condition in the end of loop + Phi = phi + Rho = rho + Pi = pi - #calcul associé à Y et X - for(r in 1:k) + # Calcul associé à Y et X + for (r in 1:k) { - for (mm in 1:m) - Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] - for (i in 1:n) - X2[i,,r] = sqrt(gam[i,r]) * X[i,] - for (mm in 1:m) - ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) - for (j in 1:p) + for (mm in 1:m) + Y2[,mm,r] = sqrt(gam[,r]) * Y[,mm] + for (i in 1:n) + X2[i,,r] = sqrt(gam[i,r]) * X[i,] + for (mm in 1:m) + ps2[,mm,r] = crossprod(X2[,,r],Y2[,mm,r]) + for (j in 1:p) { - for (s in 1:p) - Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r]) - } - } - - ########## - #Etape M # - ########## - - #pour pi - for (r in 1:k) - b[r] = sum(abs(phi[,,r])) - gam2 = colSums(gam) - a = sum(gam %*% log(pi)) - - #tant que les props sont negatives - kk = 0 - pi2AllPositive = FALSE - while (!pi2AllPositive) + for (s in 1:p) + Gram2[j,s,r] = crossprod(X2[,j,r], X2[,s,r]) + } + } + + ########## + #Etape M # + ########## + + # Pour pi + b = sapply( 1:k, function(r) sum(abs(phi[,,r])) ) + gam2 = colSums(gam) + a = sum(gam %*% log(pi)) + + # Tant que les props sont negatives + kk = 0 + pi2AllPositive = FALSE + while (!pi2AllPositive) { - pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi) - pi2AllPositive = all(pi2 >= 0) - kk = kk+1 - } + pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi) + pi2AllPositive = all(pi2 >= 0) + kk = kk+1 + } - #t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante - while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < + # t(m) la plus grande valeur dans la grille O.1^k tel que ce soit décroissante ou constante + while( kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b) ) { - pi2 = pi + 0.1^kk * (1/n*gam2 - pi) - kk = kk + 1 - } - t = 0.1^kk - pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) - - #Pour phi et rho - for (r in 1:k) + pi2 = pi + 0.1^kk * (1/n*gam2 - pi) + kk = kk + 1 + } + t = 0.1^kk + pi = (pi + t*(pi2-pi)) / sum(pi + t*(pi2-pi)) + + #Pour phi et rho + for (r in 1:k) { - for (mm in 1:m) + for (mm in 1:m) { - for (i in 1:n) - { - ps1[i,mm,r] = Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r]) - } - ps[mm,r] = sum(ps1[,mm,r]) - nY2[mm,r] = sum(Y2[,mm,r]^2) - rho[mm,mm,r] = (ps[mm,r]+sqrt(ps[mm,r]^2+4*nY2[mm,r]*gam2[r])) / (2*nY2[mm,r]) + ps = 0 + for (i in 1:n) + ps = ps + Y2[i,mm,r] * sum(X2[i,,r] * phi[,mm,r]) + nY2 = sum(Y2[,mm,r]^2) + rho[mm,mm,r] = (ps+sqrt(ps^2+4*nY2*gam2[r])) / (2*nY2) } - } + } - for (r in 1:k) + for (r in 1:k) { - for (j in 1:p) + for (j in 1:p) { - for (mm in 1:m) + for (mm in 1:m) { - S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r]) + S[j,mm,r] = -rho[mm,mm,r]*ps2[j,mm,r] + sum(phi[-j,mm,r] * Gram2[j,-j,r]) if (abs(S[j,mm,r]) <= n*lambda*(pi[r]^gamma)) - phi[j,mm,r]=0 - else if(S[j,mm,r] > n*lambda*(pi[r]^gamma)) - phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r] - else - phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r] - } - } - } - - ########## - #Etape E # - ########## - - sumLogLLF2 = 0 - for (i in 1:n) + phi[j,mm,r]=0 + else if(S[j,mm,r] > n*lambda*(pi[r]^gamma)) + phi[j,mm,r] = (n*lambda*(pi[r]^gamma)-S[j,mm,r]) / Gram2[j,j,r] + else + phi[j,mm,r] = -(n*lambda*(pi[r]^gamma)+S[j,mm,r]) / Gram2[j,j,r] + } + } + } + + ########## + #Etape E # + ########## + + # Precompute det(rho[,,r]) for r in 1...k + detRho = sapply(1:k, function(r) det(rho[,,r])) + + sumLogLLH = 0 + for (i in 1:n) { - #precompute sq norms to numerically adjust their values - sqNorm2 = rep(0,k) - for (r in 1:k) - sqNorm2[r] = sum( (Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2 ) - - #compute Gam[,] - sumLLF1 = 0.0; - for (r in 1:k) + # Update gam[,] + sumGamI = 0 + for (r in 1:k) { - Gam[i,r] = pi[r] * exp(-0.5*sqNorm2[r]) * det(rho[,,r]) - sumLLF1 = sumLLF1 + Gam[i,r] / (2*base::pi)^(m/2) - } - sumLogLLF2 = sumLogLLF2 + log(sumLLF1) - sumGamI = sum(Gam[i,]) - if(sumGamI > EPS) - gam[i,] = Gam[i,] / sumGamI - else - gam[i,] = rep(0,k) - } - - sumPen = sum(pi^gamma * b) - LLF[ite] = -sumLogLLF2/n + lambda*sumPen - dist = ifelse( ite == 1, LLF[ite], (LLF[ite]-LLF[ite-1]) / (1+abs(LLF[ite])) ) - Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) ) - Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) - Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) ) - dist2 = max(Dist1,Dist2,Dist3) - - ite = ite+1 - } - - affec = apply(gam, 1, which.max) - return(list("phi"=phi, "rho"=rho, "pi"=pi, "LLF"=LLF, "S"=S, "affec" = affec )) + gam[i,r] = pi[r]*exp(-0.5*sum((Y[i,]%*%rho[,,r]-X[i,]%*%phi[,,r])^2))*detRho[r] + sumGamI = sumGamI + gam[i,r] + } + sumLogLLH = sumLogLLH + log(sumGamI) - log((2*base::pi)^(m/2)) + if (sumGamI > EPS) #else: gam[i,] is already ~=0 + gam[i,] = gam[i,] / sumGamI + } + + sumPen = sum(pi^gamma * b) + last_llh = llh + llh = -sumLogLLH/n + lambda*sumPen + dist = ifelse( ite == 1, llh, (llh-last_llh) / (1+abs(llh)) ) + Dist1 = max( (abs(phi-Phi)) / (1+abs(phi)) ) + Dist2 = max( (abs(rho-Rho)) / (1+abs(rho)) ) + Dist3 = max( (abs(pi-Pi)) / (1+abs(Pi)) ) + dist2 = max(Dist1,Dist2,Dist3) + + if (ite >= mini && (dist >= tau || dist2 >= sqrt(tau))) + break + } + + affec = apply(gam, 1, which.max) + list( "phi"=phi, "rho"=rho, "pi"=pi, "llh"=llh, "S"=S, "affec"=affec ) } diff --git a/test/generate_test_data/generateRunSaveTest_EMGLLF.R b/test/generate_test_data/generateRunSaveTest_EMGLLF.R index bf68ab3..8fe1f33 100644 --- a/test/generate_test_data/generateRunSaveTest_EMGLLF.R +++ b/test/generate_test_data/generateRunSaveTest_EMGLLF.R @@ -8,8 +8,8 @@ generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, dir.create(testFolder, showWarnings=FALSE, mode="0755") require(valse) - params = valse:::basicInitParameters(n, p, m, k) - xy = valse:::generateXYdefault(n, p, m, k) + params = basicInitParameters(n, p, m, k) + xy = generateXYdefault(n, p, m, k) #save inputs write.table(as.double(params$phiInit), paste(testFolder,"phiInit",sep=""), @@ -44,7 +44,7 @@ generateRunSaveTest_EMGLLF = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, write.table(as.double(res$phi), paste(testFolder,"phi",sep=""), row.names=F, col.names=F) write.table(as.double(res$rho), paste(testFolder,"rho",sep=""), row.names=F, col.names=F) write.table(as.double(res$pi), paste(testFolder,"pi",sep=""), row.names=F, col.names=F) - write.table(as.double(res$LLF), paste(testFolder,"LLF",sep=""), row.names=F, col.names=F) + write.table(as.double(res$llh), paste(testFolder,"llh",sep=""), row.names=F, col.names=F) write.table(as.double(res$S), paste(testFolder,"S",sep=""), row.names=F, col.names=F) write.table(as.integer(res$affec), paste(testFolder,"affec",sep=""), row.names=F, col.names=F) } diff --git a/test/generate_test_data/generateRunSaveTest_EMGrank.R b/test/generate_test_data/generateRunSaveTest_EMGrank.R index f348d71..14c1b2a 100644 --- a/test/generate_test_data/generateRunSaveTest_EMGrank.R +++ b/test/generate_test_data/generateRunSaveTest_EMGrank.R @@ -10,7 +10,7 @@ generateRunSaveTest_EMGrank = function(n=200, p=15, m=10, k=3, mini=5, maxi=10, for(i in 1:k) rho[,,i] = diag(1,m) require(valse) - xy = valse:::generateXYdefault(n, p, m, k) + xy = generateXYdefault(n, p, m, k) testFolder = "../data/" dir.create(testFolder, showWarnings=FALSE, mode="0755") diff --git a/test/generate_test_data/helper.R b/test/generate_test_data/helper.R index 49cd1b5..8ec122b 100644 --- a/test/generate_test_data/helper.R +++ b/test/generate_test_data/helper.R @@ -1,10 +1,12 @@ #' Generate a sample of (X,Y) of size n with default values +#' #' @param n sample size #' @param p number of covariates #' @param m size of the response #' @param k number of clusters +#' #' @return list with X and Y -#' @export +#' generateXYdefault = function(n, p, m, k) { meanX = rep(0, p) @@ -12,27 +14,30 @@ generateXYdefault = function(n, p, m, k) covY = array(dim=c(m,m,k)) for(r in 1:k) covY[,,r] = diag(m) - pi = rep(1./k,k) + π = rep(1./k,k) #initialize beta to a random number of non-zero random value - beta = array(0, dim=c(p,m,k)) + β = array(0, dim=c(p,m,k)) for (j in 1:p) { nonZeroCount = sample(1:m, 1) - beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) + β[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k) } - sample_IO = generateXY(meanX, covX, covY, pi, beta, n) + sample_IO = generateXY(n, π, meanX, β, covX, covY) return (list(X=sample_IO$X,Y=sample_IO$Y)) } -#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for weights, -#' identity for covariance matrices, and uniformly distributed for the clustering) +#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for +#' weights, identity for covariance matrices, and uniformly distributed for the +#' clustering) +#' #' @param n sample size #' @param p number of covariates #' @param m size of the response #' @param k number of clusters +#' #' @return list with phiInit, rhoInit,piInit,gamInit -#' @export +#' basicInitParameters = function(n,p,m,k) { phiInit = array(0, dim=c(p,m,k)) @@ -49,5 +54,5 @@ basicInitParameters = function(n,p,m,k) gamInit[i,R[i]] = 0.9 gamInit = gamInit/sum(gamInit[1,]) - return (list("phiInit" = phiInit, "rhoInit" = rhoInit, "piInit" = piInit, "gamInit" = gamInit)) + return (list("phiInit"=phiInit, "rhoInit"=rhoInit, "piInit"=piInit, "gamInit"=gamInit)) } diff --git a/test/test.EMGLLF.c b/test/test.EMGLLF.c index 7eed301..fa6e36c 100644 --- a/test/test.EMGLLF.c +++ b/test/test.EMGLLF.c @@ -31,7 +31,7 @@ int main(int argc, char** argv) Real* phi = (Real*)malloc(p*m*k*sizeof(Real)); Real* rho = (Real*)malloc(m*m*k*sizeof(Real)); Real* pi = (Real*)malloc(k*sizeof(Real)); - Real* LLF = (Real*)malloc(maxi*sizeof(Real)); + Real llh; Real* S = (Real*)malloc(p*m*k*sizeof(Real)); int* affec = (int*)malloc(n*sizeof(int)); ///////////// @@ -39,7 +39,7 @@ int main(int argc, char** argv) //////////////////// // Call to EMGLLF // EMGLLF_core(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau, - phi,rho,pi,LLF,S,affec, + phi,rho,pi,&llh,S,affec, n,p,m,k); //////////////////// @@ -66,10 +66,8 @@ int main(int argc, char** argv) free(pi); free(ref_pi); - Real* ref_LLF = readArray_real("LLF"); - compareArray_real("LLF", LLF, ref_LLF, maxi); - free(LLF); - free(ref_LLF); + Real ref_llh = read_real("llh"); + compareArray_real("llh", &llh, &ref_llh, 1); Real* ref_S = readArray_real("S"); compareArray_real("S", S, ref_S, p*m*k);