From: Benjamin Auder <benjamin.auder@somewhere> Date: Tue, 14 Jan 2020 17:24:36 +0000 (+0100) Subject: Add a number_of_cores parameter for OpenMP // in Compute_Omega X-Git-Url: https://git.auder.net/variants/current/doc/scripts/getGraph_%22%20%20%20this.image%20%20%20%22.php?a=commitdiff_plain;h=5af71d43f3f2dba21c6667939fcff88923af3b7b;p=morpheus.git Add a number_of_cores parameter for OpenMP // in Compute_Omega --- diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index bd92ed5..2a20240 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -6,7 +6,7 @@ Description: Mixture of logistic regressions parameters (H)estimation with (General Linear Model). For more details see chapter 3 in the PhD thesis of Mor-Absa Loum: <http://www.theses.fr/s156435>, available here <https://tel.archives-ouvertes.fr/tel-01877796/document>. -Version: 1.0-0 +Version: 1.0-2 Author: Benjamin Auder <Benjamin.Auder@u-psud.fr> [aut,cre], Mor-Absa Loum <Mor-Absa.Loum@u-psud.fr> [aut] Maintainer: Benjamin Auder <Benjamin.Auder@u-psud.fr> diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R index f42571d..411e010 100644 --- a/pkg/R/optimParams.R +++ b/pkg/R/optimParams.R @@ -5,6 +5,7 @@ #' @param K Number of populations. #' @param link The link type, 'logit' or 'probit'. #' @param M the empirical cross-moments between X and Y (optional) +#' @param nc Number of cores (default: 0 to use all) #' #' @return An object 'op' of class OptimParams, initialized so that #' \code{op$run(θ0)} outputs the list of optimized parameters @@ -36,7 +37,7 @@ #' o$f( o$linArgs(par1) )} #' #' @export -optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL) +optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL, nc=0) { # Check arguments if (!is.matrix(X) || any(is.na(X))) @@ -61,7 +62,7 @@ optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL) # Build and return optimization algorithm object methods::new("OptimParams", "li"=link, "X"=X, - "Y"=as.integer(Y), "K"=as.integer(K), "Mhat"=as.double(M)) + "Y"=as.integer(Y), "K"=as.integer(K), "Mhat"=as.double(M), "nc"=as.integer(nc)) } # Encapsulated optimization for p (proportions), β and b (regression parameters) @@ -76,6 +77,7 @@ optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL) # @field K Number of populations # @field n Number of sample points # @field d Number of dimensions +# @field nc Number of cores (OpenMP //) # @field W Weights matrix (initialized at identity) # setRefClass( @@ -91,6 +93,7 @@ setRefClass( K = "integer", n = "integer", d = "integer", + nc = "integer", # Weights matrix (generalized least square) W = "matrix" ), @@ -102,7 +105,7 @@ setRefClass( callSuper(...) if (!hasArg("X") || !hasArg("Y") || !hasArg("K") - || !hasArg("li") || !hasArg("Mhat")) + || !hasArg("li") || !hasArg("Mhat") || !hasArg("nc")) { stop("Missing arguments") } @@ -141,7 +144,7 @@ setRefClass( M <- Moments(θ) Omega <- matrix( .C("Compute_Omega", X=as.double(X), Y=as.integer(Y), M=as.double(M), - pn=as.integer(n), pd=as.integer(d), + pnc=as.integer(nc), pn=as.integer(n), pd=as.integer(d), W=as.double(W), PACKAGE="morpheus")$W, nrow=dd, ncol=dd ) MASS::ginv(Omega) }, diff --git a/pkg/src/Makevars b/pkg/src/Makevars index 1f1960a..ee13498 100644 --- a/pkg/src/Makevars +++ b/pkg/src/Makevars @@ -1,4 +1,5 @@ -# Override R weird defaults: -# https://stackoverflow.com/questions/23414448/r-makevars-file-to-overwrite-r-cmds-default-g-options +PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) + #%.o: %.c -# $(CC) -std=c11 -I"$(R_INCLUDE_DIR)" -DNDEBUG -fPIC -O2 -c $< -o $@ +# $(CC) -std=c11 -I"$(R_INCLUDE_DIR)" -DNDEBUG -fPIC -fopenmp -O2 -c $< -o $@ diff --git a/pkg/src/functions.c b/pkg/src/functions.c index b634c98..94ea12d 100644 --- a/pkg/src/functions.c +++ b/pkg/src/functions.c @@ -1,4 +1,5 @@ #include <stdlib.h> +#include <omp.h> // Index matrix (by columns) #define mi(i, j, d1, d2) (j*d1 + i) @@ -50,9 +51,9 @@ void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3) // W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) ) // with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments -void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W) +void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, double* W) { - int n=*pn, d=*pd; + int nc=*pnc, n=*pn, d=*pd; int dim = d + d*d + d*d*d; //double* W = (double*)malloc(dim*dim*sizeof(double)); @@ -63,6 +64,8 @@ void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W) W[j*dim+k] = 0.0; } double* g = (double*)malloc(dim*sizeof(double)); + omp_set_num_threads(nc >= 1 ? nc : omp_get_num_procs()); + #pragma omp parallel for for (int i=0; i<n; i++) { // g == gi: @@ -92,11 +95,21 @@ void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W) g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j]; } // Add 1/n t(gi) %*% gi to W - for (int j=0; j<dim; j++) + for (int j=dim-1; j>=0; j--) { - for (int k=0; k<dim; k++) - W[j*dim+k] += g[j] * g[k] / n; + // This final nested loop is very costly. Some basic optimisations: + double gj = g[j]; + int baseIdx = j * dim; + #pragma GCC unroll 100 + for (int k=dim-1; k>=0; k--) + W[baseIdx+k] += gj * g[k]; } } + // Normalize W: x 1/n + for (int j=0; j<dim; j++) + { + for (int k=0; k<dim; k++) + W[mi(j,k,dim,dim)] /= n; + } free(g); } diff --git a/pkg/src/morpheus_init.c b/pkg/src/morpheus_init.c index b872488..e0fddb8 100644 --- a/pkg/src/morpheus_init.c +++ b/pkg/src/morpheus_init.c @@ -9,13 +9,13 @@ extern void hungarianAlgorithm(void*, void*, void*); extern void Moments_M2(void*, void*, void*, void*, void*); extern void Moments_M3(void*, void*, void*, void*, void*); -extern void Compute_Omega(void*, void*, void*, void*, void*, void*); +extern void Compute_Omega(void*, void*, void*, void*, void*, void*, void*); static const R_CMethodDef CEntries[] = { {"hungarianAlgorithm", (DL_FUNC) &hungarianAlgorithm, 3}, {"Moments_M2", (DL_FUNC) &Moments_M2, 5}, {"Moments_M3", (DL_FUNC) &Moments_M3, 5}, - {"Compute_Omega", (DL_FUNC) &Compute_Omega, 6}, + {"Compute_Omega", (DL_FUNC) &Compute_Omega, 7}, {NULL, NULL, 0} }; diff --git a/reports/accuracy.R b/reports/accuracy.R index 24191d8..9a9d21d 100644 --- a/reports/accuracy.R +++ b/reports/accuracy.R @@ -10,7 +10,7 @@ optimBeta <- function(N, n, p, beta, b, link, ncores) K <- ncol(fargs$beta) M <- computeMoments(fargs$X, fargs$Y) mu <- computeMu(fargs$X, fargs$Y, list(K=K, M=M)) - op <- optimParams(fargs$X, fargs$Y, K, fargs$link, M) + op <- optimParams(fargs$X, fargs$Y, K, fargs$link, M, 1) #only 1 OpenMP core x_init <- list(p=rep(1/K,K-1), beta=mu, b=rep(0,K)) res2 <- NULL tryCatch({ diff --git a/reports/multistart.R b/reports/multistart.R index 73df538..432f312 100644 --- a/reports/multistart.R +++ b/reports/multistart.R @@ -10,7 +10,7 @@ testMultistart <- function(N, n, p, beta, b, link, nstart, ncores) library(morpheus) K <- ncol(fargs$beta) mu <- computeMu(fargs$X, fargs$Y, list(K=K, M=fargs$M)) - op <- optimParams(fargs$X, fargs$Y, K, fargs$link, fargs$M) + op <- optimParams(fargs$X, fargs$Y, K, fargs$link, fargs$M, 1) x_init <- list(p=rep(1/K,K-1), beta=mu, b=rep(0,K)) res2 <- NULL tryCatch({ diff --git a/reports/run_time_cl.sh b/reports/run_time_cl.sh index 3055072..943d7c1 100644 --- a/reports/run_time_cl.sh +++ b/reports/run_time_cl.sh @@ -1,15 +1,19 @@ #!/bin/bash -#PBS -l nodes=1:ppn=16,mem=8gb,pmem=512mb -#PBS -j oe - -#PBS -o .output +#$ -N morpheus +#$ -wd /workdir2/auder/morpheus/reports +#$ -m abes +#$ -M benjamin@auder.net +#$ -pe make 50 rm -f .output - -WORKDIR=/workdir2/auder/morpheus/reports -cd $WORKDIR +#$ -o .output +#$ -j y module load R +N=1000 +nc=50 +link="logit" + # arg --vanilla maybe possible on cluster -R --slave --args N=1000 nc=16 link=logit <timings.R >out 2>&1 +R --slave --args N=$N nc=$nc link=$link <timings.R >out 2>&1 diff --git a/reports/timings.R b/reports/timings.R index 704fd82..6d26bc0 100644 --- a/reports/timings.R +++ b/reports/timings.R @@ -16,8 +16,8 @@ ourOptim <- function(X, Y, K, link) { M <- computeMoments(X, Y) mu <- computeMu(X, Y, list(K=K,M=M)) - x_init = c(1/2, as.double(mu), c(0,0)) - optimParams(K, link, list(M=M))$run(x_init) + x_init = list(p=rep(1/K,K-1), beta=mu, b=rep(0,K)) + optimParams(X, Y, K, link, M, 1)$run(x_init) NULL }