From 5af71d43f3f2dba21c6667939fcff88923af3b7b Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 14 Jan 2020 18:24:36 +0100
Subject: [PATCH] Add a number_of_cores parameter for OpenMP // in
 Compute_Omega

---
 pkg/DESCRIPTION         |  2 +-
 pkg/R/optimParams.R     | 11 +++++++----
 pkg/src/Makevars        |  7 ++++---
 pkg/src/functions.c     | 23 ++++++++++++++++++-----
 pkg/src/morpheus_init.c |  4 ++--
 reports/accuracy.R      |  2 +-
 reports/multistart.R    |  2 +-
 reports/run_time_cl.sh  | 20 ++++++++++++--------
 reports/timings.R       |  4 ++--
 9 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION
index bd92ed5..2a20240 100644
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -6,7 +6,7 @@ Description: Mixture of logistic regressions parameters (H)estimation with
     (General Linear Model). For more details see chapter 3 in the PhD thesis of
 		Mor-Absa Loum: <http://www.theses.fr/s156435>, available here
 		<https://tel.archives-ouvertes.fr/tel-01877796/document>.
-Version: 1.0-0
+Version: 1.0-2
 Author: Benjamin Auder <Benjamin.Auder@u-psud.fr> [aut,cre],
     Mor-Absa Loum <Mor-Absa.Loum@u-psud.fr> [aut]
 Maintainer: Benjamin Auder <Benjamin.Auder@u-psud.fr>
diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R
index f42571d..411e010 100644
--- a/pkg/R/optimParams.R
+++ b/pkg/R/optimParams.R
@@ -5,6 +5,7 @@
 #' @param K Number of populations.
 #' @param link The link type, 'logit' or 'probit'.
 #' @param M the empirical cross-moments between X and Y (optional)
+#' @param nc Number of cores (default: 0 to use all)
 #'
 #' @return An object 'op' of class OptimParams, initialized so that
 #'   \code{op$run(θ0)} outputs the list of optimized parameters
@@ -36,7 +37,7 @@
 #' o$f( o$linArgs(par1) )}
 #'
 #' @export
-optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL)
+optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL, nc=0)
 {
   # Check arguments
   if (!is.matrix(X) || any(is.na(X)))
@@ -61,7 +62,7 @@ optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL)
 
   # Build and return optimization algorithm object
   methods::new("OptimParams", "li"=link, "X"=X,
-    "Y"=as.integer(Y), "K"=as.integer(K), "Mhat"=as.double(M))
+    "Y"=as.integer(Y), "K"=as.integer(K), "Mhat"=as.double(M), "nc"=as.integer(nc))
 }
 
 # Encapsulated optimization for p (proportions), β and b (regression parameters)
@@ -76,6 +77,7 @@ optimParams <- function(X, Y, K, link=c("logit","probit"), M=NULL)
 # @field K Number of populations
 # @field n Number of sample points
 # @field d Number of dimensions
+# @field nc Number of cores (OpenMP //)
 # @field W Weights matrix (initialized at identity)
 #
 setRefClass(
@@ -91,6 +93,7 @@ setRefClass(
     K = "integer",
     n = "integer",
     d = "integer",
+    nc = "integer",
     # Weights matrix (generalized least square)
     W = "matrix"
   ),
@@ -102,7 +105,7 @@ setRefClass(
 
       callSuper(...)
       if (!hasArg("X") || !hasArg("Y") || !hasArg("K")
-        || !hasArg("li") || !hasArg("Mhat"))
+        || !hasArg("li") || !hasArg("Mhat") || !hasArg("nc"))
       {
         stop("Missing arguments")
       }
@@ -141,7 +144,7 @@ setRefClass(
       M <- Moments(θ)
       Omega <- matrix( .C("Compute_Omega",
         X=as.double(X), Y=as.integer(Y), M=as.double(M),
-        pn=as.integer(n), pd=as.integer(d),
+        pnc=as.integer(nc), pn=as.integer(n), pd=as.integer(d),
         W=as.double(W), PACKAGE="morpheus")$W, nrow=dd, ncol=dd )
       MASS::ginv(Omega)
     },
diff --git a/pkg/src/Makevars b/pkg/src/Makevars
index 1f1960a..ee13498 100644
--- a/pkg/src/Makevars
+++ b/pkg/src/Makevars
@@ -1,4 +1,5 @@
-# Override R weird defaults:
-# https://stackoverflow.com/questions/23414448/r-makevars-file-to-overwrite-r-cmds-default-g-options
+PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
+PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
+
 #%.o: %.c
-#	$(CC) -std=c11 -I"$(R_INCLUDE_DIR)" -DNDEBUG -fPIC -O2 -c $< -o $@
+#	$(CC) -std=c11 -I"$(R_INCLUDE_DIR)" -DNDEBUG -fPIC -fopenmp -O2 -c $< -o $@
diff --git a/pkg/src/functions.c b/pkg/src/functions.c
index b634c98..94ea12d 100644
--- a/pkg/src/functions.c
+++ b/pkg/src/functions.c
@@ -1,4 +1,5 @@
 #include <stdlib.h>
+#include <omp.h>
 
 // Index matrix (by columns)
 #define mi(i, j, d1, d2) (j*d1 + i)
@@ -50,9 +51,9 @@ void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3)
 
 // W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) )
 // with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
-void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
+void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, double* W)
 {
-  int n=*pn, d=*pd;
+  int nc=*pnc, n=*pn, d=*pd;
   int dim = d + d*d + d*d*d;
   //double* W = (double*)malloc(dim*dim*sizeof(double));
 
@@ -63,6 +64,8 @@ void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
       W[j*dim+k] = 0.0;
   }
   double* g = (double*)malloc(dim*sizeof(double));
+  omp_set_num_threads(nc >= 1 ? nc : omp_get_num_procs());
+  #pragma omp parallel for
   for (int i=0; i<n; i++)
   {
     // g == gi:
@@ -92,11 +95,21 @@ void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
       g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
     }
     // Add 1/n t(gi) %*% gi to W
-    for (int j=0; j<dim; j++)
+    for (int j=dim-1; j>=0; j--)
     {
-      for (int k=0; k<dim; k++)
-        W[j*dim+k] += g[j] * g[k] / n;
+      // This final nested loop is very costly. Some basic optimisations:
+      double gj = g[j];
+      int baseIdx = j * dim;
+      #pragma GCC unroll 100
+      for (int k=dim-1; k>=0; k--)
+        W[baseIdx+k] += gj * g[k];
     }
   }
+  // Normalize W: x 1/n
+  for (int j=0; j<dim; j++)
+  {
+    for (int k=0; k<dim; k++)
+      W[mi(j,k,dim,dim)] /= n;
+  }
   free(g);
 }
diff --git a/pkg/src/morpheus_init.c b/pkg/src/morpheus_init.c
index b872488..e0fddb8 100644
--- a/pkg/src/morpheus_init.c
+++ b/pkg/src/morpheus_init.c
@@ -9,13 +9,13 @@
 extern void hungarianAlgorithm(void*, void*, void*);
 extern void Moments_M2(void*, void*, void*, void*, void*);
 extern void Moments_M3(void*, void*, void*, void*, void*);
-extern void Compute_Omega(void*, void*, void*, void*, void*, void*);
+extern void Compute_Omega(void*, void*, void*, void*, void*, void*, void*);
 
 static const R_CMethodDef CEntries[] = {
   {"hungarianAlgorithm", (DL_FUNC) &hungarianAlgorithm, 3},
   {"Moments_M2",         (DL_FUNC) &Moments_M2,         5},
   {"Moments_M3",         (DL_FUNC) &Moments_M3,         5},
-  {"Compute_Omega",      (DL_FUNC) &Compute_Omega,      6},
+  {"Compute_Omega",      (DL_FUNC) &Compute_Omega,      7},
   {NULL, NULL, 0}
 };
 
diff --git a/reports/accuracy.R b/reports/accuracy.R
index 24191d8..9a9d21d 100644
--- a/reports/accuracy.R
+++ b/reports/accuracy.R
@@ -10,7 +10,7 @@ optimBeta <- function(N, n, p, beta, b, link, ncores)
         K <- ncol(fargs$beta)
         M <- computeMoments(fargs$X, fargs$Y)
         mu <- computeMu(fargs$X, fargs$Y, list(K=K, M=M))
-        op <- optimParams(fargs$X, fargs$Y, K, fargs$link, M)
+        op <- optimParams(fargs$X, fargs$Y, K, fargs$link, M, 1) #only 1 OpenMP core
         x_init <- list(p=rep(1/K,K-1), beta=mu, b=rep(0,K))
         res2 <- NULL
         tryCatch({
diff --git a/reports/multistart.R b/reports/multistart.R
index 73df538..432f312 100644
--- a/reports/multistart.R
+++ b/reports/multistart.R
@@ -10,7 +10,7 @@ testMultistart <- function(N, n, p, beta, b, link, nstart, ncores)
         library(morpheus)
         K <- ncol(fargs$beta)
         mu <- computeMu(fargs$X, fargs$Y, list(K=K, M=fargs$M))
-        op <- optimParams(fargs$X, fargs$Y, K, fargs$link, fargs$M)
+        op <- optimParams(fargs$X, fargs$Y, K, fargs$link, fargs$M, 1)
         x_init <- list(p=rep(1/K,K-1), beta=mu, b=rep(0,K))
 				res2 <- NULL
 				tryCatch({
diff --git a/reports/run_time_cl.sh b/reports/run_time_cl.sh
index 3055072..943d7c1 100644
--- a/reports/run_time_cl.sh
+++ b/reports/run_time_cl.sh
@@ -1,15 +1,19 @@
 #!/bin/bash
 
-#PBS -l nodes=1:ppn=16,mem=8gb,pmem=512mb
-#PBS -j oe
-
-#PBS -o .output
+#$ -N morpheus
+#$ -wd /workdir2/auder/morpheus/reports
+#$ -m abes
+#$ -M benjamin@auder.net
+#$ -pe make 50
 rm -f .output
-
-WORKDIR=/workdir2/auder/morpheus/reports
-cd $WORKDIR
+#$ -o .output
+#$ -j y
 
 module load R
 
+N=1000
+nc=50
+link="logit"
+
 # arg --vanilla maybe possible on cluster
-R --slave --args N=1000 nc=16 link=logit <timings.R >out 2>&1
+R --slave --args N=$N nc=$nc link=$link <timings.R >out 2>&1
diff --git a/reports/timings.R b/reports/timings.R
index 704fd82..6d26bc0 100644
--- a/reports/timings.R
+++ b/reports/timings.R
@@ -16,8 +16,8 @@ ourOptim <- function(X, Y, K, link)
 {
 	M <- computeMoments(X, Y)
 	mu <- computeMu(X, Y, list(K=K,M=M))
-	x_init = c(1/2, as.double(mu), c(0,0))
-	optimParams(K, link, list(M=M))$run(x_init)
+	x_init = list(p=rep(1/K,K-1), beta=mu, b=rep(0,K))
+	optimParams(X, Y, K, link, M, 1)$run(x_init)
 	NULL
 }
 
-- 
2.44.0