From 07f2d0453144f5b639dcc28f1df40a141db7da79 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin@auder>
Date: Fri, 20 Dec 2019 15:18:36 +0100
Subject: [PATCH] Improve printResults script

---
 pkg/R/optimParams.R     |  2 +-
 reports/accuracy.R      |  3 ++-
 reports/printResults.R  | 32 +++++++++++++++++++++-----------
 reports/run_accu_cl.sh  | 13 +++++++------
 reports/run_multi_cl.sh |  7 +++----
 5 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R
index 5c80fc2..5d37898 100644
--- a/pkg/R/optimParams.R
+++ b/pkg/R/optimParams.R
@@ -268,7 +268,7 @@ setRefClass(
       # (Re)Set W to identity, to allow several run from the same object
       W <<- diag(d+d^2+d^3)
 
-      loopMax <- 2 #TODO: loopMax = 3 ?
+      loopMax <- 2 #TODO: loopMax = 3 ? Seems not improving...
       for (loop in 1:loopMax)
       {
         op_res = constrOptim( linArgs(θ0), .self$f, .self$grad_f,
diff --git a/reports/accuracy.R b/reports/accuracy.R
index 6f322bf..96f3a4a 100644
--- a/reports/accuracy.R
+++ b/reports/accuracy.R
@@ -17,7 +17,8 @@ optimBeta <- function(N, n, p, beta, b, link, ncores)
           res2 <- do.call(rbind, op$run(x_init))
         }, error = function(e) {})
         res2
-      },
+      }
+			,
       # flexmix
       function(fargs) {
         library(flexmix)
diff --git a/reports/printResults.R b/reports/printResults.R
index a6cac05..b61084f 100644
--- a/reports/printResults.R
+++ b/reports/printResults.R
@@ -1,32 +1,42 @@
+# NOTE: discard top 2% of highest values
 prms <- function(name, idx)
 {
   load(name)
   d <- nrow(mr[[1]][[1]])-2
+	if (idx > length(mr))
+		mr[[idx]] = mr[[1]]
   p <- colMeans(do.call(rbind, lapply(mr[[idx]], function(m) m[1,])))
-  b <- colMeans(do.call(rbind, lapply(mr[[idx]], function(m) m[2+d,])))
-  L <- length(mr[[1]])
-  beta <- (1/L) * Reduce("+", lapply(mr[[idx]], function(m) m[2:(d+1),]))
+	bVects <- lapply(mr[[idx]], function(m) m[2+d,])
+	q98 <- quantile(sapply(bVects, function(bv) sum(abs(bv))), 0.98)
+	bFiltered <- Filter(function(bv) sum(abs(bv)) < q98, bVects)
+  b <- colMeans(do.call(rbind, bFiltered))
+	betaMatrices <- lapply(mr[[idx]], function(m) m[2:(d+1),])
+	q98 <- quantile(sapply(betaMatrices, function(bm) sum(abs(bm))), 0.98)
+	bmFiltered <- Filter(function(bm) sum(abs(bm)) < q98, betaMatrices)
+	beta <- (1/length(bmFiltered)) * Reduce("+", bmFiltered)
   list(p, beta, b, mr_params)
 }
 
-pprms <- function(link)
+pprms <- function(link, prefix="./")
 {
-  for (n in c("5000", "10000", "100000", "500000", "1000000"))
+  toprint <- matrix(nrow=0, ncol=13) #13=1+2+1 + 1+2+1 + 1+3+1
+	for (n in c("5000", "10000", "100000", "500000", "1000000"))
   {
     for (method in 1:2)
     {
-      toprint <- c()
+			row <- c()
       for (d in c(2,5,10))
       {
-        name <- paste0("res_", n, "_", d, "_", link, ".RData")
+        name <- paste0(prefix, "res_", n, "_", d, "_", link, ".RData")
         params <- prms(name, method)
-        toprint <- c(toprint, c(
+        row <- c( row,
           sum(abs(params[[1]] - params[[4]]$p)),
           colSums(abs(params[[2]] - params[[4]]$beta)),
-          sum(abs(params[[3]] - params[[4]]$b))
-        ))
+          sum(abs(params[[3]] - params[[4]]$b)) )
       }
-      print(toprint, digits=2)
+      toprint <- rbind(toprint, row)
     }
   }
+	print(formatC(toprint, format="e", digits=1)) #for reporting
+	return (toprint)
 }
diff --git a/reports/run_accu_cl.sh b/reports/run_accu_cl.sh
index f73b491..39ef317 100644
--- a/reports/run_accu_cl.sh
+++ b/reports/run_accu_cl.sh
@@ -2,8 +2,8 @@
 
 #$ -N morpheus
 #$ -wd /workdir2/auder/morpheus/reports
-##$ -m abes
-##$ -M benjamin@auder.net
+#$ -m abes
+#$ -M benjamin@auder.net
 #$ -pe make 50
 rm -f .output
 #$ -o .output
@@ -12,11 +12,12 @@ rm -f .output
 module load R/3.6.1
 
 N=100
-n=1e5
 nc=50
 
-for d in 2 5 10; do
-	for link in "logit" "probit"; do
-		R --slave --args N=$N n=$n nc=$nc d=$d link=$link <accuracy.R >out_${n}_${link}_${d} 2>&1
+for n in "5000" "10000" "100000" "500000" "1000000"; do
+	for d in 2 5 10; do
+		for link in "logit" "probit"; do
+			R --slave --args N=$N n=$n nc=$nc d=$d link=$link <accuracy.R >out_${n}_${link}_${d} 2>&1
+		done
 	done
 done
diff --git a/reports/run_multi_cl.sh b/reports/run_multi_cl.sh
index cd5e493..eac62a9 100644
--- a/reports/run_multi_cl.sh
+++ b/reports/run_multi_cl.sh
@@ -1,21 +1,20 @@
 #!/bin/bash
 
-# Lancement: qsub -o .output -j y run_multi_cl.sh
-
 #$ -N morpheus
 #$ -wd /workdir2/auder/morpheus/reports
 #$ -m abes
 #$ -M benjamin@auder.net
 #$ -pe make 50
-#$ -l h_vmem=1G
 rm -f .output
+#$ -o .output
+#$ -j y
 
 module load R/3.6.1
 
 N=100
 n=1e5
 nc=50
-nstart=5
+nstart=3
 
 for d in 2 5 10; do
 	for link in "logit" "probit"; do
-- 
2.44.0