From 074c721aa587d45c4f7ef10f8a28fdc09273d31a Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Mon, 16 Dec 2019 15:47:39 +0100
Subject: [PATCH] Remove weights from reports scripts + first naive attempt to
 optimize computeW

---
 pkg/R/optimParams.R    |  4 +-
 pkg/src/functions.c    | 83 ++++++++++++++++++++++++++++++++++++------
 reports/accuracy.R     | 15 +++-----
 reports/run_accu_cl.sh | 14 +------
 4 files changed, 80 insertions(+), 36 deletions(-)

diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R
index c42e6c5..13aec83 100644
--- a/pkg/R/optimParams.R
+++ b/pkg/R/optimParams.R
@@ -122,7 +122,7 @@ setRefClass(
       dd <- d + d^2 + d^3
       M <- Moments(θ)
       Omega <- matrix( .C("Compute_Omega",
-        X=as.double(X), Y=as.double(Y), M=as.double(M),
+        X=as.double(X), Y=as.integer(Y), M=as.double(M),
         pn=as.integer(n), pd=as.integer(d),
         W=as.double(W), PACKAGE="morpheus")$W, nrow=dd, ncol=dd )
       MASS::ginv(Omega)
@@ -257,7 +257,7 @@ setRefClass(
       else if (!is.numeric(θ0$b) || length(θ0$b) != K || any(is.na(θ0$b)))
         stop("θ0$b: length K, no NA")
       # TODO: stopping condition? N iterations? Delta <= epsilon ?
-      for (loop in 1:10)
+      for (loop in 1:2)
       {
         op_res = constrOptim( linArgs(θ0), .self$f, .self$grad_f,
           ui=cbind(
diff --git a/pkg/src/functions.c b/pkg/src/functions.c
index 1f35585..feea3ad 100644
--- a/pkg/src/functions.c
+++ b/pkg/src/functions.c
@@ -58,7 +58,59 @@ void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3)
 
 // W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) )
 // with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
-void Compute_Omega(double* X, double* Y, double* M, int* pn, int* pd, double* W)
+//void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
+//{
+//  int n=*pn, d=*pd;
+//  int dim = d + d*d + d*d*d;
+//  //double* W = (double*)malloc(dim*dim*sizeof(double));
+//
+//  // (Re)Initialize W:
+//  for (int j=0; j<dim; j++)
+//  {
+//    for (int k=0; k<dim; k++)
+//      W[j*dim+k] = 0.0;
+//  }
+//  double* g = (double*)malloc(dim*sizeof(double));
+//  for (int i=0; i<n; i++)
+//  {
+//    // g == gi:
+//    for (int j=0; j<d; j++)
+//      g[j] = Y[i] * X[mi(i,j,n,d)] - M[j];
+//    for (int j=d; j<d+(d*d); j++)
+//    {
+//      int idx1 = (j-d) % d; //num row
+//      int idx2 = ((j-d) - idx1) / d; //num col
+//      g[j] = 0.0;
+//      if (idx1 == idx2)
+//        g[j] -= Y[i];
+//      g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j];
+//    }
+//    for (int j=d+d*d; j<dim; j++)
+//    {
+//      int idx1 = (j-d-d*d) % d; //num row
+//      int idx2 = ((j-d-d*d - idx1) / d) %d; //num col
+//      int idx3 = (((j-d-d*d - idx1) / d) - idx2) / d; //num "depth"
+//      g[j] = 0.0;
+//      if (idx1 == idx2)
+//        g[j] -= Y[i] * X[mi(i,idx3,n,d)];
+//      if (idx1 == idx3)
+//        g[j] -= Y[i] * X[mi(i,idx2,n,d)];
+//      if (idx2 == idx3)
+//        g[j] -= Y[i] * X[mi(i,idx1,n,d)];
+//      g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
+//    }
+//    // Add 1/n t(gi) %*% gi to W
+//    for (int j=0; j<dim; j++)
+//    {
+//      for (int k=0; k<dim; k++)
+//        W[j*dim+k] += g[j] * g[k] / n;
+//    }
+//  }
+//  free(g);
+//}
+
+// Optimisation attempt:
+void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
 {
   int n=*pn, d=*pd;
   int dim = d + d*d + d*d*d;
@@ -73,17 +125,21 @@ void Compute_Omega(double* X, double* Y, double* M, int* pn, int* pd, double* W)
   double* g = (double*)malloc(dim*sizeof(double));
   for (int i=0; i<n; i++)
   {
+    printf("i: %i\n",i);
     // g == gi:
     for (int j=0; j<d; j++)
-      g[j] = Y[i] * X[mi(i,j,n,d)] - M[j];
+      g[j] = (Y[i] ? X[mi(i,j,n,d)] - M[j] : 0.0);
     for (int j=d; j<d+(d*d); j++)
     {
       int idx1 = (j-d) % d; //num row
       int idx2 = ((j-d) - idx1) / d; //num col
       g[j] = 0.0;
-      if (idx1 == idx2)
-        g[j] -= Y[i];
-      g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j];
+      if (Y[i])
+      {
+        if (idx1 == idx2)
+          g[j]--;
+        g[j] += X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j];
+      }
     }
     for (int j=d+d*d; j<dim; j++)
     {
@@ -91,13 +147,16 @@ void Compute_Omega(double* X, double* Y, double* M, int* pn, int* pd, double* W)
       int idx2 = ((j-d-d*d - idx1) / d) %d; //num col
       int idx3 = (((j-d-d*d - idx1) / d) - idx2) / d; //num "depth"
       g[j] = 0.0;
-      if (idx1 == idx2)
-        g[j] -= Y[i] * X[mi(i,idx3,n,d)];
-      if (idx1 == idx3)
-        g[j] -= Y[i] * X[mi(i,idx2,n,d)];
-      if (idx2 == idx3)
-        g[j] -= Y[i] * X[mi(i,idx1,n,d)];
-      g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
+      if (Y[i])
+      {
+        if (idx1 == idx2)
+          g[j] -= X[mi(i,idx3,n,d)];
+        if (idx1 == idx3)
+          g[j] -= X[mi(i,idx2,n,d)];
+        if (idx2 == idx3)
+          g[j] -= X[mi(i,idx1,n,d)];
+        g[j] += X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
+      }
     }
     // Add 1/n t(gi) %*% gi to W
     for (int j=0; j<dim; j++)
diff --git a/reports/accuracy.R b/reports/accuracy.R
index 535f88f..5eadd3b 100644
--- a/reports/accuracy.R
+++ b/reports/accuracy.R
@@ -1,8 +1,8 @@
-optimBeta <- function(N, n, K, p, beta, b, link, weights, ncores)
+optimBeta <- function(N, n, K, p, beta, b, link, ncores)
 {
 	library(morpheus)
 	res <- multiRun(
-		list(n=n, p=p, beta=beta, b=b, optargs=list(K=K, link=link, weights=weights)),
+		list(n=n, p=p, beta=beta, b=b, optargs=list(K=K, link=link)),
 		list(
 			# morpheus
 			function(fargs) {
@@ -68,8 +68,6 @@ N <- 10
 d <- 2
 n <- 1e4
 ncores <- 1
-strw <- "1-1-1"
-weights <- c(1,1,1)
 
 cmd_args <- commandArgs()
 for (arg in cmd_args)
@@ -86,9 +84,6 @@ for (arg in cmd_args)
 			d <- as.integer(spl[2])
 		} else if (spl[1] == "link") {
 			link <- spl[2]
-		} else if (spl[1] == "weights") {
-			strw <- spl[2]
-		  weights <- as.numeric(unlist(strsplit(spl[2], ",")))
 		}
 	}
 }
@@ -115,8 +110,8 @@ if (d == 2) {
 	beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K )
 }
 
-mr <- optimBeta(N, n, K, p, beta, b, link, weights, ncores)
+mr <- optimBeta(N, n, K, p, beta, b, link, ncores)
 mr_params <- list("N"=N, "nc"=ncores, "n"=n, "K"=K, "d"=d, "link"=link,
-	"p"=c(p,1-sum(p)), "beta"=beta, "b"=b, "weights"=weights)
+	"p"=c(p,1-sum(p)), "beta"=beta, "b"=b)
 
-save("mr", "mr_params", file=paste("res_",n,"_",d,"_",link,"_",strw,".RData",sep=""))
+save("mr", "mr_params", file=paste("res_",n,"_",d,"_",link,".RData",sep=""))
diff --git a/reports/run_accu_cl.sh b/reports/run_accu_cl.sh
index 04f1519..c8dcb5d 100644
--- a/reports/run_accu_cl.sh
+++ b/reports/run_accu_cl.sh
@@ -14,20 +14,10 @@ module load R/3.6.1
 
 N=100
 n=1e5
-nc=10
+nc=50
 
 for d in 2 5 10; do
 	for link in "logit" "probit"; do
-		for weights in "1,1,0"; do
-			R --slave --args N=$N n=$n nc=$nc d=$d link=$link weights=$weights <accuracy.R >out_${n}_${link}_${d}_${weights} 2>&1
-		done
+		R --slave --args N=$N n=$n nc=$nc d=$d link=$link <accuracy.R >out_${n}_${link}_${d}_${weights} 2>&1
 	done
 done
-
-#for d in 2 5; do
-#	for n in 5000 10000 100000 500000 1000000; do
-#		for link in "logit" "probit"; do
-#			R --slave --args N=$N n=$n nc=$nc d=$d link=$link <accuracy.R >out_$n$link$d 2>&1
-#		done
-#	done
-#done
-- 
2.44.0