From 98b8a5ddffdce7e0b63746d4b58bb923049dca7d Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Mon, 23 Sep 2019 01:15:19 +0200
Subject: [PATCH] Add weights handling (experimental)

---
 pkg/R/optimParams.R | 32 +++++++++++++++++++++-----------
 reports/accuracy.R  | 11 +++++++----
 reports/test.sh     | 16 ++++++++++++++++
 3 files changed, 44 insertions(+), 15 deletions(-)
 create mode 100644 reports/test.sh

diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R
index 2eada8f..4f886ac 100644
--- a/pkg/R/optimParams.R
+++ b/pkg/R/optimParams.R
@@ -10,6 +10,7 @@
 #'     \item 'M' : list of moments of order 1,2,3: will be computed if not provided.
 #'     \item 'X,Y' : input/output, mandatory if moments not given
 #'     \item 'exact': use exact formulas when available?
+#'     \item weights Weights on moments when minimizing sum of squares
 #'   }
 #'
 #' @return An object 'op' of class OptimParams, initialized so that \code{op$run(x0)}
@@ -56,9 +57,14 @@ optimParams = function(K, link=c("logit","probit"), optargs=list())
 		M <- computeMoments(optargs$X,optargs$Y)
 	}
 
+  weights <- optargs$weights
+  if (is.null(weights))
+    weights <- rep(1, K)
+
 	# Build and return optimization algorithm object
 	methods::new("OptimParams", "li"=link, "M1"=as.double(M[[1]]),
-		"M2"=as.double(M[[2]]), "M3"=as.double(M[[3]]), "K"=as.integer(K))
+		"M2"=as.double(M[[2]]), "M3"=as.double(M[[3]]),
+    "weights"=weights, "K"=as.integer(K))
 }
 
 # Encapsulated optimization for p (proportions), β and b (regression parameters)
@@ -67,6 +73,7 @@ optimParams = function(K, link=c("logit","probit"), optargs=list())
 # @field M1 Estimated first-order moment
 # @field M2 Estimated second-order moment (flattened)
 # @field M3 Estimated third-order moment (flattened)
+# @field weights Vector of moments' weights
 # @field K Number of populations
 # @field d Number of dimensions
 #
@@ -132,9 +139,9 @@ setRefClass(
 			β3 <- apply(β, 2, function(col) col %o% col %o% col)
 
 			return(
-				sum( ( β  %*% (p * .G(li,1,λ,b)) - M1 )^2 ) +
-				sum( ( β2 %*% (p * .G(li,2,λ,b)) - M2 )^2 ) +
-				sum( ( β3 %*% (p * .G(li,3,λ,b)) - M3 )^2 ) )
+				weights[1] * sum( ( β  %*% (p * .G(li,1,λ,b)) - M1 )^2 ) +
+				weights[2] * sum( ( β2 %*% (p * .G(li,2,λ,b)) - M2 )^2 ) +
+				weights[3] * sum( ( β3 %*% (p * .G(li,3,λ,b)) - M3 )^2 ) )
 		},
 
 		grad_f = function(x)
@@ -166,9 +173,9 @@ setRefClass(
 
 			km1 = 1:(K-1)
 			grad <- #gradient on p
-				t( sweep(as.matrix(β [,km1]), 2, G1[km1], '*') - G1[K] * β [,K] ) %*% F1 +
-				t( sweep(as.matrix(β2[,km1]), 2, G2[km1], '*') - G2[K] * β2[,K] ) %*% F2 +
-				t( sweep(as.matrix(β3[,km1]), 2, G3[km1], '*') - G3[K] * β3[,K] ) %*% F3
+				weights[1] * t( sweep(as.matrix(β [,km1]), 2, G1[km1], '*') - G1[K] * β [,K] ) %*% F1 +
+				weights[2] * t( sweep(as.matrix(β2[,km1]), 2, G2[km1], '*') - G2[K] * β2[,K] ) %*% F2 +
+				weights[3] * t( sweep(as.matrix(β3[,km1]), 2, G3[km1], '*') - G3[K] * β3[,K] ) %*% F3
 
 			grad_β <- matrix(nrow=d, ncol=K)
 			for (i in 1:d)
@@ -197,14 +204,17 @@ setRefClass(
 				dβ3_right[block,] <- dβ3_right[block,] + β2
 				dβ3 <- dβ3_left + sweep(dβ3_right, 2, p * G3, '*')
 
-				grad_β[i,] <- t(dβ) %*% F1 + t(dβ2) %*% F2 + t(dβ3) %*% F3
+				grad_β[i,] <-
+          weights[1] * t(dβ) %*% F1 +
+          weights[2] * t(dβ2) %*% F2 +
+          weights[3] * t(dβ3) %*% F3
 			}
 			grad <- c(grad, as.double(grad_β))
 
 			grad = c(grad, #gradient on b
-				t( sweep(β,  2, p * G2, '*') ) %*% F1 +
-				t( sweep(β2, 2, p * G3, '*') ) %*% F2 +
-				t( sweep(β3, 2, p * G4, '*') ) %*% F3 )
+				weights[1] * t( sweep(β,  2, p * G2, '*') ) %*% F1 +
+				weights[2] * t( sweep(β2, 2, p * G3, '*') ) %*% F2 +
+				weights[3] * t( sweep(β3, 2, p * G4, '*') ) %*% F3 )
 
 			grad
 		},
diff --git a/reports/accuracy.R b/reports/accuracy.R
index 2381524..c33fa0f 100644
--- a/reports/accuracy.R
+++ b/reports/accuracy.R
@@ -1,8 +1,8 @@
-optimBeta <- function(N, n, K, p, beta, b, link, ncores)
+optimBeta <- function(N, n, K, p, beta, b, link, weights, ncores)
 {
 	library(morpheus)
 	res <- multiRun(
-		list(n=n, p=p, beta=beta, b=b, optargs=list(K=K, link=link)),
+		list(n=n, p=p, beta=beta, b=b, optargs=list(K=K, link=link, weights=weights)),
 		list(
 			# morpheus
 			function(fargs) {
@@ -71,6 +71,7 @@ N <- 10
 d <- 2
 n <- 1e4
 ncores <- 1
+weights <- c(1,1,1)
 
 cmd_args <- commandArgs()
 for (arg in cmd_args)
@@ -87,6 +88,8 @@ for (arg in cmd_args)
 			d <- as.integer(spl[2])
 		} else if (spl[1] == "link") {
 			link <- spl[2]
+		} else if (spl[1] == "weights") {
+		  weights <- unlist(strsplit(spl[2], ","))
 		}
 	}
 }
@@ -113,8 +116,8 @@ if (d == 2) {
 	beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K )
 }
 
-mr <- optimBeta(N, n, K, p, beta, b, link, ncores)
+mr <- optimBeta(N, n, K, p, beta, b, link, weights, ncores)
 mr_params <- list("N"=N, "n"=n, "K"=K, "d"=d, "link"=link,
-	"p"=c(p,1-sum(p)), "beta"=beta, "b"=b)
+	"p"=c(p,1-sum(p)), "beta"=beta, "b"=b, "weights"=weights)
 
 save("mr", "mr_params", file=paste("multirun_",n,"_",d,"_",link,".RData",sep=""))
diff --git a/reports/test.sh b/reports/test.sh
new file mode 100644
index 0000000..b617a09
--- /dev/null
+++ b/reports/test.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# arg --vanilla maybe possible on cluster
+for d in 2 5; do
+	for link in "logit" "probit"; do
+		R --slave --args N=10 n=1e3 nc=3 d=$d link=$link <accuracy.R >out$d$link 2>&1
+	done
+done
+
+#for d in 2 5; do
+#	for n in 5000 10000 100000 500000 1000000; do
+#		for link in "logit" "probit"; do
+#			R --slave --args N=1000 n=$n nc=64 d=$d link=$link <accuracy.R >out_$n$link$d 2>&1
+#		done
+#	done
+#done
-- 
2.44.0