From 98b8a5ddffdce7e0b63746d4b58bb923049dca7d Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 23 Sep 2019 01:15:19 +0200 Subject: [PATCH] Add weights handling (experimental) --- pkg/R/optimParams.R | 32 +++++++++++++++++++++----------- reports/accuracy.R | 11 +++++++---- reports/test.sh | 16 ++++++++++++++++ 3 files changed, 44 insertions(+), 15 deletions(-) create mode 100644 reports/test.sh diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R index 2eada8f..4f886ac 100644 --- a/pkg/R/optimParams.R +++ b/pkg/R/optimParams.R @@ -10,6 +10,7 @@ #' \item 'M' : list of moments of order 1,2,3: will be computed if not provided. #' \item 'X,Y' : input/output, mandatory if moments not given #' \item 'exact': use exact formulas when available? +#' \item weights Weights on moments when minimizing sum of squares #' } #' #' @return An object 'op' of class OptimParams, initialized so that \code{op$run(x0)} @@ -56,9 +57,14 @@ optimParams = function(K, link=c("logit","probit"), optargs=list()) M <- computeMoments(optargs$X,optargs$Y) } + weights <- optargs$weights + if (is.null(weights)) + weights <- rep(1, K) + # Build and return optimization algorithm object methods::new("OptimParams", "li"=link, "M1"=as.double(M[[1]]), - "M2"=as.double(M[[2]]), "M3"=as.double(M[[3]]), "K"=as.integer(K)) + "M2"=as.double(M[[2]]), "M3"=as.double(M[[3]]), + "weights"=weights, "K"=as.integer(K)) } # Encapsulated optimization for p (proportions), β and b (regression parameters) @@ -67,6 +73,7 @@ optimParams = function(K, link=c("logit","probit"), optargs=list()) # @field M1 Estimated first-order moment # @field M2 Estimated second-order moment (flattened) # @field M3 Estimated third-order moment (flattened) +# @field weights Vector of moments' weights # @field K Number of populations # @field d Number of dimensions # @@ -132,9 +139,9 @@ setRefClass( β3 <- apply(β, 2, function(col) col %o% col %o% col) return( - sum( ( β %*% (p * .G(li,1,λ,b)) - M1 )^2 ) + - sum( ( β2 %*% (p * .G(li,2,λ,b)) - M2 )^2 ) + - sum( ( β3 %*% (p * .G(li,3,λ,b)) - M3 )^2 ) ) + weights[1] * sum( ( β %*% (p * .G(li,1,λ,b)) - M1 )^2 ) + + weights[2] * sum( ( β2 %*% (p * .G(li,2,λ,b)) - M2 )^2 ) + + weights[3] * sum( ( β3 %*% (p * .G(li,3,λ,b)) - M3 )^2 ) ) }, grad_f = function(x) @@ -166,9 +173,9 @@ setRefClass( km1 = 1:(K-1) grad <- #gradient on p - t( sweep(as.matrix(β [,km1]), 2, G1[km1], '*') - G1[K] * β [,K] ) %*% F1 + - t( sweep(as.matrix(β2[,km1]), 2, G2[km1], '*') - G2[K] * β2[,K] ) %*% F2 + - t( sweep(as.matrix(β3[,km1]), 2, G3[km1], '*') - G3[K] * β3[,K] ) %*% F3 + weights[1] * t( sweep(as.matrix(β [,km1]), 2, G1[km1], '*') - G1[K] * β [,K] ) %*% F1 + + weights[2] * t( sweep(as.matrix(β2[,km1]), 2, G2[km1], '*') - G2[K] * β2[,K] ) %*% F2 + + weights[3] * t( sweep(as.matrix(β3[,km1]), 2, G3[km1], '*') - G3[K] * β3[,K] ) %*% F3 grad_β <- matrix(nrow=d, ncol=K) for (i in 1:d) @@ -197,14 +204,17 @@ setRefClass( dβ3_right[block,] <- dβ3_right[block,] + β2 dβ3 <- dβ3_left + sweep(dβ3_right, 2, p * G3, '*') - grad_β[i,] <- t(dβ) %*% F1 + t(dβ2) %*% F2 + t(dβ3) %*% F3 + grad_β[i,] <- + weights[1] * t(dβ) %*% F1 + + weights[2] * t(dβ2) %*% F2 + + weights[3] * t(dβ3) %*% F3 } grad <- c(grad, as.double(grad_β)) grad = c(grad, #gradient on b - t( sweep(β, 2, p * G2, '*') ) %*% F1 + - t( sweep(β2, 2, p * G3, '*') ) %*% F2 + - t( sweep(β3, 2, p * G4, '*') ) %*% F3 ) + weights[1] * t( sweep(β, 2, p * G2, '*') ) %*% F1 + + weights[2] * t( sweep(β2, 2, p * G3, '*') ) %*% F2 + + weights[3] * t( sweep(β3, 2, p * G4, '*') ) %*% F3 ) grad }, diff --git a/reports/accuracy.R b/reports/accuracy.R index 2381524..c33fa0f 100644 --- a/reports/accuracy.R +++ b/reports/accuracy.R @@ -1,8 +1,8 @@ -optimBeta <- function(N, n, K, p, beta, b, link, ncores) +optimBeta <- function(N, n, K, p, beta, b, link, weights, ncores) { library(morpheus) res <- multiRun( - list(n=n, p=p, beta=beta, b=b, optargs=list(K=K, link=link)), + list(n=n, p=p, beta=beta, b=b, optargs=list(K=K, link=link, weights=weights)), list( # morpheus function(fargs) { @@ -71,6 +71,7 @@ N <- 10 d <- 2 n <- 1e4 ncores <- 1 +weights <- c(1,1,1) cmd_args <- commandArgs() for (arg in cmd_args) @@ -87,6 +88,8 @@ for (arg in cmd_args) d <- as.integer(spl[2]) } else if (spl[1] == "link") { link <- spl[2] + } else if (spl[1] == "weights") { + weights <- unlist(strsplit(spl[2], ",")) } } } @@ -113,8 +116,8 @@ if (d == 2) { beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K ) } -mr <- optimBeta(N, n, K, p, beta, b, link, ncores) +mr <- optimBeta(N, n, K, p, beta, b, link, weights, ncores) mr_params <- list("N"=N, "n"=n, "K"=K, "d"=d, "link"=link, - "p"=c(p,1-sum(p)), "beta"=beta, "b"=b) + "p"=c(p,1-sum(p)), "beta"=beta, "b"=b, "weights"=weights) save("mr", "mr_params", file=paste("multirun_",n,"_",d,"_",link,".RData",sep="")) diff --git a/reports/test.sh b/reports/test.sh new file mode 100644 index 0000000..b617a09 --- /dev/null +++ b/reports/test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# arg --vanilla maybe possible on cluster +for d in 2 5; do + for link in "logit" "probit"; do + R --slave --args N=10 n=1e3 nc=3 d=$d link=$link out$d$link 2>&1 + done +done + +#for d in 2 5; do +# for n in 5000 10000 100000 500000 1000000; do +# for link in "logit" "probit"; do +# R --slave --args N=1000 n=$n nc=64 d=$d link=$link out_$n$link$d 2>&1 +# done +# done +#done -- 2.44.0