From 1d014a8640a33f16f0d21244a1419134e8553910 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Fri, 20 Sep 2019 08:59:44 +0200
Subject: [PATCH] Add updated files for reporting

---
 .gitignore             |   4 +-
 reports/accuracy.R     | 100 +++++++++++++++++++++++++++++++++++++++++
 reports/multistart.R   |  85 +++++++++++++++++++++++++++++++++++
 reports/run_accu_cl.sh |  19 ++++++++
 reports/run_time_cl.sh |  15 +++++++
 reports/timings.R      |  84 ++++++++++++++++++++++++++++++++++
 6 files changed, 306 insertions(+), 1 deletion(-)
 create mode 100644 reports/accuracy.R
 create mode 100644 reports/multistart.R
 create mode 100644 reports/run_accu_cl.sh
 create mode 100644 reports/run_time_cl.sh
 create mode 100644 reports/timings.R

diff --git a/.gitignore b/.gitignore
index 2c6aecc..260e0f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,7 +11,9 @@ NAMESPACE
 /pkg-cran/
 
 # Ignore various reports
-/reports/
+/reports/*
+!/reports/*.R
+!/reports/*.sh
 /vignettes/report.html
 /vignettes/report_cache/
 /vignettes/report_files/
diff --git a/reports/accuracy.R b/reports/accuracy.R
new file mode 100644
index 0000000..2f35792
--- /dev/null
+++ b/reports/accuracy.R
@@ -0,0 +1,100 @@
+optimBeta <- function(N, n, K, p, beta, b, link, ncores)
+{
+	library(morpheus)
+	res <- multiRun(
+		list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link)),
+		list(
+			# morpheus
+			function(fargs) {
+				library(morpheus)
+				K <- fargs$optargs$K
+				M <- computeMoments(fargs$X, fargs$Y)
+				fargs$optargs$M <- M
+				mu <- computeMu(fargs$X, fargs$Y, fargs$optargs)
+				op <- optimParams(K,link,fargs$optargs)
+				x_init <- list(p=rep(1/K,K-1), beta=mu, b=rep(0,K))
+				do.call(rbind, op$run(x_init))
+			},
+			# flexmix
+			function(fargs) {
+				library(flexmix)
+				source("../patch_Bettina/FLXMRglm.R")
+				K <- fargs$optargs$K
+				dat <- as.data.frame( cbind(fargs$Y,fargs$X) )
+				fm <- flexmix( cbind(V1, 1-V1) ~ .-V1, data=dat, k=K,
+					model = FLXMRglm(family = binomial(link = link)) )
+				p <- mean(fm@posterior[["scaled"]][,1])
+				out <- refit(fm)
+				beta_b <- sapply( seq_len(K), function(i) {
+					as.double( out@components[[1]][[i]][,1] )
+				} )
+				rbind(p, beta_b[2:nrow(beta_b),], beta_b[1,])
+			} ),
+		prepareArgs = function(fargs, index) {
+			library(morpheus)
+			io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link)
+			fargs$X = io$X
+			fargs$Y = io$Y
+			fargs$optargs$K = ncol(fargs$beta)
+			fargs$optargs$M = computeMoments(io$X,io$Y)
+			fargs
+		}, N=N, ncores=ncores, verbose=TRUE)
+	p <- c(p, 1-sum(p))
+	for (i in 1:2)
+		res[[i]] <- alignMatrices(res[[i]], ref=rbind(p,beta,b), ls_mode="exact")
+	res
+}
+
+#model = binomial; default values:
+link = "logit"
+N <- 10
+d <- 2
+n <- 1e4
+ncores <- 1
+
+cmd_args <- commandArgs()
+for (arg in cmd_args)
+{
+	if (substr(arg,1,1)!='-') {
+		spl <- strsplit(arg,'=')[[1]]
+		if (spl[1] == "nc") {
+			ncores <- as.integer(spl[2])
+		} else if (spl[1] == "N") {
+			N <- as.integer(spl[2])
+		} else if (spl[1] == "n") {
+			n <- as.integer(spl[2])
+		} else if (spl[1] == "d") {
+			d <- as.integer(spl[2])
+		} else if (spl[1] == "link") {
+			link <- spl[2]
+		}
+	}
+}
+
+if (d == 2) {
+	K <- 2
+	p <- .5
+	b <- c(-.2, .5)
+	beta <- matrix( c(1,-2, 3,1), ncol=K )
+} else if (d == 5) {
+	K <- 2
+	p <- .5
+	b <- c(-.2, .5)
+	beta <- matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K )
+} else if (d == 10) {
+	K <- 3
+	p <- c(.3, .3)
+	b <- c(-.2, 0, .5)
+	beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2), ncol=K )
+} else if (d == 20) {
+	K <- 3
+	p <- c(.3, .3)
+	b <- c(-.2, 0, .5)
+	beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K )
+}
+
+mr <- optimBeta(N, n, K, p, beta, b, link, ncores)
+mr_params <- list("N"=N, "n"=n, "K"=K, "d"=d, "link"=link,
+	"p"=c(p,1-sum(p)), "beta"=beta, "b"=b)
+
+save("mr", "mr_params", file=paste("multirun_",d,"_",link,".RData",sep=""))
diff --git a/reports/multistart.R b/reports/multistart.R
new file mode 100644
index 0000000..9971cd2
--- /dev/null
+++ b/reports/multistart.R
@@ -0,0 +1,85 @@
+library(morpheus)
+
+#model = binomial
+K <- 2
+p <- .5
+b <- c(-.2, .5)
+# Default values:
+link = "logit"
+N <- 100
+d <- 2
+n <- 1e4
+ncores <- 1
+nstart <- 3 #nstart-1 random starting points for each MC run
+
+cmd_args <- commandArgs()
+for (arg in cmd_args)
+{
+	if (substr(arg,1,1)!='-')
+	{
+		spl <- strsplit(arg,'=')[[1]]
+		if (spl[1] == "nc") {
+			ncores <- as.integer(spl[2])
+		} else if (spl[1] == "N") {
+			N <- as.integer(spl[2])
+		} else if (spl[1] == "n") {
+			n <- as.integer(spl[2])
+		} else if (spl[1] == "d") {
+			d <- as.integer(spl[2])
+		} else if (spl[1] == "link") {
+			link <- spl[2]
+		} else if (spl[1] == "nstart") {
+			nstart <- spl[2]
+		}
+	}
+}
+betas <- list(
+	matrix( c(1,-2, 3,1), ncol=K ), #d=2
+	matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K ), #d=5
+	matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0), ncol=K ) ) #d=10
+beta <- betas[[ ifelse( d==2, 1, ifelse(d==5,2,3) ) ]]
+
+ms <- multiRun(
+	list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link,nstart=nstart)), list(
+		function(fargs) {
+			# 1 start
+			library(morpheus)
+			K <- fargs$optargs$K
+			op <- optimParams(K, fargs$optargs$link, fargs$optargs)
+			x_init <- list(p=rep(1/K,K-1), beta=fargs$mu, b=rep(0,K))
+			do.call(rbind,op$run(x_init))
+		},
+		function(fargs) {
+			# B starts
+			library(morpheus)
+			K <- fargs$optargs$K
+			op <- optimParams(K, fargs$optargs$link, fargs$optargs)
+			best_val <- Inf
+			best_par <- list()
+			for (i in 1:fargs$optargs$nstart)
+			{
+				x_init <- list(p=rep(1/K,K-1), beta=i*fargs$mu, b=rep(0,K))
+				par <- op$run(x_init)
+				val <- op$f( op$linArgs(par) )
+				if (val < best_val)
+				{
+					best_par <- par
+					best_val <- val
+				}
+			}
+			do.call(rbind,best_par)
+		}),
+		prepareArgs = function(fargs) {
+			library(morpheus)
+			io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link)
+			fargs$optargs$M <- computeMoments(io$X, io$Y)
+			mu <- computeMu(io$X, io$Y, fargs$optargs)
+			fargs$mu <- mu
+		}, N=N, ncores=ncores, verbose=TRUE)
+for (i in 1:2)
+	ms[[i]] <- alignMatrices(ms[[i]], ref=rbind(p,beta,b), ls_mode="exact")
+
+ms_params <- list("N"=N, "nc"=ncores, "n"=n, "K"=K, "link"=link,
+	"p"=p, "beta"=beta, "b"=b, "nstart"=nstart)
+
+save(ms, ms_params, file="multistart.RData")
diff --git a/reports/run_accu_cl.sh b/reports/run_accu_cl.sh
new file mode 100644
index 0000000..d1d8e37
--- /dev/null
+++ b/reports/run_accu_cl.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+#PBS -l nodes=1:ppn=15,mem=8gb,pmem=512mb
+#PBS -j oe
+
+#PBS -o .output
+rm -f .output
+
+WORKDIR=/workdir2/auder/morpheus/reports
+cd $WORKDIR
+
+module load R
+
+# arg --vanilla maybe possible on cluster
+for d in 2 5 10 20; do
+	for link in "logit" "probit"; do
+		R --slave --args N=1000 n=1e5 nc=15 d=$d link=$link <accuracy.R >out$d$link 2>&1
+	done
+done
diff --git a/reports/run_time_cl.sh b/reports/run_time_cl.sh
new file mode 100644
index 0000000..3055072
--- /dev/null
+++ b/reports/run_time_cl.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+#PBS -l nodes=1:ppn=16,mem=8gb,pmem=512mb
+#PBS -j oe
+
+#PBS -o .output
+rm -f .output
+
+WORKDIR=/workdir2/auder/morpheus/reports
+cd $WORKDIR
+
+module load R
+
+# arg --vanilla maybe possible on cluster
+R --slave --args N=1000 nc=16 link=logit <timings.R >out 2>&1
diff --git a/reports/timings.R b/reports/timings.R
new file mode 100644
index 0000000..704fd82
--- /dev/null
+++ b/reports/timings.R
@@ -0,0 +1,84 @@
+# flexmix optimization to get beta
+fmOptim <- function(X, Y, K, link)
+{
+	dat <- as.data.frame( cbind(Y,X) )
+	fm <- flexmix( cbind(Y, 1-Y) ~ .-Y, data=dat, k=K,
+		model = FLXMRglm(family = binomial(link = link)) )
+	p <- mean(fm@posterior[["scaled"]][,1])
+	out <- refit(fm)
+	beta_b <- sapply( seq_len(K), function(i) as.double( out@components[[1]][[i]][,1] ) )
+	list("p"=p, "beta"=beta_b[2:nrow(beta_b),], "b"=beta_b[1,])
+	NULL
+}
+
+# Our package optimization for beta (using mu as a starting point)
+ourOptim <- function(X, Y, K, link)
+{
+	M <- computeMoments(X, Y)
+	mu <- computeMu(X, Y, list(K=K,M=M))
+	x_init = c(1/2, as.double(mu), c(0,0))
+	optimParams(K, link, list(M=M))$run(x_init)
+	NULL
+}
+
+# Get timings for both methods with the same beta matrix
+getTimings <- function(link)
+{
+	timings <- list('fm'=matrix(0,nrow=10,ncol=7),'our'=matrix(0,nrow=10,ncol=7))
+	K <- 2
+	for (d in c(2,5,10))
+	{
+		beta <- matrix(runif(d*K,min=-5,max=5),ncol=K)
+		for (logn in 4:6)
+		{
+			n <- 10^logn
+			io <- generateSampleIO(n, rep(1/K,K-1), beta, runif(K), link)
+			timings[['fm']][d,logn] <- system.time(fmOptim(io$X,io$Y,K,link))[3]
+			timings[['our']][d,logn] <- system.time(ourOptim(io$X,io$Y,K,link))[3]
+		}
+	}
+	timings
+}
+
+#model = binomial
+link <- "logit"
+ncores <- 1
+N <- 100
+
+cmd_args <- commandArgs()
+for (arg in cmd_args)
+{
+	if (substr(arg,1,1)!='-')
+	{
+		spl <- strsplit(arg,'=')[[1]]
+		if (spl[1] == "link") {
+			link <- spl[2]
+		} else if (spl[1] == "nc") {
+			ncores <- as.integer(spl[2])
+		} else if (spl[1] == "N") {
+			N <- as.integer(spl[2])
+		}
+	}
+}
+
+library(morpheus)
+library(flexmix)
+source("../patch_Bettina/FLXMRglm.R")
+
+tm <-
+	if (ncores == 1) {
+		lapply(1:N, function(i) {
+			print(paste("Run",i))
+			getTimings(link)
+		})
+	} else {
+		library(parallel)
+		mclapply(1:N, function(i) {
+			print(paste("Run",i))
+			getTimings(link)
+		},
+		mc.preschedule=FALSE, mc.cores=ncores)
+	}
+tm_params <- list("link"=link, "N"=N, "nc"=ncores)
+
+save("tm", "tm_params", file="timings.RData")
-- 
2.44.0