From 1d014a8640a33f16f0d21244a1419134e8553910 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 20 Sep 2019 08:59:44 +0200 Subject: [PATCH] Add updated files for reporting --- .gitignore | 4 +- reports/accuracy.R | 100 +++++++++++++++++++++++++++++++++++++++++ reports/multistart.R | 85 +++++++++++++++++++++++++++++++++++ reports/run_accu_cl.sh | 19 ++++++++ reports/run_time_cl.sh | 15 +++++++ reports/timings.R | 84 ++++++++++++++++++++++++++++++++++ 6 files changed, 306 insertions(+), 1 deletion(-) create mode 100644 reports/accuracy.R create mode 100644 reports/multistart.R create mode 100644 reports/run_accu_cl.sh create mode 100644 reports/run_time_cl.sh create mode 100644 reports/timings.R diff --git a/.gitignore b/.gitignore index 2c6aecc..260e0f5 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,9 @@ NAMESPACE /pkg-cran/ # Ignore various reports -/reports/ +/reports/* +!/reports/*.R +!/reports/*.sh /vignettes/report.html /vignettes/report_cache/ /vignettes/report_files/ diff --git a/reports/accuracy.R b/reports/accuracy.R new file mode 100644 index 0000000..2f35792 --- /dev/null +++ b/reports/accuracy.R @@ -0,0 +1,100 @@ +optimBeta <- function(N, n, K, p, beta, b, link, ncores) +{ + library(morpheus) + res <- multiRun( + list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link)), + list( + # morpheus + function(fargs) { + library(morpheus) + K <- fargs$optargs$K + M <- computeMoments(fargs$X, fargs$Y) + fargs$optargs$M <- M + mu <- computeMu(fargs$X, fargs$Y, fargs$optargs) + op <- optimParams(K,link,fargs$optargs) + x_init <- list(p=rep(1/K,K-1), beta=mu, b=rep(0,K)) + do.call(rbind, op$run(x_init)) + }, + # flexmix + function(fargs) { + library(flexmix) + source("../patch_Bettina/FLXMRglm.R") + K <- fargs$optargs$K + dat <- as.data.frame( cbind(fargs$Y,fargs$X) ) + fm <- flexmix( cbind(V1, 1-V1) ~ .-V1, data=dat, k=K, + model = FLXMRglm(family = binomial(link = link)) ) + p <- mean(fm@posterior[["scaled"]][,1]) + out <- refit(fm) + beta_b <- sapply( seq_len(K), function(i) { + as.double( out@components[[1]][[i]][,1] ) + } ) + rbind(p, beta_b[2:nrow(beta_b),], beta_b[1,]) + } ), + prepareArgs = function(fargs, index) { + library(morpheus) + io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link) + fargs$X = io$X + fargs$Y = io$Y + fargs$optargs$K = ncol(fargs$beta) + fargs$optargs$M = computeMoments(io$X,io$Y) + fargs + }, N=N, ncores=ncores, verbose=TRUE) + p <- c(p, 1-sum(p)) + for (i in 1:2) + res[[i]] <- alignMatrices(res[[i]], ref=rbind(p,beta,b), ls_mode="exact") + res +} + +#model = binomial; default values: +link = "logit" +N <- 10 +d <- 2 +n <- 1e4 +ncores <- 1 + +cmd_args <- commandArgs() +for (arg in cmd_args) +{ + if (substr(arg,1,1)!='-') { + spl <- strsplit(arg,'=')[[1]] + if (spl[1] == "nc") { + ncores <- as.integer(spl[2]) + } else if (spl[1] == "N") { + N <- as.integer(spl[2]) + } else if (spl[1] == "n") { + n <- as.integer(spl[2]) + } else if (spl[1] == "d") { + d <- as.integer(spl[2]) + } else if (spl[1] == "link") { + link <- spl[2] + } + } +} + +if (d == 2) { + K <- 2 + p <- .5 + b <- c(-.2, .5) + beta <- matrix( c(1,-2, 3,1), ncol=K ) +} else if (d == 5) { + K <- 2 + p <- .5 + b <- c(-.2, .5) + beta <- matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K ) +} else if (d == 10) { + K <- 3 + p <- c(.3, .3) + b <- c(-.2, 0, .5) + beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2), ncol=K ) +} else if (d == 20) { + K <- 3 + p <- c(.3, .3) + b <- c(-.2, 0, .5) + beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K ) +} + +mr <- optimBeta(N, n, K, p, beta, b, link, ncores) +mr_params <- list("N"=N, "n"=n, "K"=K, "d"=d, "link"=link, + "p"=c(p,1-sum(p)), "beta"=beta, "b"=b) + +save("mr", "mr_params", file=paste("multirun_",d,"_",link,".RData",sep="")) diff --git a/reports/multistart.R b/reports/multistart.R new file mode 100644 index 0000000..9971cd2 --- /dev/null +++ b/reports/multistart.R @@ -0,0 +1,85 @@ +library(morpheus) + +#model = binomial +K <- 2 +p <- .5 +b <- c(-.2, .5) +# Default values: +link = "logit" +N <- 100 +d <- 2 +n <- 1e4 +ncores <- 1 +nstart <- 3 #nstart-1 random starting points for each MC run + +cmd_args <- commandArgs() +for (arg in cmd_args) +{ + if (substr(arg,1,1)!='-') + { + spl <- strsplit(arg,'=')[[1]] + if (spl[1] == "nc") { + ncores <- as.integer(spl[2]) + } else if (spl[1] == "N") { + N <- as.integer(spl[2]) + } else if (spl[1] == "n") { + n <- as.integer(spl[2]) + } else if (spl[1] == "d") { + d <- as.integer(spl[2]) + } else if (spl[1] == "link") { + link <- spl[2] + } else if (spl[1] == "nstart") { + nstart <- spl[2] + } + } +} +betas <- list( + matrix( c(1,-2, 3,1), ncol=K ), #d=2 + matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K ), #d=5 + matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0), ncol=K ) ) #d=10 +beta <- betas[[ ifelse( d==2, 1, ifelse(d==5,2,3) ) ]] + +ms <- multiRun( + list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link,nstart=nstart)), list( + function(fargs) { + # 1 start + library(morpheus) + K <- fargs$optargs$K + op <- optimParams(K, fargs$optargs$link, fargs$optargs) + x_init <- list(p=rep(1/K,K-1), beta=fargs$mu, b=rep(0,K)) + do.call(rbind,op$run(x_init)) + }, + function(fargs) { + # B starts + library(morpheus) + K <- fargs$optargs$K + op <- optimParams(K, fargs$optargs$link, fargs$optargs) + best_val <- Inf + best_par <- list() + for (i in 1:fargs$optargs$nstart) + { + x_init <- list(p=rep(1/K,K-1), beta=i*fargs$mu, b=rep(0,K)) + par <- op$run(x_init) + val <- op$f( op$linArgs(par) ) + if (val < best_val) + { + best_par <- par + best_val <- val + } + } + do.call(rbind,best_par) + }), + prepareArgs = function(fargs) { + library(morpheus) + io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link) + fargs$optargs$M <- computeMoments(io$X, io$Y) + mu <- computeMu(io$X, io$Y, fargs$optargs) + fargs$mu <- mu + }, N=N, ncores=ncores, verbose=TRUE) +for (i in 1:2) + ms[[i]] <- alignMatrices(ms[[i]], ref=rbind(p,beta,b), ls_mode="exact") + +ms_params <- list("N"=N, "nc"=ncores, "n"=n, "K"=K, "link"=link, + "p"=p, "beta"=beta, "b"=b, "nstart"=nstart) + +save(ms, ms_params, file="multistart.RData") diff --git a/reports/run_accu_cl.sh b/reports/run_accu_cl.sh new file mode 100644 index 0000000..d1d8e37 --- /dev/null +++ b/reports/run_accu_cl.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +#PBS -l nodes=1:ppn=15,mem=8gb,pmem=512mb +#PBS -j oe + +#PBS -o .output +rm -f .output + +WORKDIR=/workdir2/auder/morpheus/reports +cd $WORKDIR + +module load R + +# arg --vanilla maybe possible on cluster +for d in 2 5 10 20; do + for link in "logit" "probit"; do + R --slave --args N=1000 n=1e5 nc=15 d=$d link=$link out$d$link 2>&1 + done +done diff --git a/reports/run_time_cl.sh b/reports/run_time_cl.sh new file mode 100644 index 0000000..3055072 --- /dev/null +++ b/reports/run_time_cl.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +#PBS -l nodes=1:ppn=16,mem=8gb,pmem=512mb +#PBS -j oe + +#PBS -o .output +rm -f .output + +WORKDIR=/workdir2/auder/morpheus/reports +cd $WORKDIR + +module load R + +# arg --vanilla maybe possible on cluster +R --slave --args N=1000 nc=16 link=logit out 2>&1 diff --git a/reports/timings.R b/reports/timings.R new file mode 100644 index 0000000..704fd82 --- /dev/null +++ b/reports/timings.R @@ -0,0 +1,84 @@ +# flexmix optimization to get beta +fmOptim <- function(X, Y, K, link) +{ + dat <- as.data.frame( cbind(Y,X) ) + fm <- flexmix( cbind(Y, 1-Y) ~ .-Y, data=dat, k=K, + model = FLXMRglm(family = binomial(link = link)) ) + p <- mean(fm@posterior[["scaled"]][,1]) + out <- refit(fm) + beta_b <- sapply( seq_len(K), function(i) as.double( out@components[[1]][[i]][,1] ) ) + list("p"=p, "beta"=beta_b[2:nrow(beta_b),], "b"=beta_b[1,]) + NULL +} + +# Our package optimization for beta (using mu as a starting point) +ourOptim <- function(X, Y, K, link) +{ + M <- computeMoments(X, Y) + mu <- computeMu(X, Y, list(K=K,M=M)) + x_init = c(1/2, as.double(mu), c(0,0)) + optimParams(K, link, list(M=M))$run(x_init) + NULL +} + +# Get timings for both methods with the same beta matrix +getTimings <- function(link) +{ + timings <- list('fm'=matrix(0,nrow=10,ncol=7),'our'=matrix(0,nrow=10,ncol=7)) + K <- 2 + for (d in c(2,5,10)) + { + beta <- matrix(runif(d*K,min=-5,max=5),ncol=K) + for (logn in 4:6) + { + n <- 10^logn + io <- generateSampleIO(n, rep(1/K,K-1), beta, runif(K), link) + timings[['fm']][d,logn] <- system.time(fmOptim(io$X,io$Y,K,link))[3] + timings[['our']][d,logn] <- system.time(ourOptim(io$X,io$Y,K,link))[3] + } + } + timings +} + +#model = binomial +link <- "logit" +ncores <- 1 +N <- 100 + +cmd_args <- commandArgs() +for (arg in cmd_args) +{ + if (substr(arg,1,1)!='-') + { + spl <- strsplit(arg,'=')[[1]] + if (spl[1] == "link") { + link <- spl[2] + } else if (spl[1] == "nc") { + ncores <- as.integer(spl[2]) + } else if (spl[1] == "N") { + N <- as.integer(spl[2]) + } + } +} + +library(morpheus) +library(flexmix) +source("../patch_Bettina/FLXMRglm.R") + +tm <- + if (ncores == 1) { + lapply(1:N, function(i) { + print(paste("Run",i)) + getTimings(link) + }) + } else { + library(parallel) + mclapply(1:N, function(i) { + print(paste("Run",i)) + getTimings(link) + }, + mc.preschedule=FALSE, mc.cores=ncores) + } +tm_params <- list("link"=link, "N"=N, "nc"=ncores) + +save("tm", "tm_params", file="timings.RData") -- 2.44.0