From: Benjamin Auder Date: Thu, 12 Jul 2018 14:36:33 +0000 (+0200) Subject: Add title arg to plotCoefs X-Git-Url: https://git.auder.net/variants/pieces/%7B%7B%20asset%28%27mixstore/css/current/git-favicon.png?a=commitdiff_plain;h=476a79d4b332d378716e320fdfd95fa3e1d7be9f;p=morpheus.git Add title arg to plotCoefs --- diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 8acd2c0..35e89ff 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -69,7 +69,7 @@ plotBox <- function(mr, x, y) #' @examples #' #See example in ?plotHist #' @export -plotCoefs <- function(mr, params, idx) +plotCoefs <- function(mr, params, idx, xtitle="Parameter") { L <- nrow(mr[[1]][[1]]) K <- ncol(mr[[1]][[1]]) @@ -94,7 +94,7 @@ plotCoefs <- function(mr, params, idx) avg_param <- as.double(params_hat) std_param <- as.double(stdev) matplot(cbind(params[o],avg_param[o],avg_param[o]+std_param[o],avg_param[o]-std_param[o]), - col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab="Parameter", ylab="Value") + col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab=xtitle, ylab="") #print(o) #not returning o to avoid weird Jupyter issue... (TODO:) } diff --git a/reports/SAVE_RES/multirun_10_logit.RData b/reports/SAVE_RES/multirun_10_logit.RData new file mode 100644 index 0000000..86129aa Binary files /dev/null and b/reports/SAVE_RES/multirun_10_logit.RData differ diff --git a/reports/SAVE_RES/multirun_10_probit.RData b/reports/SAVE_RES/multirun_10_probit.RData new file mode 100644 index 0000000..be2dd46 Binary files /dev/null and b/reports/SAVE_RES/multirun_10_probit.RData differ diff --git a/reports/SAVE_RES/multirun_2_logit.RData b/reports/SAVE_RES/multirun_2_logit.RData new file mode 100644 index 0000000..1ef1e7a Binary files /dev/null and b/reports/SAVE_RES/multirun_2_logit.RData differ diff --git a/reports/SAVE_RES/multirun_2_probit.RData b/reports/SAVE_RES/multirun_2_probit.RData new file mode 100644 index 0000000..a318e65 Binary files /dev/null and b/reports/SAVE_RES/multirun_2_probit.RData differ diff --git a/reports/SAVE_RES/multirun_5_logit.RData b/reports/SAVE_RES/multirun_5_logit.RData new file mode 100644 index 0000000..5bbf125 Binary files /dev/null and b/reports/SAVE_RES/multirun_5_logit.RData differ diff --git a/reports/SAVE_RES/multirun_5_probit.RData b/reports/SAVE_RES/multirun_5_probit.RData new file mode 100644 index 0000000..c88c41f Binary files /dev/null and b/reports/SAVE_RES/multirun_5_probit.RData differ diff --git a/reports/accuracy.R b/reports/accuracy.R new file mode 100644 index 0000000..d646ae3 --- /dev/null +++ b/reports/accuracy.R @@ -0,0 +1,100 @@ +optimBeta <- function(N, n, K, p, beta, b, link, ncores) +{ + library(morpheus) + res <- multiRun( + list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link)), + list( + # morpheus + function(fargs) { + library(morpheus) + K <- fargs$optargs$K + M <- computeMoments(fargs$X, fargs$Y) + fargs$optargs$M <- M + mu <- computeMu(fargs$X, fargs$Y, fargs$optargs) + op <- optimParams(K,link,fargs$optargs) + x_init <- c( rep(1/K,K-1), as.double(mu), rep(0,K) ) + do.call(rbind, op$run(x_init)) + }, + # flexmix + function(fargs) { + library(flexmix) + source("../patch_Bettina/FLXMRglm.R") + K <- fargs$optargs$K + dat <- as.data.frame( cbind(fargs$Y,fargs$X) ) + fm <- flexmix( cbind(V1, 1-V1) ~ .-V1, data=dat, k=K, + model = FLXMRglm(family = binomial(link = link)) ) + p <- mean(fm@posterior[["scaled"]][,1]) + out <- refit(fm) + beta_b <- sapply( seq_len(K), function(i) { + as.double( out@components[[1]][[i]][,1] ) + } ) + rbind(p, beta_b[2:nrow(beta_b),], beta_b[1,]) + } ), + prepareArgs = function(fargs, index) { + library(morpheus) + io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link) + fargs$X = io$X + fargs$Y = io$Y + fargs$optargs$K = ncol(fargs$beta) + fargs$optargs$M = computeMoments(io$X,io$Y) + fargs + }, N=N, ncores=ncores, verbose=TRUE) + p <- c(p, 1-sum(p)) + for (i in 1:2) + res[[i]] <- alignMatrices(res[[i]], ref=rbind(p,beta,b), ls_mode="exact") + res +} + +#model = binomial; default values: +link = "logit" +N <- 10 +d <- 2 +n <- 1e4 +ncores <- 1 + +cmd_args <- commandArgs() +for (arg in cmd_args) +{ + if (substr(arg,1,1)!='-') { + spl <- strsplit(arg,'=')[[1]] + if (spl[1] == "nc") { + ncores <- as.integer(spl[2]) + } else if (spl[1] == "N") { + N <- as.integer(spl[2]) + } else if (spl[1] == "n") { + n <- as.integer(spl[2]) + } else if (spl[1] == "d") { + d <- as.integer(spl[2]) + } else if (spl[1] == "link") { + link <- spl[2] + } + } +} + +if (d == 2) { + K <- 2 + p <- .5 + b <- c(-.2, .5) + beta <- matrix( c(1,-2, 3,1), ncol=K ) +} else if (d == 5) { + K <- 2 + p <- .5 + b <- c(-.2, .5) + beta <- matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K ) +} else if (d == 10) { + K <- 3 + p <- c(.3, .3) + b <- c(-.2, 0, .5) + beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2), ncol=K ) +} else if (d == 20) { + K <- 3 + p <- c(.3, .3) + b <- c(-.2, 0, .5) + beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K ) +} + +mr <- optimBeta(N, n, K, p, beta, b, link, ncores) +mr_params <- list("N"=N, "n"=n, "K"=K, "d"=d, "link"=link, + "p"=c(p,1-sum(p)), "beta"=beta, "b"=b) + +save("mr", "mr_params", file=paste("multirun_",d,"_",link,".RData",sep="")) diff --git a/reports/multirun.RData b/reports/multirun.RData new file mode 100644 index 0000000..136740a Binary files /dev/null and b/reports/multirun.RData differ diff --git a/reports/multirun_10_logit.RData b/reports/multirun_10_logit.RData new file mode 100644 index 0000000..366e7a8 Binary files /dev/null and b/reports/multirun_10_logit.RData differ diff --git a/reports/multirun_10_probit.RData b/reports/multirun_10_probit.RData new file mode 100644 index 0000000..b87740e Binary files /dev/null and b/reports/multirun_10_probit.RData differ diff --git a/reports/multirun_2_logit.RData b/reports/multirun_2_logit.RData new file mode 100644 index 0000000..06a862d Binary files /dev/null and b/reports/multirun_2_logit.RData differ diff --git a/reports/multirun_2_probit.RData b/reports/multirun_2_probit.RData new file mode 100644 index 0000000..0b8b9dd Binary files /dev/null and b/reports/multirun_2_probit.RData differ diff --git a/reports/multirun_5_logit.RData b/reports/multirun_5_logit.RData new file mode 100644 index 0000000..cfba0d0 Binary files /dev/null and b/reports/multirun_5_logit.RData differ diff --git a/reports/multirun_5_probit.RData b/reports/multirun_5_probit.RData new file mode 100644 index 0000000..4e1d4d4 Binary files /dev/null and b/reports/multirun_5_probit.RData differ diff --git a/reports/multistart.R b/reports/multistart.R new file mode 100644 index 0000000..b3d1e10 --- /dev/null +++ b/reports/multistart.R @@ -0,0 +1,85 @@ +library(morpheus) + +#model = binomial +K <- 2 +p <- .5 +b <- c(-.2, .5) +# Default values: +link = "logit" +N <- 100 +d <- 2 +n <- 1e4 +ncores <- 1 +nstart <- 3 #nstart-1 random starting points for each MC run + +cmd_args <- commandArgs() +for (arg in cmd_args) +{ + if (substr(arg,1,1)!='-') + { + spl <- strsplit(arg,'=')[[1]] + if (spl[1] == "nc") { + ncores <- as.integer(spl[2]) + } else if (spl[1] == "N") { + N <- as.integer(spl[2]) + } else if (spl[1] == "n") { + n <- as.integer(spl[2]) + } else if (spl[1] == "d") { + d <- as.integer(spl[2]) + } else if (spl[1] == "link") { + link <- spl[2] + } else if (spl[1] == "nstart") { + nstart <- spl[2] + } + } +} +betas <- list( + matrix( c(1,-2, 3,1), ncol=K ), #d=2 + matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K ), #d=5 + matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0), ncol=K ) ) #d=10 +beta <- betas[[ ifelse( d==2, 1, ifelse(d==5,2,3) ) ]] + +ms <- multiRun( + list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link,nstart=nstart)), list( + function(fargs) { + # 1 start + library(morpheus) + K <- fargs$optargs$K + op <- optimParams(K, fargs$optargs$link, fargs$optargs) + x_init <- c(rep(1/K,K-1), as.double(fargs$mu), rep(0,K)) + do.call(rbind,op$run(x_init)) + }, + function(fargs) { + # B starts + library(morpheus) + K <- fargs$optargs$K + op <- optimParams(K, fargs$optargs$link, fargs$optargs) + best_val <- Inf + best_par <- list() + for (i in 1:fargs$optargs$nstart) + { + x_init <- c(rep(1/K,K-1), as.double(i*fargs$mu), rep(0,K)) + par <- op$run(x_init) + val <- op$f( op$linArgs(par) ) + if (val < best_val) + { + best_par <- par + best_val <- val + } + } + do.call(rbind,best_par) + }), + prepareArgs = function(fargs) { + library(morpheus) + io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link) + fargs$optargs$M <- computeMoments(io$X, io$Y) + mu <- computeMu(io$X, io$Y, fargs$optargs) + fargs$mu <- mu + }, N=N, ncores=ncores, verbose=TRUE) +for (i in 1:2) + ms[[i]] <- alignMatrices(ms[[i]], ref=rbind(p,beta,b), ls_mode="exact") + +ms_params <- list("N"=N, "nc"=ncores, "n"=n, "K"=K, "link"=link, + "p"=p, "beta"=beta, "b"=b, "nstart"=nstart) + +save(ms, ms_params, file="multistart.RData") diff --git a/reports/run_accu_cl.sh b/reports/run_accu_cl.sh new file mode 100644 index 0000000..11bb5e0 --- /dev/null +++ b/reports/run_accu_cl.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +#PBS -l nodes=1:ppn=15,mem=8gb,pmem=512mb +#PBS -j oe + +#PBS -o .output +rm -f .output + +WORKDIR=/workdir2/auder/morpheus/reports +cd $WORKDIR + +module load R + +# arg --vanilla maybe possible on cluster +for d in 10 20; do + for link in "logit" "probit"; do + R --slave --args N=1000 n=1e5 nc=15 d=$d link=$link out$d$link 2>&1 + done +done diff --git a/reports/run_time_cl.sh b/reports/run_time_cl.sh new file mode 100644 index 0000000..3055072 --- /dev/null +++ b/reports/run_time_cl.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +#PBS -l nodes=1:ppn=16,mem=8gb,pmem=512mb +#PBS -j oe + +#PBS -o .output +rm -f .output + +WORKDIR=/workdir2/auder/morpheus/reports +cd $WORKDIR + +module load R + +# arg --vanilla maybe possible on cluster +R --slave --args N=1000 nc=16 link=logit out 2>&1 diff --git a/reports/timings.R b/reports/timings.R new file mode 100644 index 0000000..704fd82 --- /dev/null +++ b/reports/timings.R @@ -0,0 +1,84 @@ +# flexmix optimization to get beta +fmOptim <- function(X, Y, K, link) +{ + dat <- as.data.frame( cbind(Y,X) ) + fm <- flexmix( cbind(Y, 1-Y) ~ .-Y, data=dat, k=K, + model = FLXMRglm(family = binomial(link = link)) ) + p <- mean(fm@posterior[["scaled"]][,1]) + out <- refit(fm) + beta_b <- sapply( seq_len(K), function(i) as.double( out@components[[1]][[i]][,1] ) ) + list("p"=p, "beta"=beta_b[2:nrow(beta_b),], "b"=beta_b[1,]) + NULL +} + +# Our package optimization for beta (using mu as a starting point) +ourOptim <- function(X, Y, K, link) +{ + M <- computeMoments(X, Y) + mu <- computeMu(X, Y, list(K=K,M=M)) + x_init = c(1/2, as.double(mu), c(0,0)) + optimParams(K, link, list(M=M))$run(x_init) + NULL +} + +# Get timings for both methods with the same beta matrix +getTimings <- function(link) +{ + timings <- list('fm'=matrix(0,nrow=10,ncol=7),'our'=matrix(0,nrow=10,ncol=7)) + K <- 2 + for (d in c(2,5,10)) + { + beta <- matrix(runif(d*K,min=-5,max=5),ncol=K) + for (logn in 4:6) + { + n <- 10^logn + io <- generateSampleIO(n, rep(1/K,K-1), beta, runif(K), link) + timings[['fm']][d,logn] <- system.time(fmOptim(io$X,io$Y,K,link))[3] + timings[['our']][d,logn] <- system.time(ourOptim(io$X,io$Y,K,link))[3] + } + } + timings +} + +#model = binomial +link <- "logit" +ncores <- 1 +N <- 100 + +cmd_args <- commandArgs() +for (arg in cmd_args) +{ + if (substr(arg,1,1)!='-') + { + spl <- strsplit(arg,'=')[[1]] + if (spl[1] == "link") { + link <- spl[2] + } else if (spl[1] == "nc") { + ncores <- as.integer(spl[2]) + } else if (spl[1] == "N") { + N <- as.integer(spl[2]) + } + } +} + +library(morpheus) +library(flexmix) +source("../patch_Bettina/FLXMRglm.R") + +tm <- + if (ncores == 1) { + lapply(1:N, function(i) { + print(paste("Run",i)) + getTimings(link) + }) + } else { + library(parallel) + mclapply(1:N, function(i) { + print(paste("Run",i)) + getTimings(link) + }, + mc.preschedule=FALSE, mc.cores=ncores) + } +tm_params <- list("link"=link, "N"=N, "nc"=ncores) + +save("tm", "tm_params", file="timings.RData") diff --git a/reports/timings_logit.RData b/reports/timings_logit.RData new file mode 100644 index 0000000..cb2851c Binary files /dev/null and b/reports/timings_logit.RData differ diff --git a/reports/timings_save.RData b/reports/timings_save.RData new file mode 100644 index 0000000..0097a46 Binary files /dev/null and b/reports/timings_save.RData differ