Add title arg to plotCoefs
authorBenjamin Auder <benjamin.auder@somewhere>
Thu, 12 Jul 2018 14:36:33 +0000 (16:36 +0200)
committerBenjamin Auder <benjamin.auder@somewhere>
Thu, 12 Jul 2018 14:36:33 +0000 (16:36 +0200)
21 files changed:
pkg/R/plot.R
reports/SAVE_RES/multirun_10_logit.RData [new file with mode: 0644]
reports/SAVE_RES/multirun_10_probit.RData [new file with mode: 0644]
reports/SAVE_RES/multirun_2_logit.RData [new file with mode: 0644]
reports/SAVE_RES/multirun_2_probit.RData [new file with mode: 0644]
reports/SAVE_RES/multirun_5_logit.RData [new file with mode: 0644]
reports/SAVE_RES/multirun_5_probit.RData [new file with mode: 0644]
reports/accuracy.R [new file with mode: 0644]
reports/multirun.RData [new file with mode: 0644]
reports/multirun_10_logit.RData [new file with mode: 0644]
reports/multirun_10_probit.RData [new file with mode: 0644]
reports/multirun_2_logit.RData [new file with mode: 0644]
reports/multirun_2_probit.RData [new file with mode: 0644]
reports/multirun_5_logit.RData [new file with mode: 0644]
reports/multirun_5_probit.RData [new file with mode: 0644]
reports/multistart.R [new file with mode: 0644]
reports/run_accu_cl.sh [new file with mode: 0644]
reports/run_time_cl.sh [new file with mode: 0644]
reports/timings.R [new file with mode: 0644]
reports/timings_logit.RData [new file with mode: 0644]
reports/timings_save.RData [new file with mode: 0644]

index 8acd2c0..35e89ff 100644 (file)
@@ -69,7 +69,7 @@ plotBox <- function(mr, x, y)
 #' @examples
 #' #See example in ?plotHist
 #' @export
-plotCoefs <- function(mr, params, idx)
+plotCoefs <- function(mr, params, idx, xtitle="Parameter")
 {
        L <- nrow(mr[[1]][[1]])
        K <- ncol(mr[[1]][[1]])
@@ -94,7 +94,7 @@ plotCoefs <- function(mr, params, idx)
        avg_param <- as.double(params_hat)
        std_param <- as.double(stdev)
        matplot(cbind(params[o],avg_param[o],avg_param[o]+std_param[o],avg_param[o]-std_param[o]),
-               col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab="Parameter", ylab="Value")
+               col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab=xtitle, ylab="")
 
        #print(o) #not returning o to avoid weird Jupyter issue... (TODO:)
 }
diff --git a/reports/SAVE_RES/multirun_10_logit.RData b/reports/SAVE_RES/multirun_10_logit.RData
new file mode 100644 (file)
index 0000000..86129aa
Binary files /dev/null and b/reports/SAVE_RES/multirun_10_logit.RData differ
diff --git a/reports/SAVE_RES/multirun_10_probit.RData b/reports/SAVE_RES/multirun_10_probit.RData
new file mode 100644 (file)
index 0000000..be2dd46
Binary files /dev/null and b/reports/SAVE_RES/multirun_10_probit.RData differ
diff --git a/reports/SAVE_RES/multirun_2_logit.RData b/reports/SAVE_RES/multirun_2_logit.RData
new file mode 100644 (file)
index 0000000..1ef1e7a
Binary files /dev/null and b/reports/SAVE_RES/multirun_2_logit.RData differ
diff --git a/reports/SAVE_RES/multirun_2_probit.RData b/reports/SAVE_RES/multirun_2_probit.RData
new file mode 100644 (file)
index 0000000..a318e65
Binary files /dev/null and b/reports/SAVE_RES/multirun_2_probit.RData differ
diff --git a/reports/SAVE_RES/multirun_5_logit.RData b/reports/SAVE_RES/multirun_5_logit.RData
new file mode 100644 (file)
index 0000000..5bbf125
Binary files /dev/null and b/reports/SAVE_RES/multirun_5_logit.RData differ
diff --git a/reports/SAVE_RES/multirun_5_probit.RData b/reports/SAVE_RES/multirun_5_probit.RData
new file mode 100644 (file)
index 0000000..c88c41f
Binary files /dev/null and b/reports/SAVE_RES/multirun_5_probit.RData differ
diff --git a/reports/accuracy.R b/reports/accuracy.R
new file mode 100644 (file)
index 0000000..d646ae3
--- /dev/null
@@ -0,0 +1,100 @@
+optimBeta <- function(N, n, K, p, beta, b, link, ncores)
+{
+       library(morpheus)
+       res <- multiRun(
+               list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link)),
+               list(
+                       # morpheus
+                       function(fargs) {
+                               library(morpheus)
+                               K <- fargs$optargs$K
+                               M <- computeMoments(fargs$X, fargs$Y)
+                               fargs$optargs$M <- M
+                               mu <- computeMu(fargs$X, fargs$Y, fargs$optargs)
+                               op <- optimParams(K,link,fargs$optargs)
+                               x_init <- c( rep(1/K,K-1), as.double(mu), rep(0,K) )
+                               do.call(rbind, op$run(x_init))
+                       },
+                       # flexmix
+                       function(fargs) {
+                               library(flexmix)
+                               source("../patch_Bettina/FLXMRglm.R")
+                               K <- fargs$optargs$K
+                               dat <- as.data.frame( cbind(fargs$Y,fargs$X) )
+                               fm <- flexmix( cbind(V1, 1-V1) ~ .-V1, data=dat, k=K,
+                                       model = FLXMRglm(family = binomial(link = link)) )
+                               p <- mean(fm@posterior[["scaled"]][,1])
+                               out <- refit(fm)
+                               beta_b <- sapply( seq_len(K), function(i) {
+                                       as.double( out@components[[1]][[i]][,1] )
+                               } )
+                               rbind(p, beta_b[2:nrow(beta_b),], beta_b[1,])
+                       } ),
+               prepareArgs = function(fargs, index) {
+                       library(morpheus)
+                       io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link)
+                       fargs$X = io$X
+                       fargs$Y = io$Y
+                       fargs$optargs$K = ncol(fargs$beta)
+                       fargs$optargs$M = computeMoments(io$X,io$Y)
+                       fargs
+               }, N=N, ncores=ncores, verbose=TRUE)
+       p <- c(p, 1-sum(p))
+       for (i in 1:2)
+               res[[i]] <- alignMatrices(res[[i]], ref=rbind(p,beta,b), ls_mode="exact")
+       res
+}
+
+#model = binomial; default values:
+link = "logit"
+N <- 10
+d <- 2
+n <- 1e4
+ncores <- 1
+
+cmd_args <- commandArgs()
+for (arg in cmd_args)
+{
+       if (substr(arg,1,1)!='-') {
+               spl <- strsplit(arg,'=')[[1]]
+               if (spl[1] == "nc") {
+                       ncores <- as.integer(spl[2])
+               } else if (spl[1] == "N") {
+                       N <- as.integer(spl[2])
+               } else if (spl[1] == "n") {
+                       n <- as.integer(spl[2])
+               } else if (spl[1] == "d") {
+                       d <- as.integer(spl[2])
+               } else if (spl[1] == "link") {
+                       link <- spl[2]
+               }
+       }
+}
+
+if (d == 2) {
+       K <- 2
+       p <- .5
+       b <- c(-.2, .5)
+       beta <- matrix( c(1,-2, 3,1), ncol=K )
+} else if (d == 5) {
+       K <- 2
+       p <- .5
+       b <- c(-.2, .5)
+       beta <- matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K )
+} else if (d == 10) {
+       K <- 3
+       p <- c(.3, .3)
+       b <- c(-.2, 0, .5)
+       beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2), ncol=K )
+} else if (d == 20) {
+       K <- 3
+       p <- c(.3, .3)
+       b <- c(-.2, 0, .5)
+       beta <- matrix( c(1,2,-1,0,3,4,-1,-3,0,2,2,-3,0,1,0,-1,-4,3,2,0, -1,1,3,-1,0,0,2,0,1,-2,1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0,1,1,2,2,-2,-2,3,1,0,0), ncol=K )
+}
+
+mr <- optimBeta(N, n, K, p, beta, b, link, ncores)
+mr_params <- list("N"=N, "n"=n, "K"=K, "d"=d, "link"=link,
+       "p"=c(p,1-sum(p)), "beta"=beta, "b"=b)
+
+save("mr", "mr_params", file=paste("multirun_",d,"_",link,".RData",sep=""))
diff --git a/reports/multirun.RData b/reports/multirun.RData
new file mode 100644 (file)
index 0000000..136740a
Binary files /dev/null and b/reports/multirun.RData differ
diff --git a/reports/multirun_10_logit.RData b/reports/multirun_10_logit.RData
new file mode 100644 (file)
index 0000000..366e7a8
Binary files /dev/null and b/reports/multirun_10_logit.RData differ
diff --git a/reports/multirun_10_probit.RData b/reports/multirun_10_probit.RData
new file mode 100644 (file)
index 0000000..b87740e
Binary files /dev/null and b/reports/multirun_10_probit.RData differ
diff --git a/reports/multirun_2_logit.RData b/reports/multirun_2_logit.RData
new file mode 100644 (file)
index 0000000..06a862d
Binary files /dev/null and b/reports/multirun_2_logit.RData differ
diff --git a/reports/multirun_2_probit.RData b/reports/multirun_2_probit.RData
new file mode 100644 (file)
index 0000000..0b8b9dd
Binary files /dev/null and b/reports/multirun_2_probit.RData differ
diff --git a/reports/multirun_5_logit.RData b/reports/multirun_5_logit.RData
new file mode 100644 (file)
index 0000000..cfba0d0
Binary files /dev/null and b/reports/multirun_5_logit.RData differ
diff --git a/reports/multirun_5_probit.RData b/reports/multirun_5_probit.RData
new file mode 100644 (file)
index 0000000..4e1d4d4
Binary files /dev/null and b/reports/multirun_5_probit.RData differ
diff --git a/reports/multistart.R b/reports/multistart.R
new file mode 100644 (file)
index 0000000..b3d1e10
--- /dev/null
@@ -0,0 +1,85 @@
+library(morpheus)
+
+#model = binomial
+K <- 2
+p <- .5
+b <- c(-.2, .5)
+# Default values:
+link = "logit"
+N <- 100
+d <- 2
+n <- 1e4
+ncores <- 1
+nstart <- 3 #nstart-1 random starting points for each MC run
+
+cmd_args <- commandArgs()
+for (arg in cmd_args)
+{
+       if (substr(arg,1,1)!='-')
+       {
+               spl <- strsplit(arg,'=')[[1]]
+               if (spl[1] == "nc") {
+                       ncores <- as.integer(spl[2])
+               } else if (spl[1] == "N") {
+                       N <- as.integer(spl[2])
+               } else if (spl[1] == "n") {
+                       n <- as.integer(spl[2])
+               } else if (spl[1] == "d") {
+                       d <- as.integer(spl[2])
+               } else if (spl[1] == "link") {
+                       link <- spl[2]
+               } else if (spl[1] == "nstart") {
+                       nstart <- spl[2]
+               }
+       }
+}
+betas <- list(
+       matrix( c(1,-2, 3,1), ncol=K ), #d=2
+       matrix( c(1,2,-1,0,3, 2,-3,0,1,0), ncol=K ), #d=5
+       matrix( c(1,2,-1,0,3,4,-1,-3,0,2, 2,-3,0,1,0,-1,-4,3,2,0), ncol=K ) ) #d=10
+beta <- betas[[ ifelse( d==2, 1, ifelse(d==5,2,3) ) ]]
+
+ms <- multiRun(
+       list(n=n,p=p,beta=beta,b=b,optargs=list(K=K,link=link,nstart=nstart)), list(
+               function(fargs) {
+                       # 1 start
+                       library(morpheus)
+                       K <- fargs$optargs$K
+                       op <- optimParams(K, fargs$optargs$link, fargs$optargs)
+                       x_init <- c(rep(1/K,K-1), as.double(fargs$mu), rep(0,K))
+                       do.call(rbind,op$run(x_init))
+               },
+               function(fargs) {
+                       # B starts
+                       library(morpheus)
+                       K <- fargs$optargs$K
+                       op <- optimParams(K, fargs$optargs$link, fargs$optargs)
+                       best_val <- Inf
+                       best_par <- list()
+                       for (i in 1:fargs$optargs$nstart)
+                       {
+                               x_init <- c(rep(1/K,K-1), as.double(i*fargs$mu), rep(0,K))
+                               par <- op$run(x_init)
+                               val <- op$f( op$linArgs(par) )
+                               if (val < best_val)
+                               {
+                                       best_par <- par
+                                       best_val <- val
+                               }
+                       }
+                       do.call(rbind,best_par)
+               }),
+               prepareArgs = function(fargs) {
+                       library(morpheus)
+                       io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link)
+                       fargs$optargs$M <- computeMoments(io$X, io$Y)
+                       mu <- computeMu(io$X, io$Y, fargs$optargs)
+                       fargs$mu <- mu
+               }, N=N, ncores=ncores, verbose=TRUE)
+for (i in 1:2)
+       ms[[i]] <- alignMatrices(ms[[i]], ref=rbind(p,beta,b), ls_mode="exact")
+
+ms_params <- list("N"=N, "nc"=ncores, "n"=n, "K"=K, "link"=link,
+       "p"=p, "beta"=beta, "b"=b, "nstart"=nstart)
+
+save(ms, ms_params, file="multistart.RData")
diff --git a/reports/run_accu_cl.sh b/reports/run_accu_cl.sh
new file mode 100644 (file)
index 0000000..11bb5e0
--- /dev/null
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+#PBS -l nodes=1:ppn=15,mem=8gb,pmem=512mb
+#PBS -j oe
+
+#PBS -o .output
+rm -f .output
+
+WORKDIR=/workdir2/auder/morpheus/reports
+cd $WORKDIR
+
+module load R
+
+# arg --vanilla maybe possible on cluster
+for d in 10 20; do
+       for link in "logit" "probit"; do
+               R --slave --args N=1000 n=1e5 nc=15 d=$d link=$link <accuracy.R >out$d$link 2>&1
+       done
+done
diff --git a/reports/run_time_cl.sh b/reports/run_time_cl.sh
new file mode 100644 (file)
index 0000000..3055072
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+#PBS -l nodes=1:ppn=16,mem=8gb,pmem=512mb
+#PBS -j oe
+
+#PBS -o .output
+rm -f .output
+
+WORKDIR=/workdir2/auder/morpheus/reports
+cd $WORKDIR
+
+module load R
+
+# arg --vanilla maybe possible on cluster
+R --slave --args N=1000 nc=16 link=logit <timings.R >out 2>&1
diff --git a/reports/timings.R b/reports/timings.R
new file mode 100644 (file)
index 0000000..704fd82
--- /dev/null
@@ -0,0 +1,84 @@
+# flexmix optimization to get beta
+fmOptim <- function(X, Y, K, link)
+{
+       dat <- as.data.frame( cbind(Y,X) )
+       fm <- flexmix( cbind(Y, 1-Y) ~ .-Y, data=dat, k=K,
+               model = FLXMRglm(family = binomial(link = link)) )
+       p <- mean(fm@posterior[["scaled"]][,1])
+       out <- refit(fm)
+       beta_b <- sapply( seq_len(K), function(i) as.double( out@components[[1]][[i]][,1] ) )
+       list("p"=p, "beta"=beta_b[2:nrow(beta_b),], "b"=beta_b[1,])
+       NULL
+}
+
+# Our package optimization for beta (using mu as a starting point)
+ourOptim <- function(X, Y, K, link)
+{
+       M <- computeMoments(X, Y)
+       mu <- computeMu(X, Y, list(K=K,M=M))
+       x_init = c(1/2, as.double(mu), c(0,0))
+       optimParams(K, link, list(M=M))$run(x_init)
+       NULL
+}
+
+# Get timings for both methods with the same beta matrix
+getTimings <- function(link)
+{
+       timings <- list('fm'=matrix(0,nrow=10,ncol=7),'our'=matrix(0,nrow=10,ncol=7))
+       K <- 2
+       for (d in c(2,5,10))
+       {
+               beta <- matrix(runif(d*K,min=-5,max=5),ncol=K)
+               for (logn in 4:6)
+               {
+                       n <- 10^logn
+                       io <- generateSampleIO(n, rep(1/K,K-1), beta, runif(K), link)
+                       timings[['fm']][d,logn] <- system.time(fmOptim(io$X,io$Y,K,link))[3]
+                       timings[['our']][d,logn] <- system.time(ourOptim(io$X,io$Y,K,link))[3]
+               }
+       }
+       timings
+}
+
+#model = binomial
+link <- "logit"
+ncores <- 1
+N <- 100
+
+cmd_args <- commandArgs()
+for (arg in cmd_args)
+{
+       if (substr(arg,1,1)!='-')
+       {
+               spl <- strsplit(arg,'=')[[1]]
+               if (spl[1] == "link") {
+                       link <- spl[2]
+               } else if (spl[1] == "nc") {
+                       ncores <- as.integer(spl[2])
+               } else if (spl[1] == "N") {
+                       N <- as.integer(spl[2])
+               }
+       }
+}
+
+library(morpheus)
+library(flexmix)
+source("../patch_Bettina/FLXMRglm.R")
+
+tm <-
+       if (ncores == 1) {
+               lapply(1:N, function(i) {
+                       print(paste("Run",i))
+                       getTimings(link)
+               })
+       } else {
+               library(parallel)
+               mclapply(1:N, function(i) {
+                       print(paste("Run",i))
+                       getTimings(link)
+               },
+               mc.preschedule=FALSE, mc.cores=ncores)
+       }
+tm_params <- list("link"=link, "N"=N, "nc"=ncores)
+
+save("tm", "tm_params", file="timings.RData")
diff --git a/reports/timings_logit.RData b/reports/timings_logit.RData
new file mode 100644 (file)
index 0000000..cb2851c
Binary files /dev/null and b/reports/timings_logit.RData differ
diff --git a/reports/timings_save.RData b/reports/timings_save.RData
new file mode 100644 (file)
index 0000000..0097a46
Binary files /dev/null and b/reports/timings_save.RData differ