From f47183deb252f12d532af81a491083f1411cacc8 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 13 Sep 2023 16:38:55 +0200 Subject: [PATCH 01/16] Remove OpenMP dependency (and unused variable) --- pkg/DESCRIPTION | 2 +- pkg/src/functions.c | 6 +++--- pkg/src/hungarian.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index c2833c6..706925c 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -6,7 +6,7 @@ Description: Mixture of logistic regressions parameters (H)estimation with (General Linear Model). For more details see chapter 3 in the PhD thesis of Mor-Absa Loum: , available here . -Version: 1.0-3 +Version: 1.0-4 Author: Benjamin Auder [aut,cre], Mor-Absa Loum [aut] Maintainer: Benjamin Auder diff --git a/pkg/src/functions.c b/pkg/src/functions.c index 79deff4..89e84ef 100644 --- a/pkg/src/functions.c +++ b/pkg/src/functions.c @@ -1,5 +1,5 @@ #include -#include +//#include // Index matrix (by columns) #define mi(i, j, d1, d2) (j*d1 + i) @@ -53,7 +53,7 @@ void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3) // with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, double* W) { - int nc=*pnc, n=*pn, d=*pd; + int n=*pn, d=*pd; //,nc=*pnc int dim = d + d*d + d*d*d; //double* W = (double*)malloc(dim*dim*sizeof(double)); @@ -101,7 +101,7 @@ void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, dou // This final nested loop is very costly. Some basic optimisations: double gj = g[j]; int baseIdx = j * dim; - #pragma GCC unroll 32 +// #pragma GCC unroll 32 for (int k=j; k>=0; k--) W[baseIdx+k] += gj * g[k]; } diff --git a/pkg/src/hungarian.c b/pkg/src/hungarian.c index 9671a43..3b2a28d 100644 --- a/pkg/src/hungarian.c +++ b/pkg/src/hungarian.c @@ -112,7 +112,7 @@ void hungarian_free(hungarian_problem_t* p) { void hungarian_solve(hungarian_problem_t* p) { int i, j, m, n, k, l, t, q, unmatched; - double cost, s; + double s; //,cost int* col_mate; int* row_mate; int* parent_row; @@ -122,7 +122,7 @@ void hungarian_solve(hungarian_problem_t* p) double* slack; int* slack_row; - cost = 0.; +// cost = 0.; m =p->num_rows; n =p->num_cols; @@ -160,7 +160,7 @@ void hungarian_solve(hungarian_problem_t* p) for (k=1; kcost[k][l]cost[k][l]; - cost+=s; +// cost+=s; if (s!=0.) for (k=0; kcost[k][l]-=s; @@ -335,11 +335,11 @@ done: } /*TRACE("\n");*/ } - for (i=0; i Date: Mon, 6 Nov 2023 21:17:40 +0100 Subject: [PATCH 02/16] update --- .gitfat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitfat b/.gitfat index ec2f583..410d78e 100644 --- a/.gitfat +++ b/.gitfat @@ -1,2 +1,2 @@ [rsync] -remote = gitfat@auder.net:~/files/morpheus +remote = gitfat@auder.net:files/morpheus -- 2.53.0 From a01887096514a96479d47e6edb6a0fd98b0ce990 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 20 Dec 2024 00:47:40 +0100 Subject: [PATCH 03/16] Add TODO --- TODO | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..eff3514 --- /dev/null +++ b/TODO @@ -0,0 +1,3 @@ +Intégrer calcul p-value --> cf. article +Framework figure --> faire workflow + -- 2.53.0 From 72b269e90df3fa7760cc6634393d2e16a9111d72 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 10 Jan 2025 02:08:01 +0100 Subject: [PATCH 04/16] Fix vignette --- .gitignore | 7 +++++-- pkg/DESCRIPTION | 3 ++- pkg/R/plot.R | 3 +-- vignettes/report.Rmd | 20 +++++++++++--------- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index e6a7351..eb90000 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +/_unversioned/ + #ignore roxygen2 generated files *.Rd !*-package.Rd @@ -13,9 +15,10 @@ /reports/* !/reports/*.R !/reports/*.sh +/vignettes/cache/ +/vignettes/figure/ /vignettes/report.html -/vignettes/report_cache/ -/vignettes/report_files/ +/vignettes/report.md /vignettes/*.zip #ignore R session files diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index 706925c..d3b5427 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -1,3 +1,4 @@ +Encoding: UTF-8 Package: morpheus Title: Estimate Parameters of Mixtures of Logistic Regressions Description: Mixture of logistic regressions parameters (H)estimation with @@ -24,7 +25,7 @@ Suggests: testthat (>= 3.0.0), roxygen2 License: MIT + file LICENSE -RoxygenNote: 7.1.1 +RoxygenNote: 7.3.2 URL: https://github.com/yagu0/morpheus Collate: 'utils.R' diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 328b18c..93e9e65 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -88,8 +88,7 @@ plotBox <- function(mr, x, y, ...) args <- list(...) for (i in 1:L) { - boxplot(params[[i]], - ifelse("ylab" %in% names(args), args$ylab, "Parameter value")) + boxplot(params[[i]], ...) } } diff --git a/vignettes/report.Rmd b/vignettes/report.Rmd index de98b9f..41a0a09 100644 --- a/vignettes/report.Rmd +++ b/vignettes/report.Rmd @@ -44,12 +44,12 @@ $Y=1$ conditionally to $X=x$ is given by $g(\langle \beta, x \rangle +b)$, where $\beta\in \R^{d}$ is the vector of regression coefficients and $b\in\R$ is the intercept. Popular examples of link functions are the logit link function where for any real $z$, $g(z)=e^z/(1+e^z)$ and the probit link function where $g(z)=\Phi(z),$ with $\Phi$ -the cumulative distribution function of the standard normal ${\cal N}(0,1)$. +the cumulative distribution function of the standard normal ${\mathcal N}(0,1)$. Both are implemented in the package. If now we want to modelise heterogeneous populations, let $K$ be the number of populations and $\omega=(\omega_1,\cdots,\omega_K)$ their weights such that -$\omega_{j}\geq 0$, $j=1,\ldots,K$ and $\sum_{j=1}^{K}\omega{j}=1$. +$\omega_{j}\geq 0$, $j=1,\ldots,K$ and $\sum_{j=1}^{K}\omega_{j}=1$. Define, for $j=1,\ldots,K$, the regression coefficients in the $j$-th population by $\beta_{j}\in\R^{d}$ and the intercept in the $j$-th population by $b_{j}\in\R$. Let $\omega =(\omega_{1},\ldots,\omega_{K})$, @@ -78,10 +78,12 @@ where $X\sim \mathcal{N}(m,\Sigma)$, $m\in \R^{d}$, $\Sigma$ a positive and symetric $d\times d$ matrix. ***** TODO: take this into account? --> The two main functions are: + * computeMu(), which estimates the parameters directions, and * optimParams(), which builds an object \code{o} to estimate all other parameters when calling \code{o$run()}, starting from the directions obtained by the previous function. + A third function is useful to run Monte-Carlo or bootstrap estimations using different models in various contexts: multiRun(). We'll show example for all of them. @@ -127,9 +129,8 @@ The other parameters are estimated by solving an optimization problem. The following function builds and return an optimization algorithm object: ```{r, results="show", include=TRUE, echo=TRUE} -M <- computeMoments(io$X, io$Y) -# X and Y must be provided if the moments matrix is not given -algopt <- optimParams(K=2, link="probit", optargs=list(M=M)) +# M <- computeMoments(io$X, io$Y) #optional (if several runs) +algopt <- optimParams(io$X, io$Y, K=2, link="probit") #, M=M) # Optimization starts at beta = mu, b = 0 and p = uniform distribution x0 <- list(beta = mu) theta <- algopt$run(x0) @@ -194,7 +195,7 @@ see ?plotHist). ```{r, results="show", include=TRUE, echo=TRUE} # Second row, first column; morpheus on the left, flexmix on the right -plotBox(mr1, 2, 1, "Target value: -1") +plotBox(mr1, 2, 1, main="Target value: -1") ``` ```{r, results="show", include=TRUE, echo=TRUE} @@ -204,7 +205,7 @@ mr2 <- multiRun(list(n=1000,p=1/2,beta=beta,b=c(0,0),optargs=list(link="logit")) function(fargs) { library(morpheus) mu <- computeMu(fargs$X, fargs$Y, fargs$optargs) - optimParams(fargs$K,fargs$link,fargs$optargs)$run(list(beta=mu))$beta + optimParams(fargs$X,fargs$Y,fargs$K,fargs$link,fargs$M)$run(list(beta=mu))$beta }, # flexmix function(fargs) { @@ -222,7 +223,7 @@ mr2 <- multiRun(list(n=1000,p=1/2,beta=beta,b=c(0,0),optargs=list(link="logit")) fargs$Y = io$Y fargs$K = ncol(fargs$beta) fargs$link = fargs$optargs$link - fargs$optargs$M = computeMoments(io$X,io$Y) + fargs$M = computeMoments(io$X,io$Y) fargs }, N=10, ncores=3) # As in example 1, align results: @@ -232,6 +233,7 @@ for (i in 1:2) ```{r, results="show", include=TRUE, echo=TRUE} # Second argument = true parameters matrix; third arg = index of method (here "morpheus") -plotCoefs(mr2, beta, 1) +plotCoefs(mr2[[1]], beta, 1) +plotCoefs(mr2[[2]], beta, 1) # Real params are on the continous line; estimations = dotted line ``` -- 2.53.0 From 44a3c357e4c06bf520b44358678a346bf8836569 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 15 Jan 2025 21:55:33 +0100 Subject: [PATCH 05/16] Add p-value computation --- vignettes/report.Rmd | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/vignettes/report.Rmd b/vignettes/report.Rmd index 41a0a09..fd2bf2f 100644 --- a/vignettes/report.Rmd +++ b/vignettes/report.Rmd @@ -237,3 +237,25 @@ plotCoefs(mr2[[1]], beta, 1) plotCoefs(mr2[[2]], beta, 1) # Real params are on the continous line; estimations = dotted line ``` + +### Computation of p-values + +In order to select the most important variables, it is natural to test wether the coefficients are zero. +That is to say, we would like to check the hypothesis $H_0: \beta_{jk} = 0$ versus $H_1: \beta_{jk} \neq 0$. +It is shown in [TODO_citation] that $\frac{\hat \beta_{jk}^2}{\hat \Var(\beta_{jk})}$ converges toward a $\Chi^2(1)$ law, +where $\hat \Var(\beta_{jk})$ is the empirical variance (computed by bootstrap). +Using this approximation, it is easy to provide a p-value for each estimated coefficient. + +```{r, results="show", include=TRUE, echo=TRUE} +pval1 <- matrix(nrow=2, ncol=2) +for (x in 1:2) { + for (y in 1:2) { + coefs <- sapply(mr2[[1]], function(m) m[x,y]) + stat_test <- mean(coefs^2) / var(coefs) + pval1[x, y] <- 1 - pchisq(stat_test, 1) + } +} +pval1 +``` + + -- 2.53.0 From 5dafd953955a3b7131ab6e192c8fc93702a4b23f Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Tue, 28 Jan 2025 17:32:04 +0100 Subject: [PATCH 06/16] Ignore some more files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index eb90000..1499b88 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +/data/ /_unversioned/ #ignore roxygen2 generated files @@ -20,6 +21,7 @@ /vignettes/report.html /vignettes/report.md /vignettes/*.zip +/vignettes/report_*/ #ignore R session files .Rhistory -- 2.53.0 From fbbbae7a1de1e429b056a975618ff9d18bdfd13b Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 16 Mar 2026 02:34:26 +0100 Subject: [PATCH 07/16] Add p-value computation --- pkg/R/plot.R | 15 ++++----------- pkg/R/utils.R | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 93e9e65..b8024f5 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -8,8 +8,7 @@ # .extractParam <- function(mr, x=1, y=1) { - if (is.list(mr[[1]])) - { + if (is.list(mr[[1]])) { # Obtain L vectors where L = number of res lists in mr return ( lapply( mr, function(mr_list) { sapply(mr_list, function(m) m[x,y]) @@ -47,8 +46,7 @@ plotHist <- function(mr, x, y, ...) # Plot histograms side by side par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1)) args <- list(...) - for (i in 1:L) - { + for (i in 1:L) { hist(params[[i]], breaks=40, freq=FALSE, xlab=ifelse("xlab" %in% names(args), args$xlab, "Parameter value"), ylab=ifelse("ylab" %in% names(args), args$ylab, "Density")) @@ -85,11 +83,8 @@ plotBox <- function(mr, x, y, ...) L <- length(params) # Plot boxplots side by side par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1)) - args <- list(...) for (i in 1:L) - { boxplot(params[[i]], ...) - } } #' plotCoefs @@ -124,10 +119,8 @@ plotCoefs <- function(mr, params, ...) params_hat <- matrix(nrow=d, ncol=K) stdev <- matrix(nrow=d, ncol=K) - for (x in 1:d) - { - for (y in 1:K) - { + for (x in 1:d) { + for (y in 1:K) { estims <- .extractParam(mr, x, y) params_hat[x,y] <- mean(estims) # Another way to compute stdev: using distances to true params diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 5b9d999..e39119e 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -18,6 +18,33 @@ normalize <- function(x) sweep(x, 2, norm2, '/') } +#' pvalue +#' +#' Compute the p-values of the tests beta[i,j] == 0 vs != 0 +#' +#' @param mr A list of matrices as output by multiRun() +#' +#' @return The matrix of p-values (same size as mr[[1]]) +#' +#' @examples +#' mr <- multiRun(...) #cf ?multiRun +#' p <- pvalue(mr[[1]]) +#' @export +pvalue <- function(mr) +{ + n = nrow(mr[[1]]) + m = ncol(mr[[1]]) + pval <- matrix(nrow=n, ncol=m) + for (x in 1:n) { + for (y in 1:m) { + coefs <- sapply(mr, function(m) m[x,y]) + stat_test <- mean(coefs^2) / var(coefs) + pval[x, y] <- 1 - pchisq(stat_test, 1) + } + } + pval +} + # Computes a tensor-vector product # # @param Te third-order tensor (size dxdxd) -- 2.53.0 From ae0a930271f8b53f7882facd69e2fe52ab28cc24 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 27 Mar 2026 12:20:00 +0100 Subject: [PATCH 08/16] Prepare package for re-upload to CRAN --- comments.md | 14 ++++++++++++++ pkg/DESCRIPTION | 17 +++++++++++------ pkg/NAMESPACE | 14 ++------------ pkg/R/A_NAMESPACE.R | 2 +- pkg/R/utils.R | 3 ++- 5 files changed, 30 insertions(+), 20 deletions(-) create mode 100644 comments.md diff --git a/comments.md b/comments.md new file mode 100644 index 0000000..14b8ba5 --- /dev/null +++ b/comments.md @@ -0,0 +1,14 @@ +There were 3 NOTEs: + +1. "New submission / Package was archived on CRAN" + - This version addresses all previous issues that led to archiving, at least as far as I can check (R CMD check --as-cran is OK). I forgot the previous issues. + +2. "checking for future file timestamps ... NOTE: unable to verify current time" + - This appears to be an environmental issue on my machine during check and is not a property of the package files themselves (I reset all timestamps with "find . -exec touch {} +" ) + +3. "checking compilation flags used ... NOTE: Compilation used the following non-portable flag(s): ..." + - These flags are the default configuration of the R installation on my machine. They are not hardcoded in the package's src/Makevars. + +==== + +The version only changes to 1.0.5 because I just added the pvalue() function to the file R/utils.R. diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index d3b5427..0fb4a38 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -5,12 +5,17 @@ Description: Mixture of logistic regressions parameters (H)estimation with (U)spectral methods. The main methods take d-dimensional inputs and a vector of binary outputs, and return parameters according to the GLMs mixture model (General Linear Model). For more details see chapter 3 in the PhD thesis of - Mor-Absa Loum: , available here + Mor-Absa Loum: , available here . -Version: 1.0-4 -Author: Benjamin Auder [aut,cre], - Mor-Absa Loum [aut] -Maintainer: Benjamin Auder +Version: 1.0-5 +Authors@R: c(person(given = "Benjamin", + family = "Auder", + role = c("aut", "cre"), + email = "benjamin.auder@universite-paris-saclay.fr"), + person(given = "Mor-Absa", + family = "Loum", + role = "aut", + email = "morabsa.loum@univ-thies.sn")) Depends: R (>= 3.5.0), Imports: @@ -25,7 +30,7 @@ Suggests: testthat (>= 3.0.0), roxygen2 License: MIT + file LICENSE -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 URL: https://github.com/yagu0/morpheus Collate: 'utils.R' diff --git a/pkg/NAMESPACE b/pkg/NAMESPACE index 2c6f6a1..75c3ec6 100644 --- a/pkg/NAMESPACE +++ b/pkg/NAMESPACE @@ -10,19 +10,9 @@ export(optimParams) export(plotBox) export(plotCoefs) export(plotHist) -importFrom(graphics,barplot) -importFrom(graphics,boxplot) -importFrom(graphics,hist) -importFrom(graphics,matplot) -importFrom(graphics,par) +importFrom(graphics,barplot,boxplot,hist,matplot,par) importFrom(jointDiag,ajd) importFrom(methods,new) importFrom(pracma,integral) -importFrom(stats,integrate) -importFrom(stats,pnorm) -importFrom(stats,rbinom) -importFrom(stats,rmultinom) -importFrom(stats,rnorm) -importFrom(stats,runif) -importFrom(stats,sd) +importFrom(stats,integrate,pnorm,rbinom,rmultinom,rnorm,runif,sd,pchisq,var) useDynLib(morpheus, .registration = TRUE) diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R index ab9ea34..ebe0135 100644 --- a/pkg/R/A_NAMESPACE.R +++ b/pkg/R/A_NAMESPACE.R @@ -3,7 +3,7 @@ #' @useDynLib morpheus, .registration = TRUE #' #' @importFrom jointDiag ajd -#' @importFrom stats rbinom rmultinom rnorm pnorm runif integrate sd +#' @importFrom stats rbinom rmultinom rnorm pnorm runif integrate sd var pchisq #' @importFrom graphics boxplot barplot hist par matplot #' @importFrom methods new #' @importFrom pracma integral diff --git a/pkg/R/utils.R b/pkg/R/utils.R index e39119e..06fe25f 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -27,7 +27,8 @@ normalize <- function(x) #' @return The matrix of p-values (same size as mr[[1]]) #' #' @examples -#' mr <- multiRun(...) #cf ?multiRun +#' # Next line should be a real call to multiRun() +#' mr <- list( list(matrix(c(1,2,3,4),ncol=2),matrix(c(2,2,1,1),ncol=2)) ) #' p <- pvalue(mr[[1]]) #' @export pvalue <- function(mr) -- 2.53.0 From a0b1459f82ab1cb07b3047fd000643fce9bb55b0 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 13 Apr 2026 23:31:05 +0200 Subject: [PATCH 09/16] Remove old TODO file --- TODO | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 TODO diff --git a/TODO b/TODO deleted file mode 100644 index eff3514..0000000 --- a/TODO +++ /dev/null @@ -1,3 +0,0 @@ -Intégrer calcul p-value --> cf. article -Framework figure --> faire workflow - -- 2.53.0 From 8191f337eaab1bfb69e74e5a2b0337863361d409 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Tue, 14 Apr 2026 11:00:13 +0200 Subject: [PATCH 10/16] Add seeds to tests so that R check never fail --- pkg/tests/testthat/test-alignMatrices.R | 2 ++ pkg/tests/testthat/test-computeMu.R | 1 + pkg/tests/testthat/test-hungarianAlgorithm.R | 1 + pkg/tests/testthat/test-jointDiag.R | 2 ++ 4 files changed, 6 insertions(+) diff --git a/pkg/tests/testthat/test-alignMatrices.R b/pkg/tests/testthat/test-alignMatrices.R index 63e4bd3..ca18e0c 100644 --- a/pkg/tests/testthat/test-alignMatrices.R +++ b/pkg/tests/testthat/test-alignMatrices.R @@ -13,6 +13,7 @@ test_that("labelSwitchingAlign correctly aligns de-noised parameters", { + set.seed(32) N <- 30 #number of matrices d_K_list <- list(c(2,2), c(5,3)) for (i in 1:2) @@ -41,6 +42,7 @@ test_that("labelSwitchingAlign correctly aligns de-noised parameters", test_that("labelSwitchingAlign correctly aligns noisy parameters", { + set.seed(32) N <- 30 #number of matrices d_K_list <- list(c(2,2), c(5,3)) for (i in 1:2) diff --git a/pkg/tests/testthat/test-computeMu.R b/pkg/tests/testthat/test-computeMu.R index c1fd4f9..44ad9eb 100644 --- a/pkg/tests/testthat/test-computeMu.R +++ b/pkg/tests/testthat/test-computeMu.R @@ -1,5 +1,6 @@ test_that("on input of sufficient size, β/||β|| is estimated accurately enough", { + set.seed(42) n <- 100000 d <- 2 K <- 2 diff --git a/pkg/tests/testthat/test-hungarianAlgorithm.R b/pkg/tests/testthat/test-hungarianAlgorithm.R index 4eacbb9..db41935 100644 --- a/pkg/tests/testthat/test-hungarianAlgorithm.R +++ b/pkg/tests/testthat/test-hungarianAlgorithm.R @@ -1,5 +1,6 @@ test_that("HungarianAlgorithm provides the correct answer on various inputs", { + set.seed(42) for (n in c(2,3,10,50)) { for (repetition in 1:3) diff --git a/pkg/tests/testthat/test-jointDiag.R b/pkg/tests/testthat/test-jointDiag.R index 3fb057c..cf87b17 100644 --- a/pkg/tests/testthat/test-jointDiag.R +++ b/pkg/tests/testthat/test-jointDiag.R @@ -1,6 +1,7 @@ #auxiliary to test diagonality .computeMuCheckDiag = function(X, Y, K, jd_method, β_ref) { + set.seed(99) d <- ncol(X) #TODO: redundant code, same as computeMu() main method. Comments are stripped M3 <- .Moments_M3(X,Y) @@ -30,6 +31,7 @@ test_that("'jedi' and 'uwedge' joint-diagonalization methods return a correct matrix", { + set.seed(99) n <- 10000 d_K <- list( c(2,2), c(5,3), c(20,13) ) -- 2.53.0 From 5d988751646717e5fc3fac8c9fdef61eb24ab832 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 17 Apr 2026 23:44:40 +0200 Subject: [PATCH 11/16] No more need for this file --- comments.md | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 comments.md diff --git a/comments.md b/comments.md deleted file mode 100644 index 14b8ba5..0000000 --- a/comments.md +++ /dev/null @@ -1,14 +0,0 @@ -There were 3 NOTEs: - -1. "New submission / Package was archived on CRAN" - - This version addresses all previous issues that led to archiving, at least as far as I can check (R CMD check --as-cran is OK). I forgot the previous issues. - -2. "checking for future file timestamps ... NOTE: unable to verify current time" - - This appears to be an environmental issue on my machine during check and is not a property of the package files themselves (I reset all timestamps with "find . -exec touch {} +" ) - -3. "checking compilation flags used ... NOTE: Compilation used the following non-portable flag(s): ..." - - These flags are the default configuration of the R installation on my machine. They are not hardcoded in the package's src/Makevars. - -==== - -The version only changes to 1.0.5 because I just added the pvalue() function to the file R/utils.R. -- 2.53.0 From f5219ea9aec60e97536eb1999738e0718c3ddab5 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Sat, 18 Apr 2026 00:36:25 +0200 Subject: [PATCH 12/16] Set seed for all unit tests --- pkg/tests/testthat/test-optimParams.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/tests/testthat/test-optimParams.R b/pkg/tests/testthat/test-optimParams.R index a8b8909..0a4e8d4 100644 --- a/pkg/tests/testthat/test-optimParams.R +++ b/pkg/tests/testthat/test-optimParams.R @@ -48,6 +48,7 @@ naive_f <- function(link, M1,M2,M3, p,β,b) # TODO: understand why delta is so large (should be 10^-6 10^-7 ...) test_that("naive computation provides the same result as vectorized computations", { + set.seed(7) h <- 1e-7 #for finite-difference tests n <- 10 for (dK in list( c(2,2), c(5,3))) @@ -88,6 +89,7 @@ test_that("naive computation provides the same result as vectorized computations test_that("W computed in C and in R are the same", { + set.seed(7) tol <- 1e-8 n <- 10 for (dK in list( c(2,2))) #, c(5,3))) -- 2.53.0 From 41aa314da34e84c87e4b46120aaeaf480978cb21 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 20 Apr 2026 19:00:53 +0200 Subject: [PATCH 13/16] Add CITATION file, improve license infos --- pkg/.gitignore | 3 +++ pkg/DESCRIPTION | 3 ++- pkg/LICENSE | 15 +++++++++++++-- pkg/inst/CITATION | 12 ++++++++++++ pkg/vignettes/.gitignore | 2 ++ pkg/vignettes/JSS_article.pdf | 1 + 6 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 pkg/.gitignore create mode 100644 pkg/inst/CITATION create mode 100644 pkg/vignettes/.gitignore create mode 100644 pkg/vignettes/JSS_article.pdf diff --git a/pkg/.gitignore b/pkg/.gitignore new file mode 100644 index 0000000..236428a --- /dev/null +++ b/pkg/.gitignore @@ -0,0 +1,3 @@ +/inst/doc +/vignettes/*.R +/vignettes/*.html diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index 0fb4a38..e9866ae 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -29,7 +29,8 @@ Suggests: parallel, testthat (>= 3.0.0), roxygen2 -License: MIT + file LICENSE +License: ISC + file LICENSE +Date: 2026-04-20 RoxygenNote: 7.3.3 URL: https://github.com/yagu0/morpheus Collate: diff --git a/pkg/LICENSE b/pkg/LICENSE index c60fa72..64fe734 100644 --- a/pkg/LICENSE +++ b/pkg/LICENSE @@ -1,2 +1,13 @@ -YEAR: 2016-2020 -COPYRIGHT HOLDER: Mor-Absa Loum, Benjamin Auder +Copyright (C) 2016-2026 Benjamin AUDER, Mor-Absa LOUM + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/pkg/inst/CITATION b/pkg/inst/CITATION new file mode 100644 index 0000000..afe73b1 --- /dev/null +++ b/pkg/inst/CITATION @@ -0,0 +1,12 @@ +bibentry( + bibtype = "Manual", + title = "morpheus: Estimate Parameters of Mixtures of Logistic Regressions", + author = c( + person("Benjamin", "Auder"), + person("Mor-Absa", "Loum") + ), + year = "2026", + note = "R package version 1.0-5", + url = "https://github.com/yagu0/morpheus", + header = "To cite package ‘morpheus’ in publications use:" +) diff --git a/pkg/vignettes/.gitignore b/pkg/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/pkg/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/pkg/vignettes/JSS_article.pdf b/pkg/vignettes/JSS_article.pdf new file mode 100644 index 0000000..3ff1fb4 --- /dev/null +++ b/pkg/vignettes/JSS_article.pdf @@ -0,0 +1 @@ +#$# git-fat f33784b937cb1fbbc0d7d700075dec297dcb2504 769072 -- 2.53.0 From b282fc9e5c15b72530b0741dc6c4d530cffd49f6 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 20 Apr 2026 23:35:29 +0200 Subject: [PATCH 14/16] Fix multiRun, add vignette = JSS article --- pkg/.gitignore | 3 --- pkg/DESCRIPTION | 4 ++-- pkg/LICENSE | 15 ++------------- pkg/R/multiRun.R | 21 ++++++++++++++------- pkg/inst/doc/JSS_article.pdf | 1 + pkg/vignettes/.gitignore | 2 -- pkg/vignettes/JSS_article.pdf | 1 - 7 files changed, 19 insertions(+), 28 deletions(-) delete mode 100644 pkg/.gitignore create mode 100644 pkg/inst/doc/JSS_article.pdf delete mode 100644 pkg/vignettes/.gitignore delete mode 100644 pkg/vignettes/JSS_article.pdf diff --git a/pkg/.gitignore b/pkg/.gitignore deleted file mode 100644 index 236428a..0000000 --- a/pkg/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/inst/doc -/vignettes/*.R -/vignettes/*.html diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index e9866ae..9f5c9c4 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -7,7 +7,7 @@ Description: Mixture of logistic regressions parameters (H)estimation with (General Linear Model). For more details see chapter 3 in the PhD thesis of Mor-Absa Loum: , available here . -Version: 1.0-5 +Version: 1.0-6 Authors@R: c(person(given = "Benjamin", family = "Auder", role = c("aut", "cre"), @@ -29,7 +29,7 @@ Suggests: parallel, testthat (>= 3.0.0), roxygen2 -License: ISC + file LICENSE +License: MIT + file LICENSE Date: 2026-04-20 RoxygenNote: 7.3.3 URL: https://github.com/yagu0/morpheus diff --git a/pkg/LICENSE b/pkg/LICENSE index 64fe734..34c9bc4 100644 --- a/pkg/LICENSE +++ b/pkg/LICENSE @@ -1,13 +1,2 @@ -Copyright (C) 2016-2026 Benjamin AUDER, Mor-Absa LOUM - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +YEAR: 2016-2026 +COPYRIGHT HOLDER: Benjamin Auder, Mor-Absa Loum diff --git a/pkg/R/multiRun.R b/pkg/R/multiRun.R index e2633c1..767f6f9 100644 --- a/pkg/R/multiRun.R +++ b/pkg/R/multiRun.R @@ -11,6 +11,7 @@ #' #' @param fargs List of arguments for the estimation functions #' @param estimParams List of nf function(s) to apply on fargs +#' @param packages Vector of packages to load on each node (default: morpheus) #' @param prepareArgs Prepare arguments for the functions inside estimParams #' @param N Number of runs #' @param ncores Number of cores for parallel runs (<=1: sequential) @@ -29,13 +30,11 @@ #' res <- multiRun(list(X=io$X,Y=io$Y,K=2), list( #' # morpheus #' function(fargs) { -#' library(morpheus) #' ind <- fargs$ind #' computeMu(fargs$X[ind,], fargs$Y[ind], list(K=fargs$K)) #' }, #' # flexmix #' function(fargs) { -#' library(flexmix) #' ind <- fargs$ind #' K <- fargs$K #' dat <- as.data.frame( cbind(fargs$Y[ind],fargs$X[ind,]) ) @@ -43,6 +42,7 @@ #' model=FLXMRglm(family="binomial") ) ) #' normalize( matrix(out@@coef[1:(ncol(fargs$X)*K)], ncol=K) ) #' } ), +#' packages = c("morpheus", "flexmix"), #' prepareArgs = function(fargs,index) { #' if (index == 1) #' fargs$ind <- 1:nrow(fargs$X) @@ -57,7 +57,6 @@ #' res <- multiRun(list(n=1000,p=1/2,β=β,b=c(0,0),link="logit"), list( #' # morpheus #' function(fargs) { -#' library(morpheus) #' K <- fargs$K #' μ <- computeMu(fargs$X, fargs$Y, list(K=fargs$K)) #' o <- optimParams(fargs$X, fargs$Y, fargs$K, fargs$link, fargs$M) @@ -65,7 +64,6 @@ #' }, #' # flexmix #' function(fargs) { -#' library(flexmix) #' K <- fargs$K #' dat <- as.data.frame( cbind(fargs$Y,fargs$X) ) #' out <- refit( flexmix( cbind(V1, 1 - V1) ~ ., data=dat, k=K, @@ -73,6 +71,7 @@ #' sapply( seq_len(K), function(i) #' as.double( out@@components[[1]][[i]][2:(1+ncol(fargs$X)),1] ) ) #' } ), +#' packages = c("morpheus", "flexmix"), #' prepareArgs = function(fargs,index) { #' library(morpheus) #' io <- generateSampleIO(fargs$n, fargs$p, fargs$β, fargs$b, fargs$link) @@ -86,7 +85,7 @@ #' for (i in 1:2) #' res[[i]] <- alignMatrices(res[[i]], ref=β, ls_mode="exact")} #' @export -multiRun <- function(fargs, estimParams, +multiRun <- function(fargs, estimParams, packages = c("morpheus"), prepareArgs = function(x,i) x, N=10, ncores=3, agg=lapply, verbose=FALSE) { if (!is.list(fargs)) @@ -118,15 +117,23 @@ multiRun <- function(fargs, estimParams, }) } + loadPackages <- function() { + for (p in packages) + library(p, character.only = TRUE) + } + if (ncores > 1) { cl <- parallel::makeCluster(ncores, outfile="") - parallel::clusterExport(cl, c("fargs","verbose"), environment()) + parallel::clusterExport(cl, c("fargs", "verbose", "loadPackages"), environment()) + parallel::clusterEvalQ(cl, loadPackages() ) list_res <- parallel::clusterApplyLB(cl, 1:N, estimParamAtIndex) parallel::stopCluster(cl) } - else + else { + loadPackages() list_res <- lapply(1:N, estimParamAtIndex) + } # De-interlace results: output one list per function nf <- length(estimParams) diff --git a/pkg/inst/doc/JSS_article.pdf b/pkg/inst/doc/JSS_article.pdf new file mode 100644 index 0000000..0b930dc --- /dev/null +++ b/pkg/inst/doc/JSS_article.pdf @@ -0,0 +1 @@ +#$# git-fat 51b69cedf9b3a4397c353799b5e03a0b6254219a 404201 diff --git a/pkg/vignettes/.gitignore b/pkg/vignettes/.gitignore deleted file mode 100644 index 097b241..0000000 --- a/pkg/vignettes/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.html -*.R diff --git a/pkg/vignettes/JSS_article.pdf b/pkg/vignettes/JSS_article.pdf deleted file mode 100644 index 3ff1fb4..0000000 --- a/pkg/vignettes/JSS_article.pdf +++ /dev/null @@ -1 +0,0 @@ -#$# git-fat f33784b937cb1fbbc0d7d700075dec297dcb2504 769072 -- 2.53.0 From af3f0d5c755aaf4ced59c43106bb6c1d53e6520c Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 22 Apr 2026 17:51:29 +0200 Subject: [PATCH 15/16] Fix package to re-upload on CRAN. TODO: check tests/example, write inst/extdata/simulateMr.R --- .gitignore | 9 +-- .../LeastSquareMoments_Auder.et.al_2020.pdf | 0 .../Properties.of.GMM_Hansen_1982.pdf | 0 ...orDecompositions_Anandkumar.et.al_2014.pdf | 0 ...ensorFactorization_Kuleshov.et.al_2015.pdf | 0 .../TensorMethods_Anandkumar.et.al_2016.pdf | 0 pkg/R/multiRun.R | 7 +- pkg/R/optimParams.R | 2 +- pkg/R/plot.R | 65 +++++++++++-------- pkg/R/utils.R | 2 + .../inst/extdata}/FLXMRglm.R | 3 + {patch_Bettina => pkg/inst/extdata}/code.R | 0 pkg/inst/extdata/simulateMr.R | 22 +++++++ 13 files changed, 75 insertions(+), 35 deletions(-) rename {doc => doc_PDF}/LeastSquareMoments_Auder.et.al_2020.pdf (100%) rename {doc => doc_PDF}/Properties.of.GMM_Hansen_1982.pdf (100%) rename {doc => doc_PDF}/TensorDecompositions_Anandkumar.et.al_2014.pdf (100%) rename {doc => doc_PDF}/TensorFactorization_Kuleshov.et.al_2015.pdf (100%) rename {doc => doc_PDF}/TensorMethods_Anandkumar.et.al_2016.pdf (100%) rename {patch_Bettina => pkg/inst/extdata}/FLXMRglm.R (97%) rename {patch_Bettina => pkg/inst/extdata}/code.R (100%) create mode 100644 pkg/inst/extdata/simulateMr.R diff --git a/.gitignore b/.gitignore index 1499b88..68db326 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/data/ /_unversioned/ #ignore roxygen2 generated files @@ -18,18 +17,16 @@ !/reports/*.sh /vignettes/cache/ /vignettes/figure/ -/vignettes/report.html -/vignettes/report.md +/vignettes/*.html /vignettes/*.zip -/vignettes/report_*/ #ignore R session files .Rhistory .RData #ignore R CMD build/check generated files -/*.Rcheck/ -/*.tar.gz +*.Rcheck/ +*.tar.gz #ignore generated object files *.[oa] diff --git a/doc/LeastSquareMoments_Auder.et.al_2020.pdf b/doc_PDF/LeastSquareMoments_Auder.et.al_2020.pdf similarity index 100% rename from doc/LeastSquareMoments_Auder.et.al_2020.pdf rename to doc_PDF/LeastSquareMoments_Auder.et.al_2020.pdf diff --git a/doc/Properties.of.GMM_Hansen_1982.pdf b/doc_PDF/Properties.of.GMM_Hansen_1982.pdf similarity index 100% rename from doc/Properties.of.GMM_Hansen_1982.pdf rename to doc_PDF/Properties.of.GMM_Hansen_1982.pdf diff --git a/doc/TensorDecompositions_Anandkumar.et.al_2014.pdf b/doc_PDF/TensorDecompositions_Anandkumar.et.al_2014.pdf similarity index 100% rename from doc/TensorDecompositions_Anandkumar.et.al_2014.pdf rename to doc_PDF/TensorDecompositions_Anandkumar.et.al_2014.pdf diff --git a/doc/TensorFactorization_Kuleshov.et.al_2015.pdf b/doc_PDF/TensorFactorization_Kuleshov.et.al_2015.pdf similarity index 100% rename from doc/TensorFactorization_Kuleshov.et.al_2015.pdf rename to doc_PDF/TensorFactorization_Kuleshov.et.al_2015.pdf diff --git a/doc/TensorMethods_Anandkumar.et.al_2016.pdf b/doc_PDF/TensorMethods_Anandkumar.et.al_2016.pdf similarity index 100% rename from doc/TensorMethods_Anandkumar.et.al_2016.pdf rename to doc_PDF/TensorMethods_Anandkumar.et.al_2016.pdf diff --git a/pkg/R/multiRun.R b/pkg/R/multiRun.R index 767f6f9..88207fd 100644 --- a/pkg/R/multiRun.R +++ b/pkg/R/multiRun.R @@ -21,9 +21,13 @@ #' @return A list of nf aggregates of N results (matrices). #' #' @examples -#' \dontrun{ +#' \donttest{ #' β <- matrix(c(1,-2,3,1),ncol=2) #' +#' # This example requires a patch for the flexmix package written by Bettina Grün: +#' patch_path <- system.file("extdata", "FLXMRglm.R", package = "morpheus") +#' source(patch_path) +#' #' # Bootstrap + computeMu, morpheus VS flexmix #' io <- generateSampleIO(n=1000, p=1/2, β=β, b=c(0,0), "logit") #' μ <- normalize(β) @@ -84,6 +88,7 @@ #' }, N=10, ncores=3) #' for (i in 1:2) #' res[[i]] <- alignMatrices(res[[i]], ref=β, ls_mode="exact")} +#' #' @export multiRun <- function(fargs, estimParams, packages = c("morpheus"), prepareArgs = function(x,i) x, N=10, ncores=3, agg=lapply, verbose=FALSE) diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R index c7b9898..c050e63 100644 --- a/pkg/R/optimParams.R +++ b/pkg/R/optimParams.R @@ -30,7 +30,7 @@ #' 1/2, matrix(c(1,-2,3,1),ncol=2), c(0,0), "logit") #' μ <- computeMu(io$X, io$Y, list(K=2)) #' o <- optimParams(io$X, io$Y, 2, "logit") -#' \dontrun{ +#' \donttest{ #' θ0 <- list(p=1/2, β=μ, b=c(0,0)) #' par0 <- o$run(θ0) #' # Compare with another starting point diff --git a/pkg/R/plot.R b/pkg/R/plot.R index b8024f5..f76017d 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -29,18 +29,21 @@ #' @param ... Additional graphical parameters (xlab, ylab, ...) #' #' @examples -#' \dontrun{ -#' β <- matrix(c(1,-2,3,1),ncol=2) -#' mr <- multiRun(...) #see bootstrap example in ?multiRun -#' #mr[[i]] is a list of estimated parameters matrices -#' μ <- normalize(β) -#' for (i in 1:2) -#' mr[[i]] <- alignMatrices(res[[i]], ref=μ, ls_mode="exact") -#' plotHist(mr, 2, 1) #second row, first column} +#' # mr[[i]] is a list of estimated parameters matrices (here random matrices). +#' # Should be mr <- multiRun(...) --> see bootstrap example in ?multiRun. +#' simmr_path <- system.file("extdata", "simulateMr.R", package = "morpheus") +#' source(simmr_path) +#' mr <- simulateMr(c(2,2), 10)$mr +#' plotHist(mr, 2, 1) #second row, first column +#' +#' @return No return value, called for side effects. #' #' @export plotHist <- function(mr, x, y, ...) { + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + params <- .extractParam(mr, x, y) L <- length(params) # Plot histograms side by side @@ -51,6 +54,7 @@ plotHist <- function(mr, x, y, ...) xlab=ifelse("xlab" %in% names(args), args$xlab, "Parameter value"), ylab=ifelse("ylab" %in% names(args), args$ylab, "Density")) } + NULL } # NOTE: roxygen2 bug, "@inheritParams plotHist" fails in next header: @@ -67,24 +71,29 @@ plotHist <- function(mr, x, y, ...) #' @param ... Additional graphical parameters (xlab, ylab, ...) #' #' @examples -#' \dontrun{ -#' β <- matrix(c(1,-2,3,1),ncol=2) -#' mr <- multiRun(...) #see bootstrap example in ?multiRun -#' #mr[[i]] is a list of estimated parameters matrices -#' μ <- normalize(β) -#' for (i in 1:2) -#' mr[[i]] <- alignMatrices(res[[i]], ref=μ, ls_mode="exact") -#' plotBox(mr, 2, 1) #second row, first column} +#' # mr[[i]] is a list of estimated parameters matrices (here random matrices). +#' # Should be mr <- multiRun(...) --> see bootstrap example in ?multiRun. +#' simmr_path <- system.file("extdata", "simulateMr.R", package = "morpheus") +#' source(simmr_path) +#' source(simmr_path) +#' mr <- simulateMr(c(2,2), 10)$mr +#' plotBox(mr, 2, 1) #second row, first column +#' +#' @return No return value, called for side effects. #' #' @export plotBox <- function(mr, x, y, ...) { + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + params <- .extractParam(mr, x, y) L <- length(params) # Plot boxplots side by side par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1)) for (i in 1:L) boxplot(params[[i]], ...) + NULL } #' plotCoefs @@ -101,19 +110,22 @@ plotBox <- function(mr, x, y, ...) #' @param ... Additional graphical parameters #' #' @examples -#' \dontrun{ -#' β <- matrix(c(1,-2,3,1),ncol=2) -#' mr <- multiRun(...) #see bootstrap example in ?multiRun -#' #mr[[i]] is a list of estimated parameters matrices -#' μ <- normalize(β) -#' for (i in 1:2) -#' mr[[i]] <- alignMatrices(res[[i]], ref=μ, ls_mode="exact") -#' params <- rbind( c(.5,.5), β, c(0,0) ) #p, β, b stacked in a matrix -#' plotCoefs(mr[[1]], params)} +#' # mr[[i]] is a list of estimated parameters matrices (here random matrices). +#' # Should be mr <- multiRun(...) --> see bootstrap example in ?multiRun. +#' simmr_path <- system.file("extdata", "simulateMr.R", package = "morpheus") +#' source(simmr_path) +#' mr_θ <- simulateMr(c(3,2), 10) +#' mr <- mr_θ$mr ; θ <- mr_θ$θ +#' plotCoefs(mr[[1]], θ) +#' +#' @return No return value, called for side effects. #' #' @export plotCoefs <- function(mr, params, ...) { + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + d <- nrow(mr[[1]]) K <- ncol(mr[[1]]) @@ -142,6 +154,5 @@ plotCoefs <- function(mr, params, ...) col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab=ifelse("xlab" %in% names(args), args$xlab, "Parameter index"), ylab=ifelse("ylab" %in% names(args), args$ylab, "") ) - - #print(o) #not returning o to avoid weird Jupyter issue... (TODO:) + NULL } diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 06fe25f..83cb0dc 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -10,6 +10,7 @@ #' x <- matrix(c(1,2,-1,3), ncol=2) #' normalize(x) #column 1 is 1/sqrt(5) (1 2), #' #and column 2 is 1/sqrt(10) (-1, 3) +#' #' @export normalize <- function(x) { @@ -30,6 +31,7 @@ normalize <- function(x) #' # Next line should be a real call to multiRun() #' mr <- list( list(matrix(c(1,2,3,4),ncol=2),matrix(c(2,2,1,1),ncol=2)) ) #' p <- pvalue(mr[[1]]) +#' #' @export pvalue <- function(mr) { diff --git a/patch_Bettina/FLXMRglm.R b/pkg/inst/extdata/FLXMRglm.R similarity index 97% rename from patch_Bettina/FLXMRglm.R rename to pkg/inst/extdata/FLXMRglm.R index 30fbff6..9af08f7 100644 --- a/patch_Bettina/FLXMRglm.R +++ b/pkg/inst/extdata/FLXMRglm.R @@ -1,3 +1,6 @@ +# Patch provided by Bettina Grün (flexmix author). +# https://cran.r-project.org/web/packages/flexmix/index.html + FLXMRglm <- function(formula=.~., family=gaussian, offset=NULL) { if (is.character(family)) diff --git a/patch_Bettina/code.R b/pkg/inst/extdata/code.R similarity index 100% rename from patch_Bettina/code.R rename to pkg/inst/extdata/code.R diff --git a/pkg/inst/extdata/simulateMr.R b/pkg/inst/extdata/simulateMr.R new file mode 100644 index 0000000..c09d7a4 --- /dev/null +++ b/pkg/inst/extdata/simulateMr.R @@ -0,0 +1,22 @@ +# simulateMr +# +# Simulate an output of multiRun(...), because this function is a little +# complicated to call in an example (see ?multiRun). +# +#' β <- matrix(c(1,-2,3,1),ncol=2) + + +# theta, not beta here (more general) +.simulateMr( +#' # mr[[i]] is a list of estimated parameters matrices (here random matrices). +#' # Should be mr <- multiRun(...) --> see bootstrap example in ?multiRun. +#' mr <- list() +#' μ <- normalize(β) +#' for (i in 1:2) { +#' mr[[i]] <- list() +#' for (j in 1:3) +#' mr[[i]][[j]] <- β + matrix(rnorm(4,sd=0.25),ncol=2) +#' mr[[i]] <- alignMatrices(mr[[i]], ref=β, ls_mode="exact") --> TODO: align in same function +#' } + + list(theta = ..., mr = ...) -- 2.53.0 From 23514a24545cd870763be0419b540e372acff08d Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 22 Apr 2026 22:30:20 +0200 Subject: [PATCH 16/16] Complete preparation of package for CRAN upload --- comments_CRAN | 19 +++ pkg/DESCRIPTION | 4 +- pkg/R/multiRun.R | 4 +- pkg/inst/extdata/FLXMRglm.R | 28 ++++ pkg/inst/extdata/code.R | 25 ---- pkg/inst/extdata/simulateMr.R | 36 ++--- vignettes/report.md | 267 ++++++++++++++++++++++++++++++++++ 7 files changed, 337 insertions(+), 46 deletions(-) create mode 100644 comments_CRAN delete mode 100644 pkg/inst/extdata/code.R create mode 100644 vignettes/report.md diff --git a/comments_CRAN b/comments_CRAN new file mode 100644 index 0000000..b099fc7 --- /dev/null +++ b/comments_CRAN @@ -0,0 +1,19 @@ +I did my best to fix the issues as indicated by Konstanze (thank you ! I wasn't aware of on.exit at least..). + +> Please add \value to .Rd files regarding exported methods. + +I added "\value{No return value, called for side effects}" to all plotting functions, and explicitely return NULL. + +> \dontrun{} should only be used if the example really cannot be executed. Please replace \dontrun with \donttest. + +Done: now only two \donttest and 0 \dontrun. I added R files under inst/extdata useful to run these examples, hoping it'll be fine. + +> Please make sure that you do not change the user's options, par. + +Added these two lines at the beginning of the three graphical functions: +oldpar <- par(no.readonly = TRUE) +on.exit(par(oldpar)) + +===== + +To be complete, I also improved the multiRun() function now taking a vector of packages to load on each node (wether computations are parallelized or not). diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index 9f5c9c4..055c203 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -7,7 +7,7 @@ Description: Mixture of logistic regressions parameters (H)estimation with (General Linear Model). For more details see chapter 3 in the PhD thesis of Mor-Absa Loum: , available here . -Version: 1.0-6 +Version: 1.0-5 Authors@R: c(person(given = "Benjamin", family = "Auder", role = c("aut", "cre"), @@ -30,7 +30,7 @@ Suggests: testthat (>= 3.0.0), roxygen2 License: MIT + file LICENSE -Date: 2026-04-20 +Date: 2026-04-22 RoxygenNote: 7.3.3 URL: https://github.com/yagu0/morpheus Collate: diff --git a/pkg/R/multiRun.R b/pkg/R/multiRun.R index 88207fd..ddb463b 100644 --- a/pkg/R/multiRun.R +++ b/pkg/R/multiRun.R @@ -53,7 +53,7 @@ #' else #' fargs$ind <- sample(1:nrow(fargs$X),replace=TRUE) #' fargs -#' }, N=10, ncores=3) +#' }, N=10, ncores=1) #ncores=3 #' for (i in 1:2) #' res[[i]] <- alignMatrices(res[[i]], ref=μ, ls_mode="exact") #' @@ -85,7 +85,7 @@ #' fargs$link <- fargs$link #' fargs$M <- computeMoments(io$X,io$Y) #' fargs -#' }, N=10, ncores=3) +#' }, N=10, ncores=1) #ncores=3 #' for (i in 1:2) #' res[[i]] <- alignMatrices(res[[i]], ref=β, ls_mode="exact")} #' diff --git a/pkg/inst/extdata/FLXMRglm.R b/pkg/inst/extdata/FLXMRglm.R index 9af08f7..e1de00b 100644 --- a/pkg/inst/extdata/FLXMRglm.R +++ b/pkg/inst/extdata/FLXMRglm.R @@ -149,3 +149,31 @@ FLXMRglm <- function(formula=.~., family=gaussian, offset=NULL) else stop(paste("Unknown family", family)) z } + +# Example usage: + +#library("flexmix") +#data("tribolium", package = "flexmix") +#set.seed(1234) +### uses FLXMRglm() from package. +#tribMix <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, +# k = 2, nrep = 5, data = tribolium, +# model = FLXMRglm(family = "binomial")) +#parameters(tribMix); logLik(tribMix) +##source("FLXMRglm.R") +#set.seed(1234) +### uses FLXMRglm() from source file which allows to specify link. +#tribMixNew <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, +# k = 2, nrep = 5, data = tribolium, +# model = FLXMRglm(family = "binomial")) +#parameters(tribMixNew); logLik(tribMixNew) +#set.seed(1234) +#tribMixlogit <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, +# k = 2, nrep = 5, data = tribolium, +# model = FLXMRglm(family = binomial(link = logit))) +#parameters(tribMixlogit); logLik(tribMixlogit) +#set.seed(1234) +#tribMixprobit <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, +# k = 2, nrep = 5, data = tribolium, +# model = FLXMRglm(family = binomial(link = probit))) +#parameters(tribMixprobit); logLik(tribMixprobit) diff --git a/pkg/inst/extdata/code.R b/pkg/inst/extdata/code.R deleted file mode 100644 index 829d82d..0000000 --- a/pkg/inst/extdata/code.R +++ /dev/null @@ -1,25 +0,0 @@ -library("flexmix") -data("tribolium", package = "flexmix") -set.seed(1234) -## uses FLXMRglm() from package. -tribMix <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, - k = 2, nrep = 5, data = tribolium, - model = FLXMRglm(family = "binomial")) -parameters(tribMix); logLik(tribMix) -source("FLXMRglm.R") -set.seed(1234) -## uses FLXMRglm() from source file which allows to specify link. -tribMixNew <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, - k = 2, nrep = 5, data = tribolium, - model = FLXMRglm(family = "binomial")) -parameters(tribMixNew); logLik(tribMixNew) -set.seed(1234) -tribMixlogit <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, - k = 2, nrep = 5, data = tribolium, - model = FLXMRglm(family = binomial(link = logit))) -parameters(tribMixlogit); logLik(tribMixlogit) -set.seed(1234) -tribMixprobit <- initFlexmix(cbind(Remaining, Total - Remaining) ~ Species, - k = 2, nrep = 5, data = tribolium, - model = FLXMRglm(family = binomial(link = probit))) -parameters(tribMixprobit); logLik(tribMixprobit) diff --git a/pkg/inst/extdata/simulateMr.R b/pkg/inst/extdata/simulateMr.R index c09d7a4..28f7ac3 100644 --- a/pkg/inst/extdata/simulateMr.R +++ b/pkg/inst/extdata/simulateMr.R @@ -3,20 +3,22 @@ # Simulate an output of multiRun(...), because this function is a little # complicated to call in an example (see ?multiRun). # -#' β <- matrix(c(1,-2,3,1),ncol=2) - - -# theta, not beta here (more general) -.simulateMr( -#' # mr[[i]] is a list of estimated parameters matrices (here random matrices). -#' # Should be mr <- multiRun(...) --> see bootstrap example in ?multiRun. -#' mr <- list() -#' μ <- normalize(β) -#' for (i in 1:2) { -#' mr[[i]] <- list() -#' for (j in 1:3) -#' mr[[i]][[j]] <- β + matrix(rnorm(4,sd=0.25),ncol=2) -#' mr[[i]] <- alignMatrices(mr[[i]], ref=β, ls_mode="exact") --> TODO: align in same function -#' } - - list(theta = ..., mr = ...) +# Usage: simulateMr(c(nrow, ncol), N) +# where nrow,ncol = dimension of the parameter and N = number of "runs" +# +# Always length(mr) == 2 and random parameter + noise, to not over-parameterize. +# +# Return a list with mr = simulated multiRun() output, and θ = parameter matrix. +simulateMr <- function(dims, N) +{ + library(morpheus) + mr <- list() + θ <- matrix(runif(min=-5,max=5,n=dims[1]*dims[2]), ncol=dims[1]) + for (i in 1:2) { + mr[[i]] <- list() + for (j in 1:N) + mr[[i]][[j]] <- θ + matrix(rnorm(dims[1]*dims[2],sd=0.25),ncol=dims[1]) + mr[[i]] <- alignMatrices(mr[[i]], ref=θ, ls_mode="exact") + } + list(mr = mr, θ = θ) +} diff --git a/vignettes/report.md b/vignettes/report.md new file mode 100644 index 0000000..48984cf --- /dev/null +++ b/vignettes/report.md @@ -0,0 +1,267 @@ +--- +title: Use morpheus package + +output: + pdf_document: + number_sections: true + toc_depth: 1 +--- + +\renewcommand{\P}{\mathrm{P}} +\newcommand{\R}{\mathbb{R}} + + + +## Introduction + + +*morpheus* is a contributed R package which attempts to find the parameters of a +mixture of logistic classifiers. +When the data under study come from several groups that have different characteristics, +using mixture models is a very popular way to handle heterogeneity. +Thus, many algorithms were developed to deal with various mixtures models. +Most of them use likelihood methods or Bayesian methods that are likelihood dependent. +*flexmix* is an R package which implements these kinds of algorithms. + +However, one problem of such methods is that they can converge to local maxima, +so several starting points must be explored. +Recently, spectral methods were developed to bypass EM algorithms and they were proved +able to recover the directions of the regression parameter +in models with known link function and random covariates (see [XX]). +Our package extends such moment methods using least squares to get estimators of the +whole parameters (with theoretical garantees, see [XX]). +Currently it can handle only binary output $-$ which is a common case. + +## Model + +Let $X\in \R^{d}$ be the vector of covariates and $Y\in \{0,1\}$ be the binary output. +A binary regression model assumes that for some link function $g$, the probability that +$Y=1$ conditionally to $X=x$ is given by $g(\langle \beta, x \rangle +b)$, where +$\beta\in \R^{d}$ is the vector of regression coefficients and $b\in\R$ is the intercept. +Popular examples of link functions are the logit link function where for any real $z$, +$g(z)=e^z/(1+e^z)$ and the probit link function where $g(z)=\Phi(z),$ with $\Phi$ +the cumulative distribution function of the standard normal ${\mathcal N}(0,1)$. +Both are implemented in the package. + +If now we want to modelise heterogeneous populations, let $K$ be the number of +populations and $\omega=(\omega_1,\cdots,\omega_K)$ their weights such that +$\omega_{j}\geq 0$, $j=1,\ldots,K$ and $\sum_{j=1}^{K}\omega_{j}=1$. +Define, for $j=1,\ldots,K$, the regression coefficients in the $j$-th population +by $\beta_{j}\in\R^{d}$ and the intercept in the $j$-th population by +$b_{j}\in\R$. Let $\omega =(\omega_{1},\ldots,\omega_{K})$, +$b=(b_1,\cdots,b_K)$, $\beta=[\beta_{1} \vert \cdots,\vert \beta_K]$ the $d\times K$ +matrix of regression coefficients and denote $\theta=(\omega,\beta,b)$. +The model of population mixture of binary regressions is given by: + +\begin{equation} +\label{mixturemodel1} +\P_{\theta}(Y=1\vert X=x)=\sum^{K}_{k=1}\omega_k g(<\beta_k,x>+b_k). +\end{equation} + +## Algorithm, theoretical garantees + +The algorithm uses spectral properties of some tensor matrices to estimate the model +parameters $\Theta = (\omega, \beta, b)$. Under rather mild conditions it can be +proved that the algorithm converges to the correct values (its speed is known too). +For more informations on that subject, however, please refer to our article [XX]. +In this vignette let's rather focus on package usage. + +## Usage + + +The two main functions are: + + * computeMu(), which estimates the parameters directions, and + * optimParams(), which builds an object \code{o} to estimate all other parameters + when calling \code{o$run()}, starting from the directions obtained by the + previous function. + +A third function is useful to run Monte-Carlo or bootstrap estimations using +different models in various contexts: multiRun(). We'll show example for all of them. + +### Estimation of directions + +In a real situation you would have (maybe after some pre-processing) the matrices +X and Y which contain vector inputs and binary output. +However, a function is provided in the package to generate such data following a +pre-defined law: + + +``` r +library(morpheus) +io <- generateSampleIO(n=10000, p=1/2, beta=matrix(c(1,0,0,1),ncol=2), b=c(0,0), link="probit") +# io$X and io$Y contain the sample data +``` + +$n$ is the total number of samples (lines in X, number of elements in Y) +$p$ is a vector of proportions, of size $d-1$ (because the last proportion is deduced +from the others: $p$ elements sums to 1) [TODO: omega or p?] +$\beta$ is the matrix of linear coefficients, as written above in the model. +$b$ is the vector of intercepts (as in linear regression, and as in the model above) +link can be either "logit" or "probit", as mentioned earlier. + +This function outputs a list containing in particular the matrices X and Y, allowing to +use the other functions (which all require either these, or the moments). + + +``` r +mu <- computeMu(io$X, io$Y, optargs=list(K=2)) +``` + +The optional argument, "optargs", is a list which can provide + + * the number of clusters $K$, + * the moments matrix $M$ (computed with the "computeMoments()" function), + * the joint-diagonalisation method ("uwedge" or "jedi"), + * the number of random vectors for joint-diagonalization. + +See ?computeMu and the code for more details. + +### Estimation of the other parameters + +The other parameters are estimated by solving an optimization problem. +The following function builds and return an optimization algorithm object: + + +``` r +# M <- computeMoments(io$X, io$Y) #optional (if several runs) +algopt <- optimParams(io$X, io$Y, K=2, link="probit") #, M=M) +# Optimization starts at beta = mu, b = 0 and p = uniform distribution +x0 <- list(beta = mu) +theta <- algopt$run(x0) +``` + +``` +Loading required package: MASS +``` + +Now theta is a list with three slots: + + * $p$: estimated proportions, + * $\beta$: estimated regression matrix, + * $b$: estimated bias. + +### Monte-Carlo and bootstrap + +The package provides a function to compare methods on several computations on random data. +It takes in input a list of parameters, then a list of functions which output some quantities +(on the first example, our "computeMu()" method versus flexmix way of estimating directions), +and finally a method to prepare the arguments which will be given to the functions in the +list just mentioned; this allows to run Monte-Carlo estimations with the exact same samples +for each compared method. The two last arguments to "multiRun()" control the number of runs, +and the number of cores (using the package parallel). + + +``` r +beta <- matrix(c(1,-2,3,1), ncol=2) +io <- generateSampleIO(n=1000, p=1/2, beta=beta, b=c(0,0), "logit") +mu <- normalize(beta) + +# Example 1: bootstrap + computeMu, morpheus VS flexmix; assumes fargs first 3 elts X,Y,K +mr1 <- multiRun(list(X=io$X,Y=io$Y,optargs=list(K=2,jd_nvects=0)), list( + # morpheus + function(fargs) { + library(morpheus) + ind <- fargs$ind + computeMu(fargs$X[ind,],fargs$Y[ind],fargs$optargs) + }, + # flexmix + function(fargs) { + library(flexmix) + source("../patch_Bettina/FLXMRglm.R") + ind <- fargs$ind + K <- fargs$optargs$K + dat = as.data.frame( cbind(fargs$Y[ind],fargs$X[ind,]) ) + out = refit( flexmix( cbind(V1, 1 - V1) ~ 0+., data=dat, k=K, + model=FLXMRglm(family="binomial") ) ) + normalize( matrix(out@coef[1:(ncol(fargs$X)*K)], ncol=K) ) + } ), + prepareArgs = function(fargs,index) { + # Always include the non-shuffled dataset + if (index == 1) + fargs$ind <- 1:nrow(fargs$X) + else + fargs$ind <- sample(1:nrow(fargs$X),replace=TRUE) + fargs + }, N=10, ncores=3) +# The result is correct up to matrices columns permutations; align them: +for (i in 1:2) + mr1[[i]] <- alignMatrices(mr1[[i]], ref=mu, ls_mode="exact") +``` + +Several plots are available: histograms, boxplots, or curves of coefficients. +We illustrate boxplots and curves here (histograms function uses the same arguments, +see ?plotHist). + + +``` r +# Second row, first column; morpheus on the left, flexmix on the right +plotBox(mr1, 2, 1, main="Target value: -1") +``` + +
+plot of chunk unnamed-chunk-5 +

plot of chunk unnamed-chunk-5

+
+ + +``` r +# Example 2: Monte-Carlo + optimParams from X,Y, morpheus VS flexmix; first args n,p,beta,b +mr2 <- multiRun(list(n=1000,p=1/2,beta=beta,b=c(0,0),optargs=list(link="logit")), list( + # morpheus + function(fargs) { + library(morpheus) + mu <- computeMu(fargs$X, fargs$Y, fargs$optargs) + optimParams(fargs$X,fargs$Y,fargs$K,fargs$link,fargs$M)$run(list(beta=mu))$beta + }, + # flexmix + function(fargs) { + library(flexmix) + source("../patch_Bettina/FLXMRglm.R") + dat <- as.data.frame( cbind(fargs$Y,fargs$X) ) + out <- refit( flexmix( cbind(V1, 1 - V1) ~ 0+., data=dat, k=fargs$K, + model=FLXMRglm(family="binomial") ) ) + sapply( seq_len(fargs$K), function(i) as.double( out@components[[1]][[i]][,1] ) ) + } ), + prepareArgs = function(fargs,index) { + library(morpheus) + io = generateSampleIO(fargs$n, fargs$p, fargs$beta, fargs$b, fargs$optargs$link) + fargs$X = io$X + fargs$Y = io$Y + fargs$K = ncol(fargs$beta) + fargs$link = fargs$optargs$link + fargs$M = computeMoments(io$X,io$Y) + fargs + }, N=10, ncores=3) +# As in example 1, align results: +for (i in 1:2) + mr2[[i]] <- alignMatrices(mr2[[i]], ref=beta, ls_mode="exact") +``` + + +``` r +# Second argument = true parameters matrix; third arg = index of method (here "morpheus") +plotCoefs(mr2[[1]], beta, 1) +``` + +
+plot of chunk unnamed-chunk-7 +

plot of chunk unnamed-chunk-7

+
+ +``` r +plotCoefs(mr2[[2]], beta, 1) +``` + +
+plot of chunk unnamed-chunk-7 +

plot of chunk unnamed-chunk-7

+
+ +``` r +# Real params are on the continous line; estimations = dotted line +``` -- 2.53.0