From 0f5fbd1371011f25cd1f6caf0e826d2ea9e4e245 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Sat, 14 Dec 2019 01:37:37 +0100
Subject: [PATCH] Simplify data sampling + cosmetics + draft unit test for W
 matrix in optim

---
 pkg/R/computeMu.R                     |   4 +-
 pkg/R/multiRun.R                      |   2 +-
 pkg/R/optimParams.R                   |  43 ++++-----
 pkg/R/sampleIO.R                      |  52 +++++------
 pkg/src/functions.c                   |   2 -
 pkg/tests/testthat/test-optimParams.R | 128 ++++++++++++++++++++++++++
 6 files changed, 173 insertions(+), 58 deletions(-)

diff --git a/pkg/R/computeMu.R b/pkg/R/computeMu.R
index f7f82ad..ea931d2 100644
--- a/pkg/R/computeMu.R
+++ b/pkg/R/computeMu.R
@@ -66,9 +66,8 @@ computeMu = function(X, Y, optargs=list())
   jd_method = ifelse(!is.null(optargs$jd_method), optargs$jd_method, "uwedge")
   V =
     if (jd_nvects > 1) {
-      #NOTE: increasing itermax does not help to converge, thus we suppress warnings
+      # NOTE: increasing itermax does not help to converge, thus we suppress warnings
       suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)})
-#      if (jd_method=="uwedge") jd$B else solve(jd$A)
       if (jd_method=="uwedge") jd$B else MASS::ginv(jd$A)
     }
     else
@@ -79,7 +78,6 @@ computeMu = function(X, Y, optargs=list())
   for (i in seq_len(K))
     M2_t[,,i] = .T_I_I_w(M[[3]],V[,i])
   suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)})
-#  U = if (jd_method=="uwedge") solve(jd$B) else jd$A
   U = if (jd_method=="uwedge") MASS::ginv(jd$B) else jd$A
   μ = normalize(U[,1:K])
 
diff --git a/pkg/R/multiRun.R b/pkg/R/multiRun.R
index 56c3e19..f38f9f3 100644
--- a/pkg/R/multiRun.R
+++ b/pkg/R/multiRun.R
@@ -23,7 +23,7 @@
 #' # Bootstrap + computeMu, morpheus VS flexmix ; assumes fargs first 3 elts X,Y,K
 #' io <- generateSampleIO(n=1000, p=1/2, β=β, b=c(0,0), "logit")
 #' μ <- normalize(β)
-#' res <- multiRun(list(X=io$X,Y=io$Y,optargs=list(K=2,jd_nvects=0)), list(
+#' res <- multiRun(list(X=io$X,Y=io$Y,optargs=list(K=2)), list(
 #'   # morpheus
 #'   function(fargs) {
 #'     library(morpheus)
diff --git a/pkg/R/optimParams.R b/pkg/R/optimParams.R
index ecfae7f..b7d901c 100644
--- a/pkg/R/optimParams.R
+++ b/pkg/R/optimParams.R
@@ -115,22 +115,17 @@ setRefClass(
       c(L$p[1:(K-1)], as.double(L$β), L$b)
     },
 
-    #TODO: compare with R version?
-    #D <- diag(d) #matrix of ej vectors
-    #Y * X
-    #Y * ( t( apply(X, 1, function(row) row %o% row) ) - Reduce('+', lapply(1:d, function(j) as.double(D[j,] %o% D[j,])), rep(0, d*d)))
-    #Y * ( t( apply(X, 1, function(row) row %o% row %*% row) ) - Reduce('+', lapply(1:d, function(j) ), rep(0, d*d*d)))
     computeW = function(θ)
     {
-      #require(MASS)
+      #return (diag(c(rep(6,d), rep(3, d^2), rep(1,d^3))))
+      require(MASS)
       dd <- d + d^2 + d^3
       M <- Moments(θ)
       Omega <- matrix( .C("Compute_Omega",
         X=as.double(X), Y=as.double(Y), M=as.double(M),
         pn=as.integer(n), pd=as.integer(d),
         W=as.double(W), PACKAGE="morpheus")$W, nrow=dd, ncol=dd )
-      W <<- MASS::ginv(Omega, tol=1e-4)
-      NULL #avoid returning W
+      MASS::ginv(Omega)
     },
 
     Moments = function(θ)
@@ -166,7 +161,7 @@ setRefClass(
       "Gradient of f, dimension (K-1) + d*K + K = (d+2)*K - 1"
 
       L <- expArgs(θ)
-      -2 * t(grad_M(L)) %*% W %*% as.matrix((Mhat - Moments(L)))
+      -2 * t(grad_M(L)) %*% W %*% as.matrix(Mhat - Moments(L))
     },
 
     grad_M = function(θ)
@@ -245,17 +240,22 @@ setRefClass(
         stop("θ0: list")
       if (is.null(θ0$β))
         stop("At least θ0$β must be provided")
-      if (!is.matrix(θ0$β) || any(is.na(θ0$β)) || ncol(θ0$β) != K)
-        stop("θ0$β: matrix, no NA, ncol == K")
+      if (!is.matrix(θ0$β) || any(is.na(θ0$β))
+        || nrow(θ0$β) != d || ncol(θ0$β) != K)
+      {
+        stop("θ0$β: matrix, no NA, nrow = d, ncol = K")
+      }
       if (is.null(θ0$p))
         θ0$p = rep(1/K, K-1)
-      else if (length(θ0$p) != K-1 || sum(θ0$p) > 1)
-        stop("θ0$p should contain positive integers and sum to < 1")
-      # Next test = heuristic to detect missing b (when matrix is called "beta")
-      if (is.null(θ0$b) || all(θ0$b == θ0$β))
+      else if (!is.numeric(θ0$p) || length(θ0$p) != K-1
+        || any(is.na(θ0$p)) || sum(θ0$p) > 1)
+      {
+        stop("θ0$p: length K-1, no NA, positive integers, sum to <= 1")
+      }
+      if (is.null(θ0$b))
         θ0$b = rep(0, K)
-      else if (any(is.na(θ0$b)))
-        stop("θ0$b cannot have missing values")
+      else if (!is.numeric(θ0$b) || length(θ0$b) != K || any(is.na(θ0$b)))
+        stop("θ0$b: length K, no NA")
       # TODO: stopping condition? N iterations? Delta <= epsilon ?
       for (loop in 1:10)
       {
@@ -264,12 +264,9 @@ setRefClass(
             rbind( rep(-1,K-1), diag(K-1) ),
             matrix(0, nrow=K, ncol=(d+1)*K) ),
           ci=c(-1,rep(0,K-1)) )
-
-        computeW(expArgs(op_res$par))
-        # debug:
-        #print(W)
-        print(op_res$value)
-        print(expArgs(op_res$par))
+        W <<- computeW(expArgs(op_res$par))
+        print(op_res$value) #debug
+        print(expArgs(op_res$par)) #debug
       }
 
       expArgs(op_res$par)
diff --git a/pkg/R/sampleIO.R b/pkg/R/sampleIO.R
index 3d46092..10497db 100644
--- a/pkg/R/sampleIO.R
+++ b/pkg/R/sampleIO.R
@@ -5,7 +5,7 @@
 #' described by the corresponding column parameter in the matrix β + intercept b.
 #'
 #' @param n Number of individuals
-#' @param p Vector of K-1 populations relative proportions (sum <= 1)
+#' @param p Vector of K(-1) populations relative proportions (sum (<)= 1)
 #' @param β Vectors of model parameters for each population, of size dxK
 #' @param b Vector of intercept values (use rep(0,K) for no intercept)
 #' @param link Link type; "logit" or "probit"
@@ -28,31 +28,29 @@ generateSampleIO = function(n, p, β, b, link)
     stop("n: positive integer")
   if (!is.matrix(β) || !is.numeric(β) || any(is.na(β)))
     stop("β: real matrix, no NAs")
-  K = ncol(β)
-  if (!is.numeric(p) || length(p)!=K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1)
-    stop("p: positive vector of size K-1, no NA, sum<=1")
-  p <- c(p, 1-sum(p))
+  K <- ncol(β)
+  if (!is.numeric(p) || length(p)<K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1)
+    stop("p: positive vector of size >= K-1, no NA, sum(<)=1")
+  if (length(p) == K-1)
+    p <- c(p, 1-sum(p))
   if (!is.numeric(b) || length(b)!=K || any(is.na(b)))
     stop("b: real vector of size K, no NA")
 
-  #random generation of the size of each population in X~Y (unordered)
-  classes = rmultinom(1, n, p)
+  # Random generation of the size of each population in X~Y (unordered)
+  classes <- rmultinom(1, n, p)
 
-  d = nrow(β)
-  zero_mean = rep(0,d)
-  id_sigma = diag(rep(1,d))
-  # Always consider an intercept (use b=0 for none)
-  d = d + 1
-  β = rbind(β, b)
-  X = matrix(nrow=0, ncol=d)
-  Y = c()
-  index = c()
-  for (i in 1:ncol(β))
+  d <- nrow(β)
+  zero_mean <- rep(0,d)
+  id_sigma <- diag(rep(1,d))
+  X <- matrix(nrow=0, ncol=d)
+  Y <- c()
+  index <- c()
+  for (i in 1:K)
   {
-    index = c(index, rep(i, classes[i]))
-    newXblock = cbind( MASS::mvrnorm(classes[i], zero_mean, id_sigma), 1 )
-    arg_link = newXblock %*% β[,i] #β
-    probas =
+    index <- c(index, rep(i, classes[i]))
+    newXblock <- MASS::mvrnorm(classes[i], zero_mean, id_sigma)
+    arg_link <- newXblock %*% β[,i] + b[i]
+    probas <-
       if (link == "logit")
       {
         e_arg_link = exp(arg_link)
@@ -61,13 +59,9 @@ generateSampleIO = function(n, p, β, b, link)
       else #"probit"
         pnorm(arg_link)
     probas[is.nan(probas)] = 1 #overflow of exp(x)
-    #probas = rowSums(p * probas)
-    X = rbind(X, newXblock)
-    #Y = c( Y, vapply(probas, function(p) (ifelse(p >= .5, 1, 0)), 1) )
-    Y = c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
+    X <- rbind(X, newXblock)
+    Y <- c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
   }
-  shuffle = sample(n)
-  # Returned X should not contain an intercept column (it's an argument of estimation
-  # methods)
-  list("X"=X[shuffle,-d], "Y"=Y[shuffle], "index"=index[shuffle])
+  shuffle <- sample(n)
+  list("X"=X[shuffle,], "Y"=Y[shuffle], "index"=index[shuffle])
 }
diff --git a/pkg/src/functions.c b/pkg/src/functions.c
index f251eb3..1f35585 100644
--- a/pkg/src/functions.c
+++ b/pkg/src/functions.c
@@ -99,8 +99,6 @@ void Compute_Omega(double* X, double* Y, double* M, int* pn, int* pd, double* W)
         g[j] -= Y[i] * X[mi(i,idx1,n,d)];
       g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
     }
-
-    // TODO: 1/n des gj empirique doit tendre vers 0
     // Add 1/n t(gi) %*% gi to W
     for (int j=0; j<dim; j++)
     {
diff --git a/pkg/tests/testthat/test-optimParams.R b/pkg/tests/testthat/test-optimParams.R
index 8f65e46..993015f 100644
--- a/pkg/tests/testthat/test-optimParams.R
+++ b/pkg/tests/testthat/test-optimParams.R
@@ -85,3 +85,131 @@ test_that("naive computation provides the same result as vectorized computations
     }
   }
 })
+
+# TODO: test computeW
+#    computeW = function(θ)
+#    {
+#      require(MASS)
+#      dd <- d + d^2 + d^3
+#      M <- Moments(θ)
+#      Id <- as.double(diag(d))
+#      E <- diag(d)
+#      v1 <- Y * X
+#      v2 <- Y * t( apply(X, 1, function(Xi) Xi %o% Xi - Id) )
+#      v3 <- Y * t( apply(X, 1, function(Xi) { return (Xi %o% Xi %o% Xi
+#        - Reduce('+', lapply(1:d, function(j) as.double(Xi %o% E[j,] %o% E[j,])), rep(0, d*d*d))
+#        - Reduce('+', lapply(1:d, function(j) as.double(E[j,] %o% Xi %o% E[j,])), rep(0, d*d*d))
+#        - Reduce('+', lapply(1:d, function(j) as.double(E[j,] %o% E[j,] %o% Xi)), rep(0, d*d*d))) } ) )
+#      Wtmp <- matrix(0, nrow=dd, ncol=dd)
+#      
+#
+#g <- matrix(nrow=n, ncol=dd); for (i in 1:n) g[i,] = c(v1[i,], v2[i,], v3[i,]) - M
+#
+#
+#
+#
+#
+#
+#  p <- θ$p
+#  β <- θ$β
+#  b <- θ$b
+#
+#
+#
+#
+##  # Random generation of the size of each population in X~Y (unordered)
+##  classes <- rmultinom(1, n, p)
+##
+##  #d <- nrow(β)
+##  zero_mean <- rep(0,d)
+##  id_sigma <- diag(rep(1,d))
+##  X <- matrix(nrow=0, ncol=d)
+##  Y <- c()
+##  for (i in 1:ncol(β)) #K = ncol(β)
+##  {
+##    newXblock <- MASS::mvrnorm(classes[i], zero_mean, id_sigma)
+##    arg_link <- newXblock %*% β[,i] + b[i]
+##    probas <-
+##      if (li == "logit")
+##      {
+##        e_arg_link = exp(arg_link)
+##        e_arg_link / (1 + e_arg_link)
+##      }
+##      else #"probit"
+##        pnorm(arg_link)
+##    probas[is.nan(probas)] <- 1 #overflow of exp(x)
+##    X <- rbind(X, newXblock)
+##    Y <- c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
+##  }
+#
+#
+#
+#
+#
+#
+#
+#
+#  Mhatt <- c(
+#    colMeans(Y * X),
+#    colMeans(Y * t( apply(X, 1, function(Xi) Xi %o% Xi - Id) )),
+#    colMeans(Y * t( apply(X, 1, function(Xi) { return (Xi %o% Xi %o% Xi
+#      - Reduce('+', lapply(1:d, function(j) as.double(Xi %o% E[j,] %o% E[j,])), rep(0, d*d*d))
+#      - Reduce('+', lapply(1:d, function(j) as.double(E[j,] %o% Xi %o% E[j,])), rep(0, d*d*d))
+#      - Reduce('+', lapply(1:d, function(j) as.double(E[j,] %o% E[j,] %o% Xi)), rep(0, d*d*d))) } ) ) ))
+#  λ <- sqrt(colSums(β^2))
+#  β2 <- apply(β, 2, function(col) col %o% col)
+#  β3 <- apply(β, 2, function(col) col %o% col %o% col)
+#  M <- c(
+#    β  %*% (p * .G(li,1,λ,b)),
+#    β2 %*% (p * .G(li,2,λ,b)),
+#    β3 %*% (p * .G(li,3,λ,b)) )
+#  print(sum(abs(Mhatt - M)))
+#
+#save(list=c("X", "Y"), file="v2.RData")
+#
+#
+#
+#
+#browser()
+#      for (i in 1:n)
+#      {
+#        gi <- t(as.matrix(c(v1[i,], v2[i,], v3[i,]) - M))
+#        Wtmp <- Wtmp + t(gi) %*% gi / n
+#      }
+#      Wtmp
+#      #MASS::ginv(Wtmp)
+#    },
+#
+#    #TODO: compare with R version?
+#    computeW_orig = function(θ)
+#    {
+#      require(MASS)
+#      dd <- d + d^2 + d^3
+#      M <- Moments(θ)
+#      Omega <- matrix( .C("Compute_Omega",
+#        X=as.double(X), Y=as.double(Y), M=as.double(M),
+#        pn=as.integer(n), pd=as.integer(d),
+#        W=as.double(W), PACKAGE="morpheus")$W, nrow=dd, ncol=dd )
+#      Omega
+#      #MASS::ginv(Omega) #, tol=1e-4)
+#    },
+#
+#    Moments = function(θ)
+#    {
+#      "Vector of moments, of size d+d^2+d^3"
+#
+#      p <- θ$p
+#      β <- θ$β
+#      λ <- sqrt(colSums(β^2))
+#      b <- θ$b
+#
+#      # Tensorial products β^2 = β2 and β^3 = β3 must be computed from current β1
+#      β2 <- apply(β, 2, function(col) col %o% col)
+#      β3 <- apply(β, 2, function(col) col %o% col %o% col)
+#
+#      c(
+#        β  %*% (p * .G(li,1,λ,b)),
+#        β2 %*% (p * .G(li,2,λ,b)),
+#        β3 %*% (p * .G(li,3,λ,b)))
+#    },
+#
-- 
2.44.0