X-Git-Url: https://git.auder.net/?p=morpheus.git;a=blobdiff_plain;f=pkg%2FR%2FcomputeMu.R;fp=pkg%2FR%2FcomputeMu.R;h=c1a9c51df53c71a66f62e2a6f285db923714b70c;hp=f961fdebe40bb81fca5409ae1c4cc8aeb9099385;hb=ab35f6102896a49e86e853262c0650faa2931638;hpb=4b2f17bb108bab0f263619cfe00eabfb1e9b8860 diff --git a/pkg/R/computeMu.R b/pkg/R/computeMu.R index f961fde..c1a9c51 100644 --- a/pkg/R/computeMu.R +++ b/pkg/R/computeMu.R @@ -3,6 +3,8 @@ #' Estimate the normalized columns μ of the β matrix parameter in a mixture of #' logistic regressions models, with a spectral method described in the package vignette. #' +#' @name computeMu +#' #' @param X Matrix of input data (size nxd) #' @param Y Vector of binary outputs (size n) #' @param optargs List of optional argument: @@ -21,28 +23,28 @@ #' and \code{generateSampleIO} for I/O random generation. #' #' @examples -#' io = generateSampleIO(10000, 1/2, matrix(c(1,0,0,1),ncol=2), c(0,0), "probit") -#' μ = computeMu(io$X, io$Y, list(K=2)) #or just X and Y for estimated K +#' io <- generateSampleIO(10000, 1/2, matrix(c(1,0,0,1),ncol=2), c(0,0), "probit") +#' μ <- computeMu(io$X, io$Y, list(K=2)) #or just X and Y for estimated K #' #' @export -computeMu = function(X, Y, optargs=list()) +computeMu <- function(X, Y, optargs=list()) { if (!is.matrix(X) || !is.numeric(X) || any(is.na(X))) stop("X: real matrix, no NA") - n = nrow(X) - d = ncol(X) + n <- nrow(X) + d <- ncol(X) if (!is.numeric(Y) || length(Y)!=n || any(Y!=0 & Y!=1)) stop("Y: vector of 0 and 1, size nrow(X), no NA") if (!is.list(optargs)) stop("optargs: list") # Step 0: Obtain the empirically estimated moments tensor, estimate also K - M = if (is.null(optargs$M)) computeMoments(X,Y) else optargs$M - K = optargs$K + M <- if (is.null(optargs$M)) computeMoments(X,Y) else optargs$M + K <- optargs$K if (is.null(K)) { # TODO: improve this basic heuristic - Σ = svd(M[[2]])$d + Σ <- svd(M[[2]])$d large_ratio <- ( abs(Σ[-d] / Σ[-1]) > 3 ) K <- if (any(large_ratio)) max(2, which.min(large_ratio)) else d } @@ -50,24 +52,24 @@ computeMu = function(X, Y, optargs=list()) stop("K: integer >= 2, <= d") # Step 1: generate a family of d matrices to joint-diagonalize to increase robustness - d = ncol(X) - fixed_design = FALSE - jd_nvects = ifelse(!is.null(optargs$jd_nvects), optargs$jd_nvects, 0) + d <- ncol(X) + fixed_design <- FALSE + jd_nvects <- ifelse(!is.null(optargs$jd_nvects), optargs$jd_nvects, 0) if (jd_nvects == 0) { - jd_nvects = d - fixed_design = TRUE + jd_nvects <- d + fixed_design <- TRUE } - M2_t = array(dim=c(d,d,jd_nvects)) + M2_t <- array(dim=c(d,d,jd_nvects)) for (i in seq_len(jd_nvects)) { - rho = if (fixed_design) c(rep(0,i-1),1,rep(0,d-i)) else normalize( rnorm(d) ) - M2_t[,,i] = .T_I_I_w(M[[3]],rho) + rho <- if (fixed_design) c(rep(0,i-1),1,rep(0,d-i)) else normalize( rnorm(d) ) + M2_t[,,i] <- .T_I_I_w(M[[3]],rho) } # Step 2: obtain factors u_i (and their inverse) from the joint diagonalisation of M2_t - jd_method = ifelse(!is.null(optargs$jd_method), optargs$jd_method, "uwedge") - V = + jd_method <- ifelse(!is.null(optargs$jd_method), optargs$jd_method, "uwedge") + V <- if (jd_nvects > 1) { # NOTE: increasing itermax does not help to converge, thus we suppress warnings suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)}) @@ -77,17 +79,17 @@ computeMu = function(X, Y, optargs=list()) eigen(M2_t[,,1])$vectors # Step 3: obtain final factors from joint diagonalisation of T(I,I,u_i) - M2_t = array(dim=c(d,d,K)) + M2_t <- array(dim=c(d,d,K)) for (i in seq_len(K)) - M2_t[,,i] = .T_I_I_w(M[[3]],V[,i]) + M2_t[,,i] <- .T_I_I_w(M[[3]],V[,i]) suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)}) - U = if (jd_method=="uwedge") MASS::ginv(jd$B) else jd$A - μ = normalize(U[,1:K]) + U <- if (jd_method=="uwedge") MASS::ginv(jd$B) else jd$A + μ <- normalize(U[,1:K]) # M1 also writes M1 = sum_k coeff_k * μ_k, where coeff_k >= 0 # ==> search decomposition of vector M1 onto the (truncated) basis μ (of size dxK) # This is a linear system μ %*% C = M1 with C of size K ==> C = psinv(μ) %*% M1 - C = MASS::ginv(μ) %*% M[[1]] - μ[,C < 0] = - μ[,C < 0] + C <- MASS::ginv(μ) %*% M[[1]] + μ[,C < 0] <- - μ[,C < 0] μ }