#' @export
computeMu = function(X, Y, optargs=list())
{
- if (!is.matrix(X) || !is.numeric(X) || any(is.na(X)))
- stop("X: real matrix, no NA")
- n = nrow(X)
- d = ncol(X)
- if (!is.numeric(Y) || length(Y)!=n || any(Y!=0 & Y!=1))
- stop("Y: vector of 0 and 1, size nrow(X), no NA")
- if (!is.list(optargs))
- stop("optargs: list")
+ if (!is.matrix(X) || !is.numeric(X) || any(is.na(X)))
+ stop("X: real matrix, no NA")
+ n = nrow(X)
+ d = ncol(X)
+ if (!is.numeric(Y) || length(Y)!=n || any(Y!=0 & Y!=1))
+ stop("Y: vector of 0 and 1, size nrow(X), no NA")
+ if (!is.list(optargs))
+ stop("optargs: list")
- # Step 0: Obtain the empirically estimated moments tensor, estimate also K
- M = if (is.null(optargs$M)) computeMoments(X,Y) else optargs$M
- K = optargs$K
- if (is.null(K))
- {
- # TODO: improve this basic heuristic
- Σ = svd(M[[2]])$d
- large_ratio <- ( abs(Σ[-d] / Σ[-1]) > 3 )
- K <- if (any(large_ratio)) max(2, which.min(large_ratio)) else d
- }
+ # Step 0: Obtain the empirically estimated moments tensor, estimate also K
+ M = if (is.null(optargs$M)) computeMoments(X,Y) else optargs$M
+ K = optargs$K
+ if (is.null(K))
+ {
+ # TODO: improve this basic heuristic
+ Σ = svd(M[[2]])$d
+ large_ratio <- ( abs(Σ[-d] / Σ[-1]) > 3 )
+ K <- if (any(large_ratio)) max(2, which.min(large_ratio)) else d
+ }
- # Step 1: generate a family of d matrices to joint-diagonalize to increase robustness
- d = ncol(X)
- fixed_design = FALSE
- jd_nvects = ifelse(!is.null(optargs$jd_nvects), optargs$jd_nvects, 0)
- if (jd_nvects == 0)
- {
- jd_nvects = d
- fixed_design = TRUE
- }
- M2_t = array(dim=c(d,d,jd_nvects))
- for (i in seq_len(jd_nvects))
- {
- rho = if (fixed_design) c(rep(0,i-1),1,rep(0,d-i)) else normalize( rnorm(d) )
- M2_t[,,i] = .T_I_I_w(M[[3]],rho)
- }
+ # Step 1: generate a family of d matrices to joint-diagonalize to increase robustness
+ d = ncol(X)
+ fixed_design = FALSE
+ jd_nvects = ifelse(!is.null(optargs$jd_nvects), optargs$jd_nvects, 0)
+ if (jd_nvects == 0)
+ {
+ jd_nvects = d
+ fixed_design = TRUE
+ }
+ M2_t = array(dim=c(d,d,jd_nvects))
+ for (i in seq_len(jd_nvects))
+ {
+ rho = if (fixed_design) c(rep(0,i-1),1,rep(0,d-i)) else normalize( rnorm(d) )
+ M2_t[,,i] = .T_I_I_w(M[[3]],rho)
+ }
- # Step 2: obtain factors u_i (and their inverse) from the joint diagonalisation of M2_t
- jd_method = ifelse(!is.null(optargs$jd_method), optargs$jd_method, "uwedge")
- V =
- if (jd_nvects > 1) {
- #NOTE: increasing itermax does not help to converge, thus we suppress warnings
- suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)})
-# if (jd_method=="uwedge") jd$B else solve(jd$A)
- if (jd_method=="uwedge") jd$B else MASS::ginv(jd$A)
- }
- else
- eigen(M2_t[,,1])$vectors
+ # Step 2: obtain factors u_i (and their inverse) from the joint diagonalisation of M2_t
+ jd_method = ifelse(!is.null(optargs$jd_method), optargs$jd_method, "uwedge")
+ V =
+ if (jd_nvects > 1) {
+ #NOTE: increasing itermax does not help to converge, thus we suppress warnings
+ suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)})
+# if (jd_method=="uwedge") jd$B else solve(jd$A)
+ if (jd_method=="uwedge") jd$B else MASS::ginv(jd$A)
+ }
+ else
+ eigen(M2_t[,,1])$vectors
- # Step 3: obtain final factors from joint diagonalisation of T(I,I,u_i)
- M2_t = array(dim=c(d,d,K))
- for (i in seq_len(K))
- M2_t[,,i] = .T_I_I_w(M[[3]],V[,i])
- suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)})
-# U = if (jd_method=="uwedge") solve(jd$B) else jd$A
- U = if (jd_method=="uwedge") MASS::ginv(jd$B) else jd$A
- μ = normalize(U[,1:K])
+ # Step 3: obtain final factors from joint diagonalisation of T(I,I,u_i)
+ M2_t = array(dim=c(d,d,K))
+ for (i in seq_len(K))
+ M2_t[,,i] = .T_I_I_w(M[[3]],V[,i])
+ suppressWarnings({jd = jointDiag::ajd(M2_t, method=jd_method)})
+# U = if (jd_method=="uwedge") solve(jd$B) else jd$A
+ U = if (jd_method=="uwedge") MASS::ginv(jd$B) else jd$A
+ μ = normalize(U[,1:K])
- # M1 also writes M1 = sum_k coeff_k * μ_k, where coeff_k >= 0
- # ==> search decomposition of vector M1 onto the (truncated) basis μ (of size dxK)
- # This is a linear system μ %*% C = M1 with C of size K ==> C = psinv(μ) %*% M1
- C = MASS::ginv(μ) %*% M[[1]]
- μ[,C < 0] = - μ[,C < 0]
- μ
+ # M1 also writes M1 = sum_k coeff_k * μ_k, where coeff_k >= 0
+ # ==> search decomposition of vector M1 onto the (truncated) basis μ (of size dxK)
+ # This is a linear system μ %*% C = M1 with C of size K ==> C = psinv(μ) %*% M1
+ C = MASS::ginv(μ) %*% M[[1]]
+ μ[,C < 0] = - μ[,C < 0]
+ μ
}
#' res[[i]] <- alignMatrices(res[[i]], ref=β, ls_mode="exact")}
#' @export
multiRun <- function(fargs, estimParams,
- prepareArgs = function(x,i) x, N=10, ncores=3, agg=lapply, verbose=FALSE)
+ prepareArgs = function(x,i) x, N=10, ncores=3, agg=lapply, verbose=FALSE)
{
- if (!is.list(fargs))
- stop("fargs: list")
- # No checks on fargs: supposedly done in estimParams[[i]]()
- if (!is.list(estimParams))
- estimParams = list(estimParams)
- # Verify that the provided parameters estimations are indeed functions
- lapply(seq_along(estimParams), function(i) {
- if (!is.function(estimParams[[i]]))
- stop("estimParams: list of function(fargs)")
- })
- if (!is.numeric(N) || N < 1)
- stop("N: positive integer")
+ if (!is.list(fargs))
+ stop("fargs: list")
+ # No checks on fargs: supposedly done in estimParams[[i]]()
+ if (!is.list(estimParams))
+ estimParams = list(estimParams)
+ # Verify that the provided parameters estimations are indeed functions
+ lapply(seq_along(estimParams), function(i) {
+ if (!is.function(estimParams[[i]]))
+ stop("estimParams: list of function(fargs)")
+ })
+ if (!is.numeric(N) || N < 1)
+ stop("N: positive integer")
- estimParamAtIndex <- function(index)
- {
- fargs <- prepareArgs(fargs, index)
- if (verbose)
- cat("Run ",index,"\n")
- lapply(seq_along(estimParams), function(i) {
- if (verbose)
- cat(" Method ",i,"\n")
- out <- estimParams[[i]](fargs)
- if (is.list(out))
- do.call(rbind, out)
- else
- out
- })
- }
+ estimParamAtIndex <- function(index)
+ {
+ fargs <- prepareArgs(fargs, index)
+ if (verbose)
+ cat("Run ",index,"\n")
+ lapply(seq_along(estimParams), function(i) {
+ if (verbose)
+ cat(" Method ",i,"\n")
+ out <- estimParams[[i]](fargs)
+ if (is.list(out))
+ do.call(rbind, out)
+ else
+ out
+ })
+ }
- if (ncores > 1)
- {
- cl = parallel::makeCluster(ncores, outfile="")
- parallel::clusterExport(cl, c("fargs","verbose"), environment())
- list_res = parallel::clusterApplyLB(cl, 1:N, estimParamAtIndex)
- parallel::stopCluster(cl)
- }
- else
- list_res = lapply(1:N, estimParamAtIndex)
+ if (ncores > 1)
+ {
+ cl = parallel::makeCluster(ncores, outfile="")
+ parallel::clusterExport(cl, c("fargs","verbose"), environment())
+ list_res = parallel::clusterApplyLB(cl, 1:N, estimParamAtIndex)
+ parallel::stopCluster(cl)
+ }
+ else
+ list_res = lapply(1:N, estimParamAtIndex)
- # De-interlace results: output one list per function
- nf <- length(estimParams)
- lapply( seq_len(nf), function(i) lapply(seq_len(N), function(j) list_res[[j]][[i]]) )
+ # De-interlace results: output one list per function
+ nf <- length(estimParams)
+ lapply( seq_len(nf), function(i) lapply(seq_len(N), function(j) list_res[[j]][[i]]) )
}
#' @export
optimParams <- function(X, Y, K, link=c("logit","probit"))
{
- # Check arguments
+ # Check arguments
if (!is.matrix(X) || any(is.na(X)))
stop("X: numeric matrix, no NAs")
if (!is.numeric(Y) || any(is.na(Y)) || any(Y!=0 & Y!=1))
stop("Y: binary vector with 0 and 1 only")
- link <- match.arg(link)
+ link <- match.arg(link)
if (!is.numeric(K) || K!=floor(K) || K < 2)
stop("K: integer >= 2")
- # Build and return optimization algorithm object
- methods::new("OptimParams", "li"=link, "X"=X,
+ # Build and return optimization algorithm object
+ methods::new("OptimParams", "li"=link, "X"=X,
"Y"=as.integer(Y), "K"=as.integer(K))
}
#' @field W Weights matrix (iteratively refined)
#'
setRefClass(
- Class = "OptimParams",
+ Class = "OptimParams",
- fields = list(
- # Inputs
- li = "character", #link function
- X = "matrix",
- Y = "numeric",
+ fields = list(
+ # Inputs
+ li = "character", #link function
+ X = "matrix",
+ Y = "numeric",
Mhat = "numeric", #vector of empirical moments
- # Dimensions
- K = "integer",
+ # Dimensions
+ K = "integer",
n = "integer",
- d = "integer",
+ d = "integer",
# Weights matrix (generalized least square)
W = "matrix"
- ),
+ ),
- methods = list(
- initialize = function(...)
- {
- "Check args and initialize K, d, W"
+ methods = list(
+ initialize = function(...)
+ {
+ "Check args and initialize K, d, W"
callSuper(...)
- if (!hasArg("X") || !hasArg("Y") || !hasArg("K") || !hasArg("li"))
- stop("Missing arguments")
+ if (!hasArg("X") || !hasArg("Y") || !hasArg("K") || !hasArg("li"))
+ stop("Missing arguments")
# Precompute empirical moments
M <- computeMoments(X, Y)
M3 <- as.double(M[[3]])
Mhat <<- c(M1, M2, M3)
- n <<- nrow(X)
- d <<- length(M1)
+ n <<- nrow(X)
+ d <<- length(M1)
W <<- diag(d+d^2+d^3) #initialize at W = Identity
- },
+ },
- expArgs = function(v)
- {
- "Expand individual arguments from vector v into a list"
+ expArgs = function(v)
+ {
+ "Expand individual arguments from vector v into a list"
- list(
- # p: dimension K-1, need to be completed
- "p" = c(v[1:(K-1)], 1-sum(v[1:(K-1)])),
- "β" = matrix(v[K:(K+d*K-1)], ncol=K),
- "b" = v[(K+d*K):(K+(d+1)*K-1)])
- },
+ list(
+ # p: dimension K-1, need to be completed
+ "p" = c(v[1:(K-1)], 1-sum(v[1:(K-1)])),
+ "β" = matrix(v[K:(K+d*K-1)], ncol=K),
+ "b" = v[(K+d*K):(K+(d+1)*K-1)])
+ },
- linArgs = function(L)
- {
- "Linearize vectors+matrices from list L into a vector"
+ linArgs = function(L)
+ {
+ "Linearize vectors+matrices from list L into a vector"
- c(L$p[1:(K-1)], as.double(L$β), L$b)
- },
+ c(L$p[1:(K-1)], as.double(L$β), L$b)
+ },
computeW = function(θ)
{
"Vector of moments, of size d+d^2+d^3"
p <- θ$p
- β <- θ$β
- λ <- sqrt(colSums(β^2))
- b <- θ$b
-
- # Tensorial products β^2 = β2 and β^3 = β3 must be computed from current β1
- β2 <- apply(β, 2, function(col) col %o% col)
- β3 <- apply(β, 2, function(col) col %o% col %o% col)
-
- c(
- β %*% (p * .G(li,1,λ,b)),
- β2 %*% (p * .G(li,2,λ,b)),
- β3 %*% (p * .G(li,3,λ,b)))
+ β <- θ$β
+ λ <- sqrt(colSums(β^2))
+ b <- θ$b
+
+ # Tensorial products β^2 = β2 and β^3 = β3 must be computed from current β1
+ β2 <- apply(β, 2, function(col) col %o% col)
+ β3 <- apply(β, 2, function(col) col %o% col %o% col)
+
+ c(
+ β %*% (p * .G(li,1,λ,b)),
+ β2 %*% (p * .G(li,2,λ,b)),
+ β3 %*% (p * .G(li,3,λ,b)))
},
f = function(θ)
{
- "Product t(hat_Mi - Mi) W (hat_Mi - Mi) with Mi(theta)"
+ "Product t(hat_Mi - Mi) W (hat_Mi - Mi) with Mi(theta)"
L <- expArgs(θ)
- A <- as.matrix(Mhat - Moments(L))
+ A <- as.matrix(Mhat - Moments(L))
t(A) %*% W %*% A
},
- grad_f = function(θ)
- {
- "Gradient of f, dimension (K-1) + d*K + K = (d+2)*K - 1"
+ grad_f = function(θ)
+ {
+ "Gradient of f, dimension (K-1) + d*K + K = (d+2)*K - 1"
L <- expArgs(θ)
-2 * t(grad_M(L)) %*% W %*% as.matrix((Mhat - Moments(L)))
{
"Gradient of the vector of moments, size (dim=)d+d^2+d^3 x K-1+K+d*K"
- p <- θ$p
- β <- θ$β
- λ <- sqrt(colSums(β^2))
- μ <- sweep(β, 2, λ, '/')
- b <- θ$b
+ p <- θ$p
+ β <- θ$β
+ λ <- sqrt(colSums(β^2))
+ μ <- sweep(β, 2, λ, '/')
+ b <- θ$b
res <- matrix(nrow=nrow(W), ncol=0)
- # Tensorial products β^2 = β2 and β^3 = β3 must be computed from current β1
- β2 <- apply(β, 2, function(col) col %o% col)
- β3 <- apply(β, 2, function(col) col %o% col %o% col)
+ # Tensorial products β^2 = β2 and β^3 = β3 must be computed from current β1
+ β2 <- apply(β, 2, function(col) col %o% col)
+ β3 <- apply(β, 2, function(col) col %o% col %o% col)
- # Some precomputations
- G1 = .G(li,1,λ,b)
- G2 = .G(li,2,λ,b)
- G3 = .G(li,3,λ,b)
- G4 = .G(li,4,λ,b)
- G5 = .G(li,5,λ,b)
+ # Some precomputations
+ G1 = .G(li,1,λ,b)
+ G2 = .G(li,2,λ,b)
+ G3 = .G(li,3,λ,b)
+ G4 = .G(li,4,λ,b)
+ G5 = .G(li,5,λ,b)
# Gradient on p: K-1 columns, dim rows
- km1 = 1:(K-1)
- res <- cbind(res, rbind(
+ km1 = 1:(K-1)
+ res <- cbind(res, rbind(
sweep(as.matrix(β [,km1]), 2, G1[km1], '*') - G1[K] * β [,K],
sweep(as.matrix(β2[,km1]), 2, G2[km1], '*') - G2[K] * β2[,K],
sweep(as.matrix(β3[,km1]), 2, G3[km1], '*') - G3[K] * β3[,K] ))
- for (i in 1:d)
- {
- # i determines the derivated matrix dβ[2,3]
-
- dβ_left <- sweep(β, 2, p * G3 * β[i,], '*')
- dβ_right <- matrix(0, nrow=d, ncol=K)
- block <- i
- dβ_right[block,] <- dβ_right[block,] + 1
- dβ <- dβ_left + sweep(dβ_right, 2, p * G1, '*')
-
- dβ2_left <- sweep(β2, 2, p * G4 * β[i,], '*')
- dβ2_right <- do.call( rbind, lapply(1:d, function(j) {
- sweep(dβ_right, 2, β[j,], '*')
- }) )
- block <- ((i-1)*d+1):(i*d)
- dβ2_right[block,] <- dβ2_right[block,] + β
- dβ2 <- dβ2_left + sweep(dβ2_right, 2, p * G2, '*')
-
- dβ3_left <- sweep(β3, 2, p * G5 * β[i,], '*')
- dβ3_right <- do.call( rbind, lapply(1:d, function(j) {
- sweep(dβ2_right, 2, β[j,], '*')
- }) )
- block <- ((i-1)*d*d+1):(i*d*d)
- dβ3_right[block,] <- dβ3_right[block,] + β2
- dβ3 <- dβ3_left + sweep(dβ3_right, 2, p * G3, '*')
-
- res <- cbind(res, rbind(dβ, dβ2, dβ3))
- }
+ for (i in 1:d)
+ {
+ # i determines the derivated matrix dβ[2,3]
+
+ dβ_left <- sweep(β, 2, p * G3 * β[i,], '*')
+ dβ_right <- matrix(0, nrow=d, ncol=K)
+ block <- i
+ dβ_right[block,] <- dβ_right[block,] + 1
+ dβ <- dβ_left + sweep(dβ_right, 2, p * G1, '*')
+
+ dβ2_left <- sweep(β2, 2, p * G4 * β[i,], '*')
+ dβ2_right <- do.call( rbind, lapply(1:d, function(j) {
+ sweep(dβ_right, 2, β[j,], '*')
+ }) )
+ block <- ((i-1)*d+1):(i*d)
+ dβ2_right[block,] <- dβ2_right[block,] + β
+ dβ2 <- dβ2_left + sweep(dβ2_right, 2, p * G2, '*')
+
+ dβ3_left <- sweep(β3, 2, p * G5 * β[i,], '*')
+ dβ3_right <- do.call( rbind, lapply(1:d, function(j) {
+ sweep(dβ2_right, 2, β[j,], '*')
+ }) )
+ block <- ((i-1)*d*d+1):(i*d*d)
+ dβ3_right[block,] <- dβ3_right[block,] + β2
+ dβ3 <- dβ3_left + sweep(dβ3_right, 2, p * G3, '*')
+
+ res <- cbind(res, rbind(dβ, dβ2, dβ3))
+ }
# Gradient on b
- res <- cbind(res, rbind(
- sweep(β, 2, p * G2, '*'),
- sweep(β2, 2, p * G3, '*'),
- sweep(β3, 2, p * G4, '*') ))
+ res <- cbind(res, rbind(
+ sweep(β, 2, p * G2, '*'),
+ sweep(β2, 2, p * G3, '*'),
+ sweep(β3, 2, p * G4, '*') ))
- res
- },
+ res
+ },
- run = function(θ0)
- {
- "Run optimization from θ0 with solver..."
+ run = function(θ0)
+ {
+ "Run optimization from θ0 with solver..."
- if (!is.list(θ0))
- stop("θ0: list")
+ if (!is.list(θ0))
+ stop("θ0: list")
if (is.null(θ0$β))
stop("At least θ0$β must be provided")
- if (!is.matrix(θ0$β) || any(is.na(θ0$β)) || ncol(θ0$β) != K)
- stop("θ0$β: matrix, no NA, ncol == K")
+ if (!is.matrix(θ0$β) || any(is.na(θ0$β)) || ncol(θ0$β) != K)
+ stop("θ0$β: matrix, no NA, ncol == K")
if (is.null(θ0$p))
θ0$p = rep(1/K, K-1)
else if (length(θ0$p) != K-1 || sum(θ0$p) > 1)
print(expArgs(op_res$par))
}
- expArgs(op_res$par)
- }
- )
+ expArgs(op_res$par)
+ }
+ )
)
# Compute vectorial E[g^{(order)}(<β,x> + b)] with x~N(0,Id) (integral in R^d)
#
.G <- function(link, order, λ, b)
{
- # NOTE: weird "integral divergent" error on inputs:
- # link="probit"; order=2; λ=c(531.8099,586.8893,523.5816); b=c(-118.512674,-3.488020,2.109969)
- # Switch to pracma package for that (but it seems slow...)
+ # NOTE: weird "integral divergent" error on inputs:
+ # link="probit"; order=2; λ=c(531.8099,586.8893,523.5816); b=c(-118.512674,-3.488020,2.109969)
+ # Switch to pracma package for that (but it seems slow...)
sapply( seq_along(λ), function(k) {
res <- NULL
tryCatch({
# Derivatives list: g^(k)(x) for links 'logit' and 'probit'
#
.deriv <- list(
- "probit"=list(
- # 'probit' derivatives list;
- # NOTE: exact values for the integral E[g^(k)(λz+b)] could be computed
- function(x) exp(-x^2/2)/(sqrt(2*pi)), #g'
- function(x) exp(-x^2/2)/(sqrt(2*pi)) * -x, #g''
- function(x) exp(-x^2/2)/(sqrt(2*pi)) * ( x^2 - 1), #g^(3)
- function(x) exp(-x^2/2)/(sqrt(2*pi)) * (-x^3 + 3*x), #g^(4)
- function(x) exp(-x^2/2)/(sqrt(2*pi)) * ( x^4 - 6*x^2 + 3) #g^(5)
- ),
- "logit"=list(
- # Sigmoid derivatives list, obtained with http://www.derivative-calculator.net/
- # @seealso http://www.ece.uc.edu/~aminai/papers/minai_sigmoids_NN93.pdf
- function(x) {e=exp(x); .zin(e /(e+1)^2)}, #g'
- function(x) {e=exp(x); .zin(e*(-e + 1) /(e+1)^3)}, #g''
- function(x) {e=exp(x); .zin(e*( e^2 - 4*e + 1) /(e+1)^4)}, #g^(3)
- function(x) {e=exp(x); .zin(e*(-e^3 + 11*e^2 - 11*e + 1) /(e+1)^5)}, #g^(4)
- function(x) {e=exp(x); .zin(e*( e^4 - 26*e^3 + 66*e^2 - 26*e + 1)/(e+1)^6)} #g^(5)
- )
+ "probit"=list(
+ # 'probit' derivatives list;
+ # NOTE: exact values for the integral E[g^(k)(λz+b)] could be computed
+ function(x) exp(-x^2/2)/(sqrt(2*pi)), #g'
+ function(x) exp(-x^2/2)/(sqrt(2*pi)) * -x, #g''
+ function(x) exp(-x^2/2)/(sqrt(2*pi)) * ( x^2 - 1), #g^(3)
+ function(x) exp(-x^2/2)/(sqrt(2*pi)) * (-x^3 + 3*x), #g^(4)
+ function(x) exp(-x^2/2)/(sqrt(2*pi)) * ( x^4 - 6*x^2 + 3) #g^(5)
+ ),
+ "logit"=list(
+ # Sigmoid derivatives list, obtained with http://www.derivative-calculator.net/
+ # @seealso http://www.ece.uc.edu/~aminai/papers/minai_sigmoids_NN93.pdf
+ function(x) {e=exp(x); .zin(e /(e+1)^2)}, #g'
+ function(x) {e=exp(x); .zin(e*(-e + 1) /(e+1)^3)}, #g''
+ function(x) {e=exp(x); .zin(e*( e^2 - 4*e + 1) /(e+1)^4)}, #g^(3)
+ function(x) {e=exp(x); .zin(e*(-e^3 + 11*e^2 - 11*e + 1) /(e+1)^5)}, #g^(4)
+ function(x) {e=exp(x); .zin(e*( e^4 - 26*e^3 + 66*e^2 - 26*e + 1)/(e+1)^6)} #g^(5)
+ )
)
# Utility for integration: "[return] zero if [argument is] NaN" (Inf / Inf divs)
#
.zin <- function(x)
{
- x[is.nan(x)] <- 0.
- x
+ x[is.nan(x)] <- 0.
+ x
}
#
extractParam <- function(mr, x=1, y=1)
{
- # Obtain L vectors where L = number of res lists in mr
- lapply( mr, function(mr_list) {
- sapply(mr_list, function(m) m[x,y])
- } )
+ # Obtain L vectors where L = number of res lists in mr
+ lapply( mr, function(mr_list) {
+ sapply(mr_list, function(m) m[x,y])
+ } )
}
#' plotHist
#' @export
plotHist <- function(mr, x, y)
{
- params <- extractParam(mr, x, y)
- L = length(params)
- # Plot histograms side by side
- par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1))
- for (i in 1:L)
- hist(params[[i]], breaks=40, freq=FALSE, xlab="Parameter value", ylab="Density")
+ params <- extractParam(mr, x, y)
+ L = length(params)
+ # Plot histograms side by side
+ par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1))
+ for (i in 1:L)
+ hist(params[[i]], breaks=40, freq=FALSE, xlab="Parameter value", ylab="Density")
}
#' plotBox
#' @export
plotBox <- function(mr, x, y, xtitle="")
{
- params <- extractParam(mr, x, y)
- L = length(params)
- # Plot boxplots side by side
- par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1))
- for (i in 1:L)
- boxplot(params[[i]], xlab=xtitle, ylab="Parameter value")
+ params <- extractParam(mr, x, y)
+ L = length(params)
+ # Plot boxplots side by side
+ par(mfrow=c(1,L), cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1))
+ for (i in 1:L)
+ boxplot(params[[i]], xlab=xtitle, ylab="Parameter value")
}
#' plotCoefs
#' @export
plotCoefs <- function(mr, params, idx, xtitle="Parameter")
{
- L <- nrow(mr[[1]][[1]])
- K <- ncol(mr[[1]][[1]])
+ L <- nrow(mr[[1]][[1]])
+ K <- ncol(mr[[1]][[1]])
- params_hat <- matrix(nrow=L, ncol=K)
- stdev <- matrix(nrow=L, ncol=K)
- for (x in 1:L)
- {
- for (y in 1:K)
- {
- estims <- extractParam(mr, x, y)
- params_hat[x,y] <- mean(estims[[idx]])
-# stdev[x,y] <- sqrt( mean( (estims[[idx]] - params[x,y])^2 ) )
- # HACK remove extreme quantile in estims[[i]] before computing sd()
- stdev[x,y] <- sd( estims[[idx]] ) #[ estims[[idx]] < max(estims[[idx]]) & estims[[idx]] > min(estims[[idx]]) ] )
- }
- }
+ params_hat <- matrix(nrow=L, ncol=K)
+ stdev <- matrix(nrow=L, ncol=K)
+ for (x in 1:L)
+ {
+ for (y in 1:K)
+ {
+ estims <- extractParam(mr, x, y)
+ params_hat[x,y] <- mean(estims[[idx]])
+# stdev[x,y] <- sqrt( mean( (estims[[idx]] - params[x,y])^2 ) )
+ # HACK remove extreme quantile in estims[[i]] before computing sd()
+ stdev[x,y] <- sd( estims[[idx]] ) #[ estims[[idx]] < max(estims[[idx]]) & estims[[idx]] > min(estims[[idx]]) ] )
+ }
+ }
- par(cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1))
- params <- as.double(params)
- o <- order(params)
- avg_param <- as.double(params_hat)
- std_param <- as.double(stdev)
- matplot(cbind(params[o],avg_param[o],avg_param[o]+std_param[o],avg_param[o]-std_param[o]),
- col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab=xtitle, ylab="")
+ par(cex.axis=1.5, cex.lab=1.5, mar=c(4.7,5,1,1))
+ params <- as.double(params)
+ o <- order(params)
+ avg_param <- as.double(params_hat)
+ std_param <- as.double(stdev)
+ matplot(cbind(params[o],avg_param[o],avg_param[o]+std_param[o],avg_param[o]-std_param[o]),
+ col=1, lty=c(1,5,3,3), type="l", lwd=2, xlab=xtitle, ylab="")
- #print(o) #not returning o to avoid weird Jupyter issue... (TODO:)
+ #print(o) #not returning o to avoid weird Jupyter issue... (TODO:)
}
#' plotQn
#' @export
plotQn <- function(N, n, p, β, b, link)
{
- d <- nrow(β)
- K <- ncol(β)
- io <- generateSampleIO(n, p, β, b, link)
- op <- optimParams(K, link, list(X=io$X, Y=io$Y))
- # N random starting points gaussian (TODO: around true β?)
- res <- matrix(nrow=d*K+1, ncol=N)
- for (i in seq_len(N))
- {
- β_init <- rnorm(d*K)
- par <- op$run( c(rep(1/K,K-1), β_init, rep(0,K)) )
- par <- op$linArgs(par)
- Qn <- op$f(par)
- res[,i] = c(Qn, par[K:(K+d*K-1)])
- }
- res #TODO: plot this, not just return it...
+ d <- nrow(β)
+ K <- ncol(β)
+ io <- generateSampleIO(n, p, β, b, link)
+ op <- optimParams(K, link, list(X=io$X, Y=io$Y))
+ # N random starting points gaussian (TODO: around true β?)
+ res <- matrix(nrow=d*K+1, ncol=N)
+ for (i in seq_len(N))
+ {
+ β_init <- rnorm(d*K)
+ par <- op$run( c(rep(1/K,K-1), β_init, rep(0,K)) )
+ par <- op$linArgs(par)
+ Qn <- op$f(par)
+ res[,i] = c(Qn, par[K:(K+d*K-1)])
+ }
+ res #TODO: plot this, not just return it...
}
#' @export
generateSampleIO = function(n, p, β, b, link)
{
- # Check arguments
- tryCatch({n = as.integer(n)}, error=function(e) stop("Cannot convert n to integer"))
- if (length(n) > 1)
- warning("n is a vector but should be scalar: only first element used")
- if (n <= 0)
- stop("n: positive integer")
- if (!is.matrix(β) || !is.numeric(β) || any(is.na(β)))
- stop("β: real matrix, no NAs")
- K = ncol(β)
- if (!is.numeric(p) || length(p)!=K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1)
- stop("p: positive vector of size K-1, no NA, sum<=1")
- p <- c(p, 1-sum(p))
- if (!is.numeric(b) || length(b)!=K || any(is.na(b)))
- stop("b: real vector of size K, no NA")
+ # Check arguments
+ tryCatch({n = as.integer(n)}, error=function(e) stop("Cannot convert n to integer"))
+ if (length(n) > 1)
+ warning("n is a vector but should be scalar: only first element used")
+ if (n <= 0)
+ stop("n: positive integer")
+ if (!is.matrix(β) || !is.numeric(β) || any(is.na(β)))
+ stop("β: real matrix, no NAs")
+ K = ncol(β)
+ if (!is.numeric(p) || length(p)!=K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1)
+ stop("p: positive vector of size K-1, no NA, sum<=1")
+ p <- c(p, 1-sum(p))
+ if (!is.numeric(b) || length(b)!=K || any(is.na(b)))
+ stop("b: real vector of size K, no NA")
- #random generation of the size of each population in X~Y (unordered)
- classes = rmultinom(1, n, p)
+ #random generation of the size of each population in X~Y (unordered)
+ classes = rmultinom(1, n, p)
- d = nrow(β)
- zero_mean = rep(0,d)
- id_sigma = diag(rep(1,d))
- # Always consider an intercept (use b=0 for none)
- d = d + 1
- β = rbind(β, b)
- X = matrix(nrow=0, ncol=d)
- Y = c()
- index = c()
- for (i in 1:ncol(β))
- {
- index = c(index, rep(i, classes[i]))
- newXblock = cbind( MASS::mvrnorm(classes[i], zero_mean, id_sigma), 1 )
- arg_link = newXblock %*% β[,i] #β
- probas =
- if (link == "logit")
- {
- e_arg_link = exp(arg_link)
- e_arg_link / (1 + e_arg_link)
- }
- else #"probit"
- pnorm(arg_link)
- probas[is.nan(probas)] = 1 #overflow of exp(x)
- #probas = rowSums(p * probas)
- X = rbind(X, newXblock)
- #Y = c( Y, vapply(probas, function(p) (ifelse(p >= .5, 1, 0)), 1) )
- Y = c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
- }
- shuffle = sample(n)
- # Returned X should not contain an intercept column (it's an argument of estimation
- # methods)
- list("X"=X[shuffle,-d], "Y"=Y[shuffle], "index"=index[shuffle])
+ d = nrow(β)
+ zero_mean = rep(0,d)
+ id_sigma = diag(rep(1,d))
+ # Always consider an intercept (use b=0 for none)
+ d = d + 1
+ β = rbind(β, b)
+ X = matrix(nrow=0, ncol=d)
+ Y = c()
+ index = c()
+ for (i in 1:ncol(β))
+ {
+ index = c(index, rep(i, classes[i]))
+ newXblock = cbind( MASS::mvrnorm(classes[i], zero_mean, id_sigma), 1 )
+ arg_link = newXblock %*% β[,i] #β
+ probas =
+ if (link == "logit")
+ {
+ e_arg_link = exp(arg_link)
+ e_arg_link / (1 + e_arg_link)
+ }
+ else #"probit"
+ pnorm(arg_link)
+ probas[is.nan(probas)] = 1 #overflow of exp(x)
+ #probas = rowSums(p * probas)
+ X = rbind(X, newXblock)
+ #Y = c( Y, vapply(probas, function(p) (ifelse(p >= .5, 1, 0)), 1) )
+ Y = c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
+ }
+ shuffle = sample(n)
+ # Returned X should not contain an intercept column (it's an argument of estimation
+ # methods)
+ list("X"=X[shuffle,-d], "Y"=Y[shuffle], "index"=index[shuffle])
}
#' @export
normalize = function(X)
{
- X = as.matrix(X)
- norm2 = sqrt( colSums(X^2) )
- sweep(X, 2, norm2, '/')
+ X = as.matrix(X)
+ norm2 = sqrt( colSums(X^2) )
+ sweep(X, 2, norm2, '/')
}
# Computes a tensor-vector product
#
.T_I_I_w = function(Te, w)
{
- d = length(w)
- Ma = matrix(0,nrow=d,ncol=d)
- for (j in 1:d)
- Ma = Ma + w[j] * Te[,,j]
- Ma
+ d = length(w)
+ Ma = matrix(0,nrow=d,ncol=d)
+ for (j in 1:d)
+ Ma = Ma + w[j] * Te[,,j]
+ Ma
}
# Computes the second-order empirical moment between input X and output Y
#
.Moments_M2 = function(X, Y)
{
- n = nrow(X)
- d = ncol(X)
- M2 = matrix(0,nrow=d,ncol=d)
- matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
- pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
+ n = nrow(X)
+ d = ncol(X)
+ M2 = matrix(0,nrow=d,ncol=d)
+ matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
+ pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
}
# Computes the third-order empirical moment between input X and output Y
#
.Moments_M3 = function(X, Y)
{
- n = nrow(X)
- d = ncol(X)
- M3 = array(0,dim=c(d,d,d))
- array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
- pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
+ n = nrow(X)
+ d = ncol(X)
+ M3 = array(0,dim=c(d,d,d))
+ array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
+ pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
}
#' computeMoments
#'
#' @export
computeMoments = function(X, Y)
- list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
+ list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
# Find the optimal assignment (permutation) between two sets (minimize cost)
#
#
.hungarianAlgorithm = function(distances)
{
- n = nrow(distances)
- .C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
- assignment=integer(n), PACKAGE="morpheus")$assignment
+ n = nrow(distances)
+ .C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
+ assignment=integer(n), PACKAGE="morpheus")$assignment
}
#' alignMatrices
#' @export
alignMatrices = function(Ms, ref, ls_mode)
{
- if (!is.matrix(ref) && ref != "mean")
- stop("ref: matrix or 'mean'")
- if (!ls_mode %in% c("exact","approx1","approx2"))
- stop("ls_mode in {'exact','approx1','approx2'}")
+ if (!is.matrix(ref) && ref != "mean")
+ stop("ref: matrix or 'mean'")
+ if (!ls_mode %in% c("exact","approx1","approx2"))
+ stop("ls_mode in {'exact','approx1','approx2'}")
- K <- ncol(Ms[[1]])
- if (is.character(ref)) #ref=="mean"
- m_sum = Ms[[1]]
- L <- length(Ms)
- for (i in ifelse(is.character(ref),2,1):L)
- {
- m_ref = if (is.character(ref)) m_sum / (i-1) else ref
- m = Ms[[i]] #shorthand
+ K <- ncol(Ms[[1]])
+ if (is.character(ref)) #ref=="mean"
+ m_sum = Ms[[1]]
+ L <- length(Ms)
+ for (i in ifelse(is.character(ref),2,1):L)
+ {
+ m_ref = if (is.character(ref)) m_sum / (i-1) else ref
+ m = Ms[[i]] #shorthand
- if (ls_mode == "exact")
- {
- #distances[i,j] = distance between m column i and ref column j
- distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
- assignment = .hungarianAlgorithm(distances)
- col <- m[,assignment]
- if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
- }
- else
- {
- # Greedy matching:
- # approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
- # approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
- available_indices = 1:K
- for (j in 1:K)
- {
- distances =
- if (ls_mode == "approx1")
- {
- apply(as.matrix(m[,available_indices]), 2,
- function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
- }
- else #approx2
- {
- apply(as.matrix(m_ref[,available_indices]), 2,
- function(col) ( sqrt(sum((col - m[,j])^2)) ) )
- }
- indMin = which.min(distances)
- if (ls_mode == "approx1")
- {
- col <- m[ , available_indices[indMin] ]
- if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
- }
- else #approx2
- {
- col <- available_indices[indMin]
- if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
- }
- available_indices = available_indices[-indMin]
- }
- }
+ if (ls_mode == "exact")
+ {
+ #distances[i,j] = distance between m column i and ref column j
+ distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
+ assignment = .hungarianAlgorithm(distances)
+ col <- m[,assignment]
+ if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
+ }
+ else
+ {
+ # Greedy matching:
+ # approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
+ # approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
+ available_indices = 1:K
+ for (j in 1:K)
+ {
+ distances =
+ if (ls_mode == "approx1")
+ {
+ apply(as.matrix(m[,available_indices]), 2,
+ function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
+ }
+ else #approx2
+ {
+ apply(as.matrix(m_ref[,available_indices]), 2,
+ function(col) ( sqrt(sum((col - m[,j])^2)) ) )
+ }
+ indMin = which.min(distances)
+ if (ls_mode == "approx1")
+ {
+ col <- m[ , available_indices[indMin] ]
+ if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
+ }
+ else #approx2
+ {
+ col <- available_indices[indMin]
+ if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
+ }
+ available_indices = available_indices[-indMin]
+ }
+ }
- # Update current sum with "label-switched" li[[i]]
- if (is.character(ref)) #ref=="mean"
- m_sum = m_sum + Ms[[i]]
- }
- Ms
+ # Update current sum with "label-switched" li[[i]]
+ if (is.character(ref)) #ref=="mean"
+ m_sum = m_sum + Ms[[i]]
+ }
+ Ms
}
// Index matrix (by columns)
int mi(int i, int j, int d1, int d2)
{
- return j*d1 + i;
+ return j*d1 + i;
}
// Index 3-tensor (by columns, matrices ordered by last dim)
int ti(int i, int j, int k, int d1, int d2, int d3)
{
- return k*d1*d2 + j*d1 + i;
+ return k*d1*d2 + j*d1 + i;
}
// Empirical cross-moment of order 2 between X size nxd and Y size n
void Moments_M2(double* X, double* Y, int* pn, int* pd, double* M2)
{
- int n=*pn, d=*pd;
- //double* M2 = (double*)calloc(d*d,sizeof(double));
+ int n=*pn, d=*pd;
+ //double* M2 = (double*)calloc(d*d,sizeof(double));
- // M2 = E[Y*X^*2] - E[Y*e^*2] = E[Y (X^*2 - I)]
- for (int j=0; j<d; j++)
- {
- for (int i=0; i<n; i++)
- {
- M2[mi(j,j,d,d)] -= Y[i] / n;
- for (int k=0; k<d; k++)
- M2[mi(j,k,d,d)] += Y[i] * X[mi(i,j,n,d)]*X[mi(i,k,n,d)] / n;
- }
- }
+ // M2 = E[Y*X^*2] - E[Y*e^*2] = E[Y (X^*2 - I)]
+ for (int j=0; j<d; j++)
+ {
+ for (int i=0; i<n; i++)
+ {
+ M2[mi(j,j,d,d)] -= Y[i] / n;
+ for (int k=0; k<d; k++)
+ M2[mi(j,k,d,d)] += Y[i] * X[mi(i,j,n,d)]*X[mi(i,k,n,d)] / n;
+ }
+ }
}
// Empirical cross-moment of order 3 between X size nxd and Y size n
void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3)
{
- int n=*pn, d=*pd;
- //double* M3 = (double*)calloc(d*d*d,sizeof(double));
+ int n=*pn, d=*pd;
+ //double* M3 = (double*)calloc(d*d*d,sizeof(double));
- // M3 = E[Y*X^*3] - E[Y*e*X*e] - E[Y*e*e*X] - E[Y*X*e*e]
- for (int j=0; j<d; j++)
- {
- for (int k=0; k<d; k++)
- {
- for (int i=0; i<n; i++)
- {
- double tensor_elt = Y[i]*X[mi(i,k,n,d)] / n;
- M3[ti(j,k,j,d,d,d)] -= tensor_elt;
- M3[ti(j,j,k,d,d,d)] -= tensor_elt;
- M3[ti(k,j,j,d,d,d)] -= tensor_elt;
- for (int o=0; o<d; o++)
- M3[ti(j,k,o,d,d,d)] += Y[i] * X[mi(i,j,n,d)]*X[mi(i,k,n,d)]*X[mi(i,o,n,d)] / n;
- }
- }
- }
+ // M3 = E[Y*X^*3] - E[Y*e*X*e] - E[Y*e*e*X] - E[Y*X*e*e]
+ for (int j=0; j<d; j++)
+ {
+ for (int k=0; k<d; k++)
+ {
+ for (int i=0; i<n; i++)
+ {
+ double tensor_elt = Y[i]*X[mi(i,k,n,d)] / n;
+ M3[ti(j,k,j,d,d,d)] -= tensor_elt;
+ M3[ti(j,j,k,d,d,d)] -= tensor_elt;
+ M3[ti(k,j,j,d,d,d)] -= tensor_elt;
+ for (int o=0; o<d; o++)
+ M3[ti(j,k,o,d,d,d)] += Y[i] * X[mi(i,j,n,d)]*X[mi(i,k,n,d)]*X[mi(i,o,n,d)] / n;
+ }
+ }
+ }
}
// W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) )
// with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
void Compute_Omega(double* X, double* Y, double* M, int* pn, int* pd, double* W)
{
- int n=*pn, d=*pd;
+ int n=*pn, d=*pd;
int dim = d + d*d + d*d*d;
//double* W = (double*)malloc(dim*dim*sizeof(double));
g[j] = 0.0;
if (idx1 == idx2)
g[j] -= Y[i];
- g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j];
+ g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j];
}
for (int j=d+d*d; j<dim; j++)
{
**
** Parts of the used code was originally provided by the
** "Stanford GraphGase", but I made changes to this code.
- ** As asked by the copyright node of the "Stanford GraphGase",
+ ** As asked by the copyright node of the "Stanford GraphGase",
** I hereby proclaim that this file are *NOT* part of the
** "Stanford GraphGase" distrubition!
**
#define verbose (0)
typedef struct {
- int num_rows;
- int num_cols;
- double** cost;
- int** assignment;
+ int num_rows;
+ int num_cols;
+ double** cost;
+ int** assignment;
} hungarian_problem_t;
int hungarian_imax(int a, int b) {
- return (a<b)?b:a;
+ return (a<b)?b:a;
}
/** This method initialize the hungarian_problem structure and init
* the cost matrices (missing lines or columns are filled with 0).
* It returns the size of the quadratic(!) assignment matrix. **/
int hungarian_init(hungarian_problem_t* p, double** cost_matrix, int rows, int cols,
- int mode)
+ int mode)
{
- int i,j;
- double max_cost = 0.;
- int org_cols = cols,
- org_rows = rows;
-
- // is the number of cols not equal to number of rows ?
- // if yes, expand with 0-cols / 0-cols
- rows = hungarian_imax(cols, rows);
- cols = rows;
-
- p->num_rows = rows;
- p->num_cols = cols;
-
- p->cost = (double**)calloc(rows,sizeof(double*));
- p->assignment = (int**)calloc(rows,sizeof(int*));
-
- for(i=0; i<p->num_rows; i++) {
- p->cost[i] = (double*)calloc(cols,sizeof(double));
- p->assignment[i] = (int*)calloc(cols,sizeof(int));
- for(j=0; j<p->num_cols; j++) {
- p->cost[i][j] = (i < org_rows && j < org_cols) ? cost_matrix[i][j] : 0.;
- p->assignment[i][j] = 0;
-
- if (max_cost < p->cost[i][j])
- max_cost = p->cost[i][j];
- }
- }
-
- if (mode == HUNGARIAN_MODE_MAXIMIZE_UTIL) {
- for(i=0; i<p->num_rows; i++) {
- for(j=0; j<p->num_cols; j++)
- p->cost[i][j] = max_cost - p->cost[i][j];
- }
- }
- else if (mode == HUNGARIAN_MODE_MINIMIZE_COST) {
- // nothing to do
- }
-// else
-// fprintf(stderr,"%s: unknown mode. Mode was set to HUNGARIAN_MODE_MINIMIZE_COST !\n", __FUNCTION__);
-
- return rows;
+ int i,j;
+ double max_cost = 0.;
+ int org_cols = cols,
+ org_rows = rows;
+
+ // is the number of cols not equal to number of rows ?
+ // if yes, expand with 0-cols / 0-cols
+ rows = hungarian_imax(cols, rows);
+ cols = rows;
+
+ p->num_rows = rows;
+ p->num_cols = cols;
+
+ p->cost = (double**)calloc(rows,sizeof(double*));
+ p->assignment = (int**)calloc(rows,sizeof(int*));
+
+ for(i=0; i<p->num_rows; i++) {
+ p->cost[i] = (double*)calloc(cols,sizeof(double));
+ p->assignment[i] = (int*)calloc(cols,sizeof(int));
+ for(j=0; j<p->num_cols; j++) {
+ p->cost[i][j] = (i < org_rows && j < org_cols) ? cost_matrix[i][j] : 0.;
+ p->assignment[i][j] = 0;
+
+ if (max_cost < p->cost[i][j])
+ max_cost = p->cost[i][j];
+ }
+ }
+
+ if (mode == HUNGARIAN_MODE_MAXIMIZE_UTIL) {
+ for(i=0; i<p->num_rows; i++) {
+ for(j=0; j<p->num_cols; j++)
+ p->cost[i][j] = max_cost - p->cost[i][j];
+ }
+ }
+ else if (mode == HUNGARIAN_MODE_MINIMIZE_COST) {
+ // nothing to do
+ }
+// else
+// fprintf(stderr,"%s: unknown mode. Mode was set to HUNGARIAN_MODE_MINIMIZE_COST !\n", __FUNCTION__);
+
+ return rows;
}
/** Free the memory allocated by init. **/
void hungarian_free(hungarian_problem_t* p) {
- int i;
- for(i=0; i<p->num_rows; i++) {
- free(p->cost[i]);
- free(p->assignment[i]);
- }
- free(p->cost);
- free(p->assignment);
- p->cost = NULL;
- p->assignment = NULL;
+ int i;
+ for(i=0; i<p->num_rows; i++) {
+ free(p->cost[i]);
+ free(p->assignment[i]);
+ }
+ free(p->cost);
+ free(p->assignment);
+ p->cost = NULL;
+ p->assignment = NULL;
}
/** This method computes the optimal assignment. **/
void hungarian_solve(hungarian_problem_t* p)
{
- int i, j, m, n, k, l, t, q, unmatched;
- double cost, s;
- int* col_mate;
- int* row_mate;
- int* parent_row;
- int* unchosen_row;
- double* row_dec;
- double* col_inc;
- double* slack;
- int* slack_row;
-
- cost = 0.;
- m =p->num_rows;
- n =p->num_cols;
-
- col_mate = (int*)calloc(p->num_rows,sizeof(int));
- unchosen_row = (int*)calloc(p->num_rows,sizeof(int));
- row_dec = (double*)calloc(p->num_rows,sizeof(double));
- slack_row = (int*)calloc(p->num_rows,sizeof(int));
-
- row_mate = (int*)calloc(p->num_cols,sizeof(int));
- parent_row = (int*)calloc(p->num_cols,sizeof(int));
- col_inc = (double*)calloc(p->num_cols,sizeof(double));
- slack = (double*)calloc(p->num_cols,sizeof(double));
-
- for (i=0; i<p->num_rows; i++) {
- col_mate[i]=0;
- unchosen_row[i]=0;
- row_dec[i]=0.;
- slack_row[i]=0;
- }
- for (j=0; j<p->num_cols; j++) {
- row_mate[j]=0;
- parent_row[j] = 0;
- col_inc[j]=0.;
- slack[j]=0.;
- }
-
- for (i=0; i<p->num_rows; ++i)
- for (j=0; j<p->num_cols; ++j)
- p->assignment[i][j]=HUNGARIAN_NOT_ASSIGNED;
-
- // Begin subtract column minima in order to start with lots of zeroes 12
- for (l=0; l<n; l++)
- {
- s=p->cost[0][l];
- for (k=1; k<m; k++)
- if (p->cost[k][l]<s)
- s=p->cost[k][l];
- cost+=s;
- if (s!=0.)
- for (k=0; k<m; k++)
- p->cost[k][l]-=s;
- }
- // End subtract column minima in order to start with lots of zeroes 12
-
- // Begin initial state 16
- t=0;
- for (l=0; l<n; l++)
- {
- row_mate[l]= -1;
- parent_row[l]= -1;
- col_inc[l]=0.;
- slack[l]=INF;
- }
- for (k=0; k<m; k++)
- {
- s=p->cost[k][0];
- for (l=1; l<n; l++)
- if (p->cost[k][l]<s)
- s=p->cost[k][l];
- row_dec[k]=s;
- for (l=0; l<n; l++)
- if (s==p->cost[k][l] && row_mate[l]<0)
- {
- col_mate[k]=l;
- row_mate[l]=k;
- goto row_done;
- }
- col_mate[k]= -1;
- unchosen_row[t++]=k;
+ int i, j, m, n, k, l, t, q, unmatched;
+ double cost, s;
+ int* col_mate;
+ int* row_mate;
+ int* parent_row;
+ int* unchosen_row;
+ double* row_dec;
+ double* col_inc;
+ double* slack;
+ int* slack_row;
+
+ cost = 0.;
+ m =p->num_rows;
+ n =p->num_cols;
+
+ col_mate = (int*)calloc(p->num_rows,sizeof(int));
+ unchosen_row = (int*)calloc(p->num_rows,sizeof(int));
+ row_dec = (double*)calloc(p->num_rows,sizeof(double));
+ slack_row = (int*)calloc(p->num_rows,sizeof(int));
+
+ row_mate = (int*)calloc(p->num_cols,sizeof(int));
+ parent_row = (int*)calloc(p->num_cols,sizeof(int));
+ col_inc = (double*)calloc(p->num_cols,sizeof(double));
+ slack = (double*)calloc(p->num_cols,sizeof(double));
+
+ for (i=0; i<p->num_rows; i++) {
+ col_mate[i]=0;
+ unchosen_row[i]=0;
+ row_dec[i]=0.;
+ slack_row[i]=0;
+ }
+ for (j=0; j<p->num_cols; j++) {
+ row_mate[j]=0;
+ parent_row[j] = 0;
+ col_inc[j]=0.;
+ slack[j]=0.;
+ }
+
+ for (i=0; i<p->num_rows; ++i)
+ for (j=0; j<p->num_cols; ++j)
+ p->assignment[i][j]=HUNGARIAN_NOT_ASSIGNED;
+
+ // Begin subtract column minima in order to start with lots of zeroes 12
+ for (l=0; l<n; l++)
+ {
+ s=p->cost[0][l];
+ for (k=1; k<m; k++)
+ if (p->cost[k][l]<s)
+ s=p->cost[k][l];
+ cost+=s;
+ if (s!=0.)
+ for (k=0; k<m; k++)
+ p->cost[k][l]-=s;
+ }
+ // End subtract column minima in order to start with lots of zeroes 12
+
+ // Begin initial state 16
+ t=0;
+ for (l=0; l<n; l++)
+ {
+ row_mate[l]= -1;
+ parent_row[l]= -1;
+ col_inc[l]=0.;
+ slack[l]=INF;
+ }
+ for (k=0; k<m; k++)
+ {
+ s=p->cost[k][0];
+ for (l=1; l<n; l++)
+ if (p->cost[k][l]<s)
+ s=p->cost[k][l];
+ row_dec[k]=s;
+ for (l=0; l<n; l++)
+ if (s==p->cost[k][l] && row_mate[l]<0)
+ {
+ col_mate[k]=l;
+ row_mate[l]=k;
+ goto row_done;
+ }
+ col_mate[k]= -1;
+ unchosen_row[t++]=k;
row_done:
- ;
- }
- // End initial state 16
-
- // Begin Hungarian algorithm 18
- if (t==0)
- goto done;
- unmatched=t;
- while (1)
- {
- q=0;
- while (1)
- {
- while (q<t)
- {
- // Begin explore node q of the forest 19
- {
- k=unchosen_row[q];
- s=row_dec[k];
- for (l=0; l<n; l++)
- if (slack[l] > 0.)
- {
- double del=p->cost[k][l]-s+col_inc[l];
- if (del<slack[l])
- {
- if (del==0.)
- {
- if (row_mate[l]<0)
- goto breakthru;
- slack[l]=0.;
- parent_row[l]=k;
- unchosen_row[t++]=row_mate[l];
- }
- else
- {
- slack[l]=del;
- slack_row[l]=k;
- }
- }
- }
- }
- // End explore node q of the forest 19
- q++;
- }
-
- // Begin introduce a new zero into the matrix 21
- s=INF;
- for (l=0; l<n; l++)
- if (slack[l]>0. && slack[l]<s)
- s=slack[l];
- for (q=0; q<t; q++)
- row_dec[unchosen_row[q]]+=s;
- for (l=0; l<n; l++)
- if (slack[l]>0.)
- {
- slack[l]-=s;
- if (slack[l]==0.)
- {
- // Begin look at a new zero 22
- k=slack_row[l];
- if (row_mate[l]<0)
- {
- for (j=l+1; j<n; j++)
- if (slack[j]==0.)
- col_inc[j]+=s;
- goto breakthru;
- }
- else
- {
- parent_row[l]=k;
- unchosen_row[t++]=row_mate[l];
- }
- // End look at a new zero 22
- }
- }
- else
- col_inc[l]+=s;
- // End introduce a new zero into the matrix 21
- }
+ ;
+ }
+ // End initial state 16
+
+ // Begin Hungarian algorithm 18
+ if (t==0)
+ goto done;
+ unmatched=t;
+ while (1)
+ {
+ q=0;
+ while (1)
+ {
+ while (q<t)
+ {
+ // Begin explore node q of the forest 19
+ {
+ k=unchosen_row[q];
+ s=row_dec[k];
+ for (l=0; l<n; l++)
+ if (slack[l] > 0.)
+ {
+ double del=p->cost[k][l]-s+col_inc[l];
+ if (del<slack[l])
+ {
+ if (del==0.)
+ {
+ if (row_mate[l]<0)
+ goto breakthru;
+ slack[l]=0.;
+ parent_row[l]=k;
+ unchosen_row[t++]=row_mate[l];
+ }
+ else
+ {
+ slack[l]=del;
+ slack_row[l]=k;
+ }
+ }
+ }
+ }
+ // End explore node q of the forest 19
+ q++;
+ }
+
+ // Begin introduce a new zero into the matrix 21
+ s=INF;
+ for (l=0; l<n; l++)
+ if (slack[l]>0. && slack[l]<s)
+ s=slack[l];
+ for (q=0; q<t; q++)
+ row_dec[unchosen_row[q]]+=s;
+ for (l=0; l<n; l++)
+ if (slack[l]>0.)
+ {
+ slack[l]-=s;
+ if (slack[l]==0.)
+ {
+ // Begin look at a new zero 22
+ k=slack_row[l];
+ if (row_mate[l]<0)
+ {
+ for (j=l+1; j<n; j++)
+ if (slack[j]==0.)
+ col_inc[j]+=s;
+ goto breakthru;
+ }
+ else
+ {
+ parent_row[l]=k;
+ unchosen_row[t++]=row_mate[l];
+ }
+ // End look at a new zero 22
+ }
+ }
+ else
+ col_inc[l]+=s;
+ // End introduce a new zero into the matrix 21
+ }
breakthru:
- // Begin update the matching 20
- while (1)
- {
- j=col_mate[k];
- col_mate[k]=l;
- row_mate[l]=k;
- if (j<0)
- break;
- k=parent_row[j];
- l=j;
- }
- // End update the matching 20
- if (--unmatched==0)
- goto done;
- // Begin get ready for another stage 17
- t=0;
- for (l=0; l<n; l++)
- {
- parent_row[l]= -1;
- slack[l]=INF;
- }
- for (k=0; k<m; k++)
- if (col_mate[k]<0)
- unchosen_row[t++]=k;
- // End get ready for another stage 17
- }
+ // Begin update the matching 20
+ while (1)
+ {
+ j=col_mate[k];
+ col_mate[k]=l;
+ row_mate[l]=k;
+ if (j<0)
+ break;
+ k=parent_row[j];
+ l=j;
+ }
+ // End update the matching 20
+ if (--unmatched==0)
+ goto done;
+ // Begin get ready for another stage 17
+ t=0;
+ for (l=0; l<n; l++)
+ {
+ parent_row[l]= -1;
+ slack[l]=INF;
+ }
+ for (k=0; k<m; k++)
+ if (col_mate[k]<0)
+ unchosen_row[t++]=k;
+ // End get ready for another stage 17
+ }
done:
-/* // Begin doublecheck the solution 23
- for (k=0; k<m; k++)
- for (l=0; l<n; l++)
- if (p->cost[k][l]<row_dec[k]-col_inc[l])
- exit(0);
- for (k=0; k<m; k++)
- {
- l=col_mate[k];
- if (l<0 || p->cost[k][l]!=row_dec[k]-col_inc[l])
- exit(0);
- }
- k=0;
- for (l=0; l<n; l++)
- if (col_inc[l])
- k++;
- if (k>m)
- exit(0);
- // End doublecheck the solution 23
-*/ // End Hungarian algorithm 18
-
- for (i=0; i<m; ++i)
- {
- p->assignment[i][col_mate[i]]=HUNGARIAN_ASSIGNED;
- /*TRACE("%d - %d\n", i, col_mate[i]);*/
- }
- for (k=0; k<m; ++k)
- {
- for (l=0; l<n; ++l)
- {
- /*TRACE("%d ",p->cost[k][l]-row_dec[k]+col_inc[l]);*/
- p->cost[k][l]=p->cost[k][l]-row_dec[k]+col_inc[l];
- }
- /*TRACE("\n");*/
- }
- for (i=0; i<m; i++)
- cost+=row_dec[i];
- for (i=0; i<n; i++)
- cost-=col_inc[i];
-
- free(slack);
- free(col_inc);
- free(parent_row);
- free(row_mate);
- free(slack_row);
- free(row_dec);
- free(unchosen_row);
- free(col_mate);
+/* // Begin doublecheck the solution 23
+ for (k=0; k<m; k++)
+ for (l=0; l<n; l++)
+ if (p->cost[k][l]<row_dec[k]-col_inc[l])
+ exit(0);
+ for (k=0; k<m; k++)
+ {
+ l=col_mate[k];
+ if (l<0 || p->cost[k][l]!=row_dec[k]-col_inc[l])
+ exit(0);
+ }
+ k=0;
+ for (l=0; l<n; l++)
+ if (col_inc[l])
+ k++;
+ if (k>m)
+ exit(0);
+ // End doublecheck the solution 23
+*/ // End Hungarian algorithm 18
+
+ for (i=0; i<m; ++i)
+ {
+ p->assignment[i][col_mate[i]]=HUNGARIAN_ASSIGNED;
+ /*TRACE("%d - %d\n", i, col_mate[i]);*/
+ }
+ for (k=0; k<m; ++k)
+ {
+ for (l=0; l<n; ++l)
+ {
+ /*TRACE("%d ",p->cost[k][l]-row_dec[k]+col_inc[l]);*/
+ p->cost[k][l]=p->cost[k][l]-row_dec[k]+col_inc[l];
+ }
+ /*TRACE("\n");*/
+ }
+ for (i=0; i<m; i++)
+ cost+=row_dec[i];
+ for (i=0; i<n; i++)
+ cost-=col_inc[i];
+
+ free(slack);
+ free(col_inc);
+ free(parent_row);
+ free(row_mate);
+ free(slack_row);
+ free(row_dec);
+ free(unchosen_row);
+ free(col_mate);
}
// Get the optimal assignment, by calling hungarian_solve above; "distances" in columns
void hungarianAlgorithm(double* distances, int* pn, int* assignment)
{
- int n = *pn;
- double** mat = array_to_matrix(distances, n, n);
+ int n = *pn;
+ double** mat = array_to_matrix(distances, n, n);
hungarian_problem_t p;
- hungarian_init(&p, mat, n, n, HUNGARIAN_MODE_MINIMIZE_COST);
+ hungarian_init(&p, mat, n, n, HUNGARIAN_MODE_MINIMIZE_COST);
- hungarian_solve(&p);
+ hungarian_solve(&p);
- // Copy the optimal assignment in a R-friendly structure
- for (int i=0; i < n; i++) {
- for (int j=0; j < n; j++) {
- if (p.assignment[i][j])
- assignment[i] = j+1; //add 1 since we work with R
- }
- }
+ // Copy the optimal assignment in a R-friendly structure
+ for (int i=0; i < n; i++) {
+ for (int j=0; j < n; j++) {
+ if (p.assignment[i][j])
+ assignment[i] = j+1; //add 1 since we work with R
+ }
+ }
hungarian_free(&p);
- for (int i=0; i<n; i++)
- free(mat[i]);
- free(mat);
+ for (int i=0; i<n; i++)
+ free(mat[i]);
+ free(mat);
}
extern void Compute_Omega(void*, void*, void*, void*, void*, void*);
static const R_CMethodDef CEntries[] = {
- {"hungarianAlgorithm", (DL_FUNC) &hungarianAlgorithm, 3},
- {"Moments_M2", (DL_FUNC) &Moments_M2, 5},
- {"Moments_M3", (DL_FUNC) &Moments_M3, 5},
- {"Compute_Omega", (DL_FUNC) &Compute_Omega, 6},
- {NULL, NULL, 0}
+ {"hungarianAlgorithm", (DL_FUNC) &hungarianAlgorithm, 3},
+ {"Moments_M2", (DL_FUNC) &Moments_M2, 5},
+ {"Moments_M3", (DL_FUNC) &Moments_M3, 5},
+ {"Compute_Omega", (DL_FUNC) &Compute_Omega, 6},
+ {NULL, NULL, 0}
};
void R_init_morpheus(DllInfo *dll)
{
- R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
- R_useDynamicSymbols(dll, FALSE);
+ R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
+ R_useDynamicSymbols(dll, FALSE);
}
# (Slower, but trusted) R version of Moments_M2
.Moments_M2_R = function(X,Y)
{
- library(tensor)
- n = nrow(X)
- d = ncol(X)
- v = rep(0,d)
- e = diag(rep(1,d))
+ library(tensor)
+ n = nrow(X)
+ d = ncol(X)
+ v = rep(0,d)
+ e = diag(rep(1,d))
- M21 = M2_1 = tensor(v,v)
- for (i in 1:n)
- M21 = M21 + Y[i] * tensor(X[i,],X[i,])
- M21 = (1/n) * M21
+ M21 = M2_1 = tensor(v,v)
+ for (i in 1:n)
+ M21 = M21 + Y[i] * tensor(X[i,],X[i,])
+ M21 = (1/n) * M21
- for (j in 1:d)
- {
- L = tensor(v,v)
- for (i in 1:n)
- L = L + Y[i]*tensor(e[,j],e[,j])
- L = (1/n) * L
- M2_1 = M2_1 + L
- }
+ for (j in 1:d)
+ {
+ L = tensor(v,v)
+ for (i in 1:n)
+ L = L + Y[i]*tensor(e[,j],e[,j])
+ L = (1/n) * L
+ M2_1 = M2_1 + L
+ }
- M2 = M21 - M2_1
- return (M2)
+ M2 = M21 - M2_1
+ return (M2)
}
test_that("both versions of Moments_M2 agree on various inputs",
{
- for (n in c(20,200))
- {
- for (d in c(2,10,20))
- {
- X = matrix( runif(n*d,min=-1,max=1), nrow=n )
- Y = runif(n,min=-1,max=1)
- M2 = .Moments_M2(X,Y)
- M2_R = .Moments_M2_R(X,Y)
- expect_equal(max(abs(M2 - M2_R)), 0)
- }
- }
+ for (n in c(20,200))
+ {
+ for (d in c(2,10,20))
+ {
+ X = matrix( runif(n*d,min=-1,max=1), nrow=n )
+ Y = runif(n,min=-1,max=1)
+ M2 = .Moments_M2(X,Y)
+ M2_R = .Moments_M2_R(X,Y)
+ expect_equal(max(abs(M2 - M2_R)), 0)
+ }
+ }
})
# (Slower, but trusted) R version of Moments_M3
.Moments_M3_R = function(X,Y)
{
- library(tensor)
- n = nrow(X)
- d = ncol(X)
- v = rep(0,d)
- e = diag(rep(1,d))
+ library(tensor)
+ n = nrow(X)
+ d = ncol(X)
+ v = rep(0,d)
+ e = diag(rep(1,d))
- M31=M3_1=M3_2=M3_3 = tensor(tensor(v,v),v)
- for (i in 1:n)
- M31 = M31 + (Y[i]*tensor(tensor(X[i,],X[i,]),X[i,]))
- M31 = (1/n)*M31
+ M31=M3_1=M3_2=M3_3 = tensor(tensor(v,v),v)
+ for (i in 1:n)
+ M31 = M31 + (Y[i]*tensor(tensor(X[i,],X[i,]),X[i,]))
+ M31 = (1/n)*M31
- for(j in 1:d)
- {
- l1=l2=l3 = tensor(tensor(v,v),v)
- for(i in 1:n)
- {
- l1 = l1 + Y[i]*tensor(tensor(e[,j],X[i,]),e[,j])
- l2 = l2 + Y[i]*tensor(tensor(e[,j],e[,j]),X[i,])
- l3 = l3 + Y[i]*tensor(tensor(X[i,],e[,j]),e[,j])
- }
- l1 = (1/n)*l1
- l2 = (1/n)*l2
- l3 = (1/n)*l3
- M3_1 = M3_1+l1
- M3_2 = M3_2+l2
- M3_3 = M3_3+l3
- }
+ for(j in 1:d)
+ {
+ l1=l2=l3 = tensor(tensor(v,v),v)
+ for(i in 1:n)
+ {
+ l1 = l1 + Y[i]*tensor(tensor(e[,j],X[i,]),e[,j])
+ l2 = l2 + Y[i]*tensor(tensor(e[,j],e[,j]),X[i,])
+ l3 = l3 + Y[i]*tensor(tensor(X[i,],e[,j]),e[,j])
+ }
+ l1 = (1/n)*l1
+ l2 = (1/n)*l2
+ l3 = (1/n)*l3
+ M3_1 = M3_1+l1
+ M3_2 = M3_2+l2
+ M3_3 = M3_3+l3
+ }
- M3 = M31-(M3_1+M3_2+M3_3)
- return (M3)
+ M3 = M31-(M3_1+M3_2+M3_3)
+ return (M3)
}
test_that("both versions of Moments_M3 agree on various inputs",
{
- for (n in c(20,200))
- {
- for (d in c(2,10,20))
- {
- X = matrix( runif(n*d,min=-1,max=1), nrow=n )
- Y = runif(n,min=-1,max=1)
- M3 = .Moments_M3(X,Y)
- M3_R = .Moments_M3_R(X,Y)
- expect_equal(max(abs(M3 - M3_R)), 0)
- }
- }
+ for (n in c(20,200))
+ {
+ for (d in c(2,10,20))
+ {
+ X = matrix( runif(n*d,min=-1,max=1), nrow=n )
+ Y = runif(n,min=-1,max=1)
+ M3 = .Moments_M3(X,Y)
+ M3_R = .Moments_M3_R(X,Y)
+ expect_equal(max(abs(M3 - M3_R)), 0)
+ }
+ }
})
# Helper to generate a random series of matrices to align
.generateMatrices = function(d, K, N, noise)
{
- matrices = list( matrix(runif(d*K, min=-1, max=1),ncol=K) ) #reference
- for (i in 2:N)
- {
- matrices[[i]] <- matrices[[1]][,sample(1:K)]
- if (noise)
- matrices[[i]] = matrices[[i]] + matrix(rnorm(d*K, sd=0.05), ncol=K)
- }
- matrices
+ matrices = list( matrix(runif(d*K, min=-1, max=1),ncol=K) ) #reference
+ for (i in 2:N)
+ {
+ matrices[[i]] <- matrices[[1]][,sample(1:K)]
+ if (noise)
+ matrices[[i]] = matrices[[i]] + matrix(rnorm(d*K, sd=0.05), ncol=K)
+ }
+ matrices
}
test_that("labelSwitchingAlign correctly aligns de-noised parameters",
{
- N = 30 #number of matrices
- d_K_list = list(c(2,2), c(5,3))
- for (i in 1:2)
- {
- d = d_K_list[[i]][1]
- K = d_K_list[[i]][2]
+ N = 30 #number of matrices
+ d_K_list = list(c(2,2), c(5,3))
+ for (i in 1:2)
+ {
+ d = d_K_list[[i]][1]
+ K = d_K_list[[i]][2]
- # 1] Generate matrix series
- matrices_permut = .generateMatrices(d,K,N,noise=FALSE)
+ # 1] Generate matrix series
+ matrices_permut = .generateMatrices(d,K,N,noise=FALSE)
- # 2] Call align function with mode=approx1
- matrices_aligned =
- alignMatrices(matrices_permut[2:N], ref=matrices_permut[[1]], ls_mode="approx1")
+ # 2] Call align function with mode=approx1
+ matrices_aligned =
+ alignMatrices(matrices_permut[2:N], ref=matrices_permut[[1]], ls_mode="approx1")
- # 3] Check alignment
- for (j in 2:N)
- expect_equal(matrices_aligned[[j-1]], matrices_permut[[1]])
+ # 3] Check alignment
+ for (j in 2:N)
+ expect_equal(matrices_aligned[[j-1]], matrices_permut[[1]])
- # 2bis] Call align function with mode=approx2
- matrices_aligned =
- alignMatrices(matrices_permut[2:N], ref=matrices_permut[[1]], ls_mode="approx2")
+ # 2bis] Call align function with mode=approx2
+ matrices_aligned =
+ alignMatrices(matrices_permut[2:N], ref=matrices_permut[[1]], ls_mode="approx2")
- # 3bis] Check alignment
- for (j in 2:N)
- expect_equal(matrices_aligned[[j-1]], matrices_permut[[1]])
- }
+ # 3bis] Check alignment
+ for (j in 2:N)
+ expect_equal(matrices_aligned[[j-1]], matrices_permut[[1]])
+ }
})
test_that("labelSwitchingAlign correctly aligns noisy parameters",
{
- N = 30 #number of matrices
- d_K_list = list(c(2,2), c(5,3))
- for (i in 1:2)
- {
- d = d_K_list[[i]][1]
- K = d_K_list[[i]][2]
- max_error = d * 0.2 #TODO: what value to choose ?
+ N = 30 #number of matrices
+ d_K_list = list(c(2,2), c(5,3))
+ for (i in 1:2)
+ {
+ d = d_K_list[[i]][1]
+ K = d_K_list[[i]][2]
+ max_error = d * 0.2 #TODO: what value to choose ?
- # 1] Generate matrix series
- matrices_permut = .generateMatrices(d,K,N,noise=TRUE)
+ # 1] Generate matrix series
+ matrices_permut = .generateMatrices(d,K,N,noise=TRUE)
- # 2] Call align function
- matrices_aligned = alignMatrices(matrices_permut, ref="mean", ls_mode="exact")
+ # 2] Call align function
+ matrices_aligned = alignMatrices(matrices_permut, ref="mean", ls_mode="exact")
- # 3] Check alignment
- for (j in 2:N)
- {
- expect_that( norm(matrices_aligned[[j]] - matrices_permut[[1]]),
- is_less_than(max_error) )
- }
- }
+ # 3] Check alignment
+ for (j in 2:N)
+ {
+ expect_that( norm(matrices_aligned[[j]] - matrices_permut[[1]]),
+ is_less_than(max_error) )
+ }
+ }
})
test_that("on input of sufficient size, β/||β|| is estimated accurately enough",
{
- n = 100000
- d = 2
- K = 2
- p = 1/2
+ n = 100000
+ d = 2
+ K = 2
+ p = 1/2
- βs_ref = array( c(1,0,0,1 , 1,-2,3,1), dim=c(d,K,2) )
- for (i in 1:(dim(βs_ref)[3]))
- {
- μ_ref = normalize(βs_ref[,,i])
- for (model in c("logit","probit"))
- {
- cat("\n\n",model," :\n",sep="")
+ βs_ref = array( c(1,0,0,1 , 1,-2,3,1), dim=c(d,K,2) )
+ for (i in 1:(dim(βs_ref)[3]))
+ {
+ μ_ref = normalize(βs_ref[,,i])
+ for (model in c("logit","probit"))
+ {
+ cat("\n\n",model," :\n",sep="")
- io = generateSampleIO(n, p, βs_ref[,,i], rep(0,K), model)
- μ = computeMu(io$X, io$Y, list(K=K))
- μ_aligned = alignMatrices(list(μ), ref=μ_ref, ls_mode="exact")[[1]]
+ io = generateSampleIO(n, p, βs_ref[,,i], rep(0,K), model)
+ μ = computeMu(io$X, io$Y, list(K=K))
+ μ_aligned = alignMatrices(list(μ), ref=μ_ref, ls_mode="exact")[[1]]
- #Some traces: 0 is not well estimated, but others are OK
- cat("Reference normalized matrix:\n")
- print(μ_ref)
- cat("Estimated normalized matrix:\n")
- print(μ_aligned)
- cat("Difference norm (Matrix norm ||.||_1, max. abs. sum on a column)\n")
- diff_norm = norm(μ_ref - μ_aligned)
- cat(diff_norm,"\n")
+ #Some traces: 0 is not well estimated, but others are OK
+ cat("Reference normalized matrix:\n")
+ print(μ_ref)
+ cat("Estimated normalized matrix:\n")
+ print(μ_aligned)
+ cat("Difference norm (Matrix norm ||.||_1, max. abs. sum on a column)\n")
+ diff_norm = norm(μ_ref - μ_aligned)
+ cat(diff_norm,"\n")
- #NOTE: 0.5 is loose threshold, but values around 0.3 are expected...
- expect_that( diff_norm, is_less_than(0.5) )
- }
- }
+ #NOTE: 0.5 is loose threshold, but values around 0.3 are expected...
+ expect_that( diff_norm, is_less_than(0.5) )
+ }
+ }
})
test_that("HungarianAlgorithm provides the correct answer on various inputs",
{
- for (n in c(2,3,10,50))
- {
- for (repetition in 1:3)
- {
- # Generate a random vector, and permute it:
- # we expect the algorithm to retrieve the permutation
- v = runif(n, min=-1, max=1)
- permutation = sample(1:n)
- v_p = v[permutation]
+ for (n in c(2,3,10,50))
+ {
+ for (repetition in 1:3)
+ {
+ # Generate a random vector, and permute it:
+ # we expect the algorithm to retrieve the permutation
+ v = runif(n, min=-1, max=1)
+ permutation = sample(1:n)
+ v_p = v[permutation]
- # v is reference, v_p is v after permutation
- # distances[i,j] = distance between i-th component of v and j-th component of v_p
- # "in rows : reference; in columns: after permutation"
- # "permutation" contains order on v to match v_p as closely as possible
- distances = sapply(v_p, function(elt) ( abs(elt - v) ) )
- assignment = .hungarianAlgorithm(distances)
- expect_equal(assignment, permutation)
- }
- }
+ # v is reference, v_p is v after permutation
+ # distances[i,j] = distance between i-th component of v and j-th component of v_p
+ # "in rows : reference; in columns: after permutation"
+ # "permutation" contains order on v to match v_p as closely as possible
+ distances = sapply(v_p, function(elt) ( abs(elt - v) ) )
+ assignment = .hungarianAlgorithm(distances)
+ expect_equal(assignment, permutation)
+ }
+ }
})
#auxiliary to test diagonality
.computeMuCheckDiag = function(X, Y, K, jd_method, β_ref)
{
- d = ncol(X)
- #TODO: redundant code, same as computeMu() main method. Comments are stripped
- M3 = .Moments_M3(X,Y)
- M2_t = array(dim=c(d,d,d))
- for (i in 1:d)
- {
- ρ = c(rep(0,i-1),1,rep(0,d-i))
- M2_t[,,i] = .T_I_I_w(M3,ρ)
- }
- jd = jointDiag::ajd(M2_t, method=jd_method)
- V = if (jd_method=="uwedge") jd$B else solve(jd$A)
- M2_t = array(dim=c(d,d,K))
- for (i in 1:K)
- M2_t[,,i] = .T_I_I_w(M3,V[,i])
- #END of computeMu() code
+ d = ncol(X)
+ #TODO: redundant code, same as computeMu() main method. Comments are stripped
+ M3 = .Moments_M3(X,Y)
+ M2_t = array(dim=c(d,d,d))
+ for (i in 1:d)
+ {
+ ρ = c(rep(0,i-1),1,rep(0,d-i))
+ M2_t[,,i] = .T_I_I_w(M3,ρ)
+ }
+ jd = jointDiag::ajd(M2_t, method=jd_method)
+ V = if (jd_method=="uwedge") jd$B else solve(jd$A)
+ M2_t = array(dim=c(d,d,K))
+ for (i in 1:K)
+ M2_t[,,i] = .T_I_I_w(M3,V[,i])
+ #END of computeMu() code
- max_error = 0.5 #TODO: tune ?
- invβ = MASS::ginv(β_ref)
- for (i in 1:K)
- {
- shouldBeDiag = invβ %*% M2_t[,,i] %*% t(invβ)
- expect_that(
- mean( abs(shouldBeDiag[upper.tri(shouldBeDiag) | lower.tri(shouldBeDiag)]) ),
- is_less_than(max_error) )
- }
+ max_error = 0.5 #TODO: tune ?
+ invβ = MASS::ginv(β_ref)
+ for (i in 1:K)
+ {
+ shouldBeDiag = invβ %*% M2_t[,,i] %*% t(invβ)
+ expect_that(
+ mean( abs(shouldBeDiag[upper.tri(shouldBeDiag) | lower.tri(shouldBeDiag)]) ),
+ is_less_than(max_error) )
+ }
}
test_that("'jedi' and 'uwedge' joint-diagonalization methods return a correct matrix",
{
- n = 10000
- d_K = list( c(2,2), c(5,3), c(20,13) )
+ n = 10000
+ d_K = list( c(2,2), c(5,3), c(20,13) )
- for (dk_index in 1:length(d_K))
- {
- d = d_K[[dk_index]][1]
- K = d_K[[dk_index]][2]
- #NOTE: sometimes large errors if pr is not balanced enough (e.g. random);
- # same note for β. However we could be more random than that...
- β_ref = rbind(diag(K),matrix(0,nrow=d-K,ncol=K))
- io = generateSampleIO(n, p=rep(1/K,K-1), β=β_ref, rep(0,K), link="logit")
- .computeMuCheckDiag(io$X, io$Y, K, jd_method="uwedge", β_ref)
- #TODO: some issues with jedi method (singular system)
- #.computeMuCheckDiag(io$X, io$Y, K, jd_method="jedi", β_ref)
- }
+ for (dk_index in 1:length(d_K))
+ {
+ d = d_K[[dk_index]][1]
+ K = d_K[[dk_index]][2]
+ #NOTE: sometimes large errors if pr is not balanced enough (e.g. random);
+ # same note for β. However we could be more random than that...
+ β_ref = rbind(diag(K),matrix(0,nrow=d-K,ncol=K))
+ io = generateSampleIO(n, p=rep(1/K,K-1), β=β_ref, rep(0,K), link="logit")
+ .computeMuCheckDiag(io$X, io$Y, K, jd_method="uwedge", β_ref)
+ #TODO: some issues with jedi method (singular system)
+ #.computeMuCheckDiag(io$X, io$Y, K, jd_method="jedi", β_ref)
+ }
})
naive_f = function(link, M1,M2,M3, p,β,b)
{
- d = length(M1)
- K = length(p)
- λ <- sqrt(colSums(β^2))
+ d = length(M1)
+ K = length(p)
+ λ <- sqrt(colSums(β^2))
- # Compute β x2,3 (self) tensorial products
- β2 = array(0, dim=c(d,d,K))
- β3 = array(0, dim=c(d,d,d,K))
- for (k in 1:K)
- {
- for (i in 1:d)
- {
- for (j in 1:d)
- {
- β2[i,j,k] = β[i,k]*β[j,k]
- for (l in 1:d)
- β3[i,j,l,k] = β[i,k]*β[j,k]*β[l,k]
- }
- }
- }
+ # Compute β x2,3 (self) tensorial products
+ β2 = array(0, dim=c(d,d,K))
+ β3 = array(0, dim=c(d,d,d,K))
+ for (k in 1:K)
+ {
+ for (i in 1:d)
+ {
+ for (j in 1:d)
+ {
+ β2[i,j,k] = β[i,k]*β[j,k]
+ for (l in 1:d)
+ β3[i,j,l,k] = β[i,k]*β[j,k]*β[l,k]
+ }
+ }
+ }
- res = 0
- for (i in 1:d)
- {
- term = 0
- for (k in 1:K)
- term = term + p[k]*.G(link,1,λ[k],b[k])*β[i,k]
- res = res + (term - M1[i])^2
- for (j in 1:d)
- {
- term = 0
- for (k in 1:K)
- term = term + p[k]*.G(link,2,λ[k],b[k])*β2[i,j,k]
- res = res + (term - M2[i,j])^2
- for (l in 1:d)
- {
- term = 0
- for (k in 1:K)
- term = term + p[k]*.G(link,3,λ[k],b[k])*β3[i,j,l,k]
- res = res + (term - M3[i,j,l])^2
- }
- }
- }
- res
+ res = 0
+ for (i in 1:d)
+ {
+ term = 0
+ for (k in 1:K)
+ term = term + p[k]*.G(link,1,λ[k],b[k])*β[i,k]
+ res = res + (term - M1[i])^2
+ for (j in 1:d)
+ {
+ term = 0
+ for (k in 1:K)
+ term = term + p[k]*.G(link,2,λ[k],b[k])*β2[i,j,k]
+ res = res + (term - M2[i,j])^2
+ for (l in 1:d)
+ {
+ term = 0
+ for (k in 1:K)
+ term = term + p[k]*.G(link,3,λ[k],b[k])*β3[i,j,l,k]
+ res = res + (term - M3[i,j,l])^2
+ }
+ }
+ }
+ res
}
test_that("naive computation provides the same result as vectorized computations",
{
- h <- 1e-7 #for finite-difference tests
- tol <- 1e-3 #large tolerance, necessary in some cases... (generally 1e-6 is OK)
- for (dK in list( c(2,2), c(5,3)))
- {
- d = dK[1]
- K = dK[2]
+ h <- 1e-7 #for finite-difference tests
+ tol <- 1e-3 #large tolerance, necessary in some cases... (generally 1e-6 is OK)
+ for (dK in list( c(2,2), c(5,3)))
+ {
+ d = dK[1]
+ K = dK[2]
- M1 = runif(d, -1, 1)
- M2 = matrix(runif(d*d,-1,1), ncol=d)
- M3 = array(runif(d*d*d,-1,1), dim=c(d,d,d))
+ M1 = runif(d, -1, 1)
+ M2 = matrix(runif(d*d,-1,1), ncol=d)
+ M3 = array(runif(d*d*d,-1,1), dim=c(d,d,d))
- for (link in c("logit","probit"))
- {
- op = new("OptimParams", "li"=link, "M1"=as.double(M1),
- "M2"=as.double(M2), "M3"=as.double(M3), "K"=as.integer(K))
+ for (link in c("logit","probit"))
+ {
+ op = new("OptimParams", "li"=link, "M1"=as.double(M1),
+ "M2"=as.double(M2), "M3"=as.double(M3), "K"=as.integer(K))
- for (var in seq_len((2+d)*K-1))
- {
- p = runif(K, 0, 1)
- p = p / sum(p)
- β <- matrix(runif(d*K,-5,5),ncol=K)
- b = runif(K, -5, 5)
- x <- c(p[1:(K-1)],as.double(β),b)
+ for (var in seq_len((2+d)*K-1))
+ {
+ p = runif(K, 0, 1)
+ p = p / sum(p)
+ β <- matrix(runif(d*K,-5,5),ncol=K)
+ b = runif(K, -5, 5)
+ x <- c(p[1:(K-1)],as.double(β),b)
- # Test functions values
- expect_equal( op$f(x), naive_f(link,M1,M2,M3, p,β,b) )
+ # Test functions values
+ expect_equal( op$f(x), naive_f(link,M1,M2,M3, p,β,b) )
- # Test finite differences ~= gradient values
- dir_h <- rep(0, (2+d)*K-1)
- dir_h[var] = h
+ # Test finite differences ~= gradient values
+ dir_h <- rep(0, (2+d)*K-1)
+ dir_h[var] = h
- expect_equal( op$grad_f(x)[var], ( op$f(x+dir_h) - op$f(x) ) / h, tol )
- }
- }
- }
+ expect_equal( op$grad_f(x)[var], ( op$f(x+dir_h) - op$f(x) ) / h, tol )
+ }
+ }
+ }
})