pkg/R/utils.R

   1 #' normalize
   2 #'
   3 #' Normalize a vector or a matrix (by columns), using euclidian norm
   4 #'
   5 #' @param x Vector or matrix to be normalized
   6 #'
   7 #' @return The normalized matrix (1 column if x is a vector)
   8 #'
   9 #' @examples
  10 #' x <- matrix(c(1,2,-1,3), ncol=2)
  11 #' normalize(x) #column 1 is 1/sqrt(5) (1 2),
  12 #'              #and column 2 is 1/sqrt(10) (-1, 3)
  13 #' @export
  14 normalize = function(x)
  15 {
  16   x = as.matrix(x)
  17   norm2 = sqrt( colSums(x^2) )
  18   sweep(x, 2, norm2, '/')
  19 }
  20
  21 # Computes a tensor-vector product
  22 #
  23 # @param Te third-order tensor (size dxdxd)
  24 # @param w vector of size d
  25 #
  26 # @return Matrix of size dxd
  27 #
  28 .T_I_I_w = function(Te, w)
  29 {
  30   d = length(w)
  31   Ma = matrix(0,nrow=d,ncol=d)
  32   for (j in 1:d)
  33     Ma = Ma + w[j] * Te[,,j]
  34   Ma
  35 }
  36
  37 # Computes the second-order empirical moment between input X and output Y
  38 #
  39 # @param X matrix of covariates (of size n*d)
  40 # @param Y vector of responses (of size n)
  41 #
  42 # @return Matrix of size dxd
  43 #
  44 .Moments_M2 = function(X, Y)
  45 {
  46   n = nrow(X)
  47   d = ncol(X)
  48   M2 = matrix(0,nrow=d,ncol=d)
  49   matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
  50     pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
  51 }
  52
  53 # Computes the third-order empirical moment between input X and output Y
  54 #
  55 # @param X matrix of covariates (of size n*d)
  56 # @param Y vector of responses (of size n)
  57 #
  58 # @return Array of size dxdxd
  59 #
  60 .Moments_M3 = function(X, Y)
  61 {
  62   n = nrow(X)
  63   d = ncol(X)
  64   M3 = array(0,dim=c(d,d,d))
  65   array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
  66     pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
  67 }
  68
  69 #' computeMoments
  70 #'
  71 #' Compute cross-moments of order 1,2,3 from X,Y
  72 #'
  73 #' @inheritParams computeMu
  74 #'
  75 #' @return A list L where L[[i]] is the i-th cross-moment
  76 #'
  77 #' @examples
  78 #' X <- matrix(rnorm(100), ncol=2)
  79 #' Y <- rbinom(100, 1, .5)
  80 #' M <- computeMoments(X, Y)
  81 #'
  82 #' @export
  83 computeMoments = function(X, Y)
  84   list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
  85
  86 # Find the optimal assignment (permutation) between two sets (minimize cost)
  87 #
  88 # @param distances The distances matrix, in columns
  89 #   (distances[i,j] is distance between i and j)
  90 #
  91 # @return A permutation minimizing cost
  92 #
  93 .hungarianAlgorithm = function(distances)
  94 {
  95   n = nrow(distances)
  96   .C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
  97     assignment=integer(n), PACKAGE="morpheus")$assignment
  98 }
  99
 100 #' alignMatrices
 101 #'
 102 #' Align a set of parameters matrices, with potential permutations.
 103 #'
 104 #' @param Ms A list of matrices, all of same size DxK
 105 #' @param ref A reference matrix to align other matrices with
 106 #' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
 107 #'   (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
 108 #'   "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
 109 #'   reference (resp. in current row) compare to all unassigned columns in current row
 110 #'   (resp. in reference)
 111 #'
 112 #' @return The aligned list (of matrices), of same size as Ms
 113 #'
 114 #' @examples
 115 #' m1 <- matrix(c(1,1,0,0),ncol=2)
 116 #' m2 <- matrix(c(0,0,1,1),ncol=2)
 117 #' ref <- m1
 118 #' Ms <- list(m1, m2, m1, m2)
 119 #' a <- alignMatrices(Ms, ref, "exact")
 120 #' # a[[i]] is expected to contain m1 for all i
 121 #'
 122 #' @export
 123 alignMatrices = function(Ms, ref, ls_mode=c("exact","approx1","approx2"))
 124 {
 125   if (!is.matrix(ref) || any(is.na(ref)))
 126     stop("ref: matrix, no NAs")
 127   ls_mode <- match.arg(ls_mode)
 128
 129   K <- ncol(Ms[[1]])
 130   L <- length(Ms)
 131   for (i in 1:L)
 132   {
 133     m = Ms[[i]] #shorthand
 134
 135     if (ls_mode == "exact")
 136     {
 137       #distances[i,j] = distance between m column i and ref column j
 138       distances = apply( ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
 139       assignment = .hungarianAlgorithm(distances)
 140       col <- m[,assignment]
 141       Ms[[i]] <- col
 142     }
 143     else
 144     {
 145       # Greedy matching:
 146       #   approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
 147       #   approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
 148       available_indices = 1:K
 149       for (j in 1:K)
 150       {
 151         distances =
 152           if (ls_mode == "approx1")
 153           {
 154             apply(as.matrix(m[,available_indices]), 2,
 155               function(col) ( sqrt(sum((col - ref[,j])^2)) ) )
 156           }
 157           else #approx2
 158           {
 159             apply(as.matrix(ref[,available_indices]), 2,
 160               function(col) ( sqrt(sum((col - m[,j])^2)) ) )
 161           }
 162         indMin = which.min(distances)
 163         if (ls_mode == "approx1")
 164         {
 165           col <- m[ , available_indices[indMin] ]
 166           Ms[[i]][,j] <- col
 167         }
 168         else #approx2
 169         {
 170           col <- available_indices[indMin]
 171           Ms[[i]][,col] <- m[,j]
 172         }
 173         available_indices = available_indices[-indMin]
 174       }
 175     }
 176   }
 177   Ms
 178 }