Simplify data sampling + cosmetics + draft unit test for W matrix in optim
[morpheus.git] / pkg / R / utils.R
CommitLineData
cbd88fe5
BA
1#' normalize
2#'
3#' Normalize a vector or a matrix (by columns), using euclidian norm
4#'
5#' @param X Vector or matrix to be normalized
6#'
7#' @return The normalized matrix (1 column if X is a vector)
8#'
9#' @export
10normalize = function(X)
11{
6dd5c2ac
BA
12 X = as.matrix(X)
13 norm2 = sqrt( colSums(X^2) )
14 sweep(X, 2, norm2, '/')
cbd88fe5
BA
15}
16
17# Computes a tensor-vector product
18#
19# @param Te third-order tensor (size dxdxd)
20# @param w vector of size d
21#
22# @return Matrix of size dxd
23#
24.T_I_I_w = function(Te, w)
25{
6dd5c2ac
BA
26 d = length(w)
27 Ma = matrix(0,nrow=d,ncol=d)
28 for (j in 1:d)
29 Ma = Ma + w[j] * Te[,,j]
30 Ma
cbd88fe5
BA
31}
32
33# Computes the second-order empirical moment between input X and output Y
34#
35# @param X matrix of covariates (of size n*d)
36# @param Y vector of responses (of size n)
37#
38# @return Matrix of size dxd
39#
40.Moments_M2 = function(X, Y)
41{
6dd5c2ac
BA
42 n = nrow(X)
43 d = ncol(X)
44 M2 = matrix(0,nrow=d,ncol=d)
45 matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
46 pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
cbd88fe5
BA
47}
48
49# Computes the third-order empirical moment between input X and output Y
50#
51# @param X matrix of covariates (of size n*d)
52# @param Y vector of responses (of size n)
53#
54# @return Array of size dxdxd
55#
56.Moments_M3 = function(X, Y)
57{
6dd5c2ac
BA
58 n = nrow(X)
59 d = ncol(X)
60 M3 = array(0,dim=c(d,d,d))
61 array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
62 pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
cbd88fe5
BA
63}
64
65#' computeMoments
66#'
67#' Compute cross-moments of order 1,2,3 from X,Y
68#'
69#' @inheritParams computeMu
70#'
71#' @return A list L where L[[i]] is the i-th cross-moment
72#'
73#' @export
74computeMoments = function(X, Y)
6dd5c2ac 75 list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
cbd88fe5
BA
76
77# Find the optimal assignment (permutation) between two sets (minimize cost)
78#
79# @param distances The distances matrix, in columns (distances[i,j] is distance between i
80# and j)
81#
82# @return A permutation minimizing cost
83#
84.hungarianAlgorithm = function(distances)
85{
6dd5c2ac
BA
86 n = nrow(distances)
87 .C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
88 assignment=integer(n), PACKAGE="morpheus")$assignment
cbd88fe5
BA
89}
90
91#' alignMatrices
92#'
93#' Align a set of parameters matrices, with potential permutations.
94#'
95#' @param Ms A list of matrices, all of same size DxK
96#' @param ref Either a reference matrix or "mean" to align on empirical mean
97#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
98#' (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
99#' "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
100#' reference (resp. in current row) compare to all unassigned columns in current row
101#' (resp. in reference)
102#'
103#' @return The aligned list (of matrices), of same size as Ms
104#'
105#' @export
106alignMatrices = function(Ms, ref, ls_mode)
107{
6dd5c2ac
BA
108 if (!is.matrix(ref) && ref != "mean")
109 stop("ref: matrix or 'mean'")
110 if (!ls_mode %in% c("exact","approx1","approx2"))
111 stop("ls_mode in {'exact','approx1','approx2'}")
cbd88fe5 112
6dd5c2ac
BA
113 K <- ncol(Ms[[1]])
114 if (is.character(ref)) #ref=="mean"
115 m_sum = Ms[[1]]
116 L <- length(Ms)
117 for (i in ifelse(is.character(ref),2,1):L)
118 {
119 m_ref = if (is.character(ref)) m_sum / (i-1) else ref
120 m = Ms[[i]] #shorthand
cbd88fe5 121
6dd5c2ac
BA
122 if (ls_mode == "exact")
123 {
124 #distances[i,j] = distance between m column i and ref column j
125 distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
126 assignment = .hungarianAlgorithm(distances)
127 col <- m[,assignment]
128 if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
129 }
130 else
131 {
132 # Greedy matching:
133 # approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
134 # approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
135 available_indices = 1:K
136 for (j in 1:K)
137 {
138 distances =
139 if (ls_mode == "approx1")
140 {
141 apply(as.matrix(m[,available_indices]), 2,
142 function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
143 }
144 else #approx2
145 {
146 apply(as.matrix(m_ref[,available_indices]), 2,
147 function(col) ( sqrt(sum((col - m[,j])^2)) ) )
148 }
149 indMin = which.min(distances)
150 if (ls_mode == "approx1")
151 {
152 col <- m[ , available_indices[indMin] ]
153 if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
154 }
155 else #approx2
156 {
157 col <- available_indices[indMin]
158 if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
159 }
160 available_indices = available_indices[-indMin]
161 }
162 }
cbd88fe5 163
6dd5c2ac
BA
164 # Update current sum with "label-switched" li[[i]]
165 if (is.character(ref)) #ref=="mean"
166 m_sum = m_sum + Ms[[i]]
167 }
168 Ms
cbd88fe5 169}