Prepare code for GLSMM implementation (generalized least squares)
[morpheus.git] / pkg / R / utils.R
CommitLineData
cbd88fe5
BA
1#' normalize
2#'
3#' Normalize a vector or a matrix (by columns), using euclidian norm
4#'
5#' @param X Vector or matrix to be normalized
6#'
7#' @return The normalized matrix (1 column if X is a vector)
8#'
9#' @export
10normalize = function(X)
11{
12 X = as.matrix(X)
13 norm2 = sqrt( colSums(X^2) )
14 sweep(X, 2, norm2, '/')
15}
16
17# Computes a tensor-vector product
18#
19# @param Te third-order tensor (size dxdxd)
20# @param w vector of size d
21#
22# @return Matrix of size dxd
23#
24.T_I_I_w = function(Te, w)
25{
26 d = length(w)
27 Ma = matrix(0,nrow=d,ncol=d)
28 for (j in 1:d)
29 Ma = Ma + w[j] * Te[,,j]
30 Ma
31}
32
33# Computes the second-order empirical moment between input X and output Y
34#
35# @param X matrix of covariates (of size n*d)
36# @param Y vector of responses (of size n)
37#
38# @return Matrix of size dxd
39#
40.Moments_M2 = function(X, Y)
41{
42 n = nrow(X)
43 d = ncol(X)
44 M2 = matrix(0,nrow=d,ncol=d)
45 matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
46 pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
47}
48
49# Computes the third-order empirical moment between input X and output Y
50#
51# @param X matrix of covariates (of size n*d)
52# @param Y vector of responses (of size n)
53#
54# @return Array of size dxdxd
55#
56.Moments_M3 = function(X, Y)
57{
58 n = nrow(X)
59 d = ncol(X)
60 M3 = array(0,dim=c(d,d,d))
61 array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
62 pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
63}
64
65#' computeMoments
66#'
67#' Compute cross-moments of order 1,2,3 from X,Y
68#'
69#' @inheritParams computeMu
70#'
71#' @return A list L where L[[i]] is the i-th cross-moment
72#'
73#' @export
74computeMoments = function(X, Y)
75 list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
76
4263503b
BA
77# Computes the Omega matrix for generalized least square method
78#
79# @param X matrix of covariates (of size n*d)
80# @param Y vector of responses (of size n)
81# @param theta list with p, beta, b
82#
83# @return Matrix of size dimxdim where dim=d+d^2+d^3
84#
85.Moments_M3 = function(X, Y)
86{
87 n = nrow(X)
88 d = ncol(X)
89 M3 = array(0,dim=c(d,d,d))
90 array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
91 pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
92}
93
cbd88fe5
BA
94# Find the optimal assignment (permutation) between two sets (minimize cost)
95#
96# @param distances The distances matrix, in columns (distances[i,j] is distance between i
97# and j)
98#
99# @return A permutation minimizing cost
100#
101.hungarianAlgorithm = function(distances)
102{
103 n = nrow(distances)
104 .C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
105 assignment=integer(n), PACKAGE="morpheus")$assignment
106}
107
108#' alignMatrices
109#'
110#' Align a set of parameters matrices, with potential permutations.
111#'
112#' @param Ms A list of matrices, all of same size DxK
113#' @param ref Either a reference matrix or "mean" to align on empirical mean
114#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
115#' (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
116#' "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
117#' reference (resp. in current row) compare to all unassigned columns in current row
118#' (resp. in reference)
119#'
120#' @return The aligned list (of matrices), of same size as Ms
121#'
122#' @export
123alignMatrices = function(Ms, ref, ls_mode)
124{
125 if (!is.matrix(ref) && ref != "mean")
126 stop("ref: matrix or 'mean'")
127 if (!ls_mode %in% c("exact","approx1","approx2"))
128 stop("ls_mode in {'exact','approx1','approx2'}")
129
130 K <- ncol(Ms[[1]])
131 if (is.character(ref)) #ref=="mean"
132 m_sum = Ms[[1]]
133 L <- length(Ms)
134 for (i in ifelse(is.character(ref),2,1):L)
135 {
136 m_ref = if (is.character(ref)) m_sum / (i-1) else ref
137 m = Ms[[i]] #shorthand
138
139 if (ls_mode == "exact")
140 {
141 #distances[i,j] = distance between m column i and ref column j
142 distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
143 assignment = .hungarianAlgorithm(distances)
144 col <- m[,assignment]
145 if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
146 }
147 else
148 {
149 # Greedy matching:
150 # approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
151 # approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
152 available_indices = 1:K
153 for (j in 1:K)
154 {
155 distances =
156 if (ls_mode == "approx1")
157 {
158 apply(as.matrix(m[,available_indices]), 2,
159 function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
160 }
161 else #approx2
162 {
163 apply(as.matrix(m_ref[,available_indices]), 2,
164 function(col) ( sqrt(sum((col - m[,j])^2)) ) )
165 }
166 indMin = which.min(distances)
167 if (ls_mode == "approx1")
168 {
169 col <- m[ , available_indices[indMin] ]
170 if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
171 }
172 else #approx2
173 {
174 col <- available_indices[indMin]
175 if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
176 }
177 available_indices = available_indices[-indMin]
178 }
179 }
180
181 # Update current sum with "label-switched" li[[i]]
182 if (is.character(ref)) #ref=="mean"
183 m_sum = m_sum + Ms[[i]]
184 }
185 Ms
186}