[morpheus.git] / pkg / R / utils.R

#' normalize
#'
#' Normalize a vector or a matrix (by columns), using euclidian norm
#'
#' @param X Vector or matrix to be normalized
#'
#' @return The normalized matrix (1 column if X is a vector)
#'
#' @export
normalize = function(X)
{
	X = as.matrix(X)
	norm2 = sqrt( colSums(X^2) )
	sweep(X, 2, norm2, '/')
}

# Computes a tensor-vector product
#
# @param Te third-order tensor (size dxdxd)
# @param w vector of size d
#
# @return Matrix of size dxd
#
.T_I_I_w = function(Te, w)
{
	d = length(w)
	Ma = matrix(0,nrow=d,ncol=d)
	for (j in 1:d)
		Ma = Ma + w[j] * Te[,,j]
	Ma
}

# Computes the second-order empirical moment between input X and output Y
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Matrix of size dxd
#
.Moments_M2 = function(X, Y)
{
	n = nrow(X)
	d = ncol(X)
	M2 = matrix(0,nrow=d,ncol=d)
	matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
		pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
}

# Computes the third-order empirical moment between input X and output Y
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Array of size dxdxd
#
.Moments_M3 = function(X, Y)
{
	n = nrow(X)
	d = ncol(X)
	M3 = array(0,dim=c(d,d,d))
	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
		pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
}

#' computeMoments
#'
#' Compute cross-moments of order 1,2,3 from X,Y
#'
#' @inheritParams computeMu
#'
#' @return A list L where L[[i]] is the i-th cross-moment
#'
#' @export
computeMoments = function(X, Y)
	list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )

# Computes the Omega matrix for generalized least square method
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
# @param theta list with p, beta, b
#
# @return Matrix of size dimxdim where dim=d+d^2+d^3
#
.Moments_M3 = function(X, Y)
{
	n = nrow(X)
	d = ncol(X)
	M3 = array(0,dim=c(d,d,d))
	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
		pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
}

# Find the optimal assignment (permutation) between two sets (minimize cost)
#
# @param distances The distances matrix, in columns (distances[i,j] is distance between i
#   and j)
#
# @return A permutation minimizing cost
#
.hungarianAlgorithm = function(distances)
{
	n = nrow(distances)
	.C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
		assignment=integer(n), PACKAGE="morpheus")$assignment
}

#' alignMatrices
#'
#' Align a set of parameters matrices, with potential permutations.
#'
#' @param Ms A list of matrices, all of same size DxK
#' @param ref Either a reference matrix or "mean" to align on empirical mean
#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
#'   (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
#'   "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
#'   reference (resp. in current row) compare to all unassigned columns in current row
#'   (resp. in reference)
#'
#' @return The aligned list (of matrices), of same size as Ms
#'
#' @export
alignMatrices = function(Ms, ref, ls_mode)
{
	if (!is.matrix(ref) && ref != "mean")
		stop("ref: matrix or 'mean'")
	if (!ls_mode %in% c("exact","approx1","approx2"))
		stop("ls_mode in {'exact','approx1','approx2'}")

	K <- ncol(Ms[[1]])
	if (is.character(ref)) #ref=="mean"
		m_sum = Ms[[1]]
	L <- length(Ms)
	for (i in ifelse(is.character(ref),2,1):L)
	{
		m_ref = if (is.character(ref)) m_sum / (i-1) else ref
		m = Ms[[i]] #shorthand

		if (ls_mode == "exact")
		{
			#distances[i,j] = distance between m column i and ref column j
			distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
			assignment = .hungarianAlgorithm(distances)
			col <- m[,assignment]
			if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
		}
		else
		{
			# Greedy matching:
			#   approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
			#   approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
			available_indices = 1:K
			for (j in 1:K)
			{
				distances =
					if (ls_mode == "approx1")
					{
						apply(as.matrix(m[,available_indices]), 2,
							function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
					}
					else #approx2
					{
						apply(as.matrix(m_ref[,available_indices]), 2,
							function(col) ( sqrt(sum((col - m[,j])^2)) ) )
					}
				indMin = which.min(distances)
				if (ls_mode == "approx1")
				{
					col <- m[ , available_indices[indMin] ]
					if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
				}
				else #approx2
				{
					col <- available_indices[indMin]
					if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
				}
				available_indices = available_indices[-indMin]
			}
		}

		# Update current sum with "label-switched" li[[i]]
		if (is.character(ref)) #ref=="mean"
			m_sum = m_sum + Ms[[i]]
	}
	Ms
}
Commit	Line	Data
cbd88fe5 BA	1	#' normalize
	2	#'
	3	#' Normalize a vector or a matrix (by columns), using euclidian norm
	4	#'
	5	#' @param X Vector or matrix to be normalized
	6	#'
	7	#' @return The normalized matrix (1 column if X is a vector)
	8	#'
	9	#' @export
	10	normalize = function(X)
	11	{
	12	X = as.matrix(X)
	13	norm2 = sqrt( colSums(X^2) )
	14	sweep(X, 2, norm2, '/')
	15	}
	16
	17	# Computes a tensor-vector product
	18	#
	19	# @param Te third-order tensor (size dxdxd)
	20	# @param w vector of size d
	21	#
	22	# @return Matrix of size dxd
	23	#
	24	.T_I_I_w = function(Te, w)
	25	{
	26	d = length(w)
	27	Ma = matrix(0,nrow=d,ncol=d)
	28	for (j in 1:d)
	29	Ma = Ma + w[j] * Te[,,j]
	30	Ma
	31	}
	32
	33	# Computes the second-order empirical moment between input X and output Y
	34	#
	35	# @param X matrix of covariates (of size n*d)
	36	# @param Y vector of responses (of size n)
	37	#
	38	# @return Matrix of size dxd
	39	#
	40	.Moments_M2 = function(X, Y)
	41	{
	42	n = nrow(X)
	43	d = ncol(X)
	44	M2 = matrix(0,nrow=d,ncol=d)
	45	matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	46	pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
	47	}
	48
	49	# Computes the third-order empirical moment between input X and output Y
	50	#
	51	# @param X matrix of covariates (of size n*d)
	52	# @param Y vector of responses (of size n)
	53	#
	54	# @return Array of size dxdxd
	55	#
	56	.Moments_M3 = function(X, Y)
	57	{
	58	n = nrow(X)
	59	d = ncol(X)
	60	M3 = array(0,dim=c(d,d,d))
	61	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	62	pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
	63	}
	64
65	#' computeMoments
66	#'
67	#' Compute cross-moments of order 1,2,3 from X,Y
68	#'
69	#' @inheritParams computeMu
70	#'
71	#' @return A list L where L[[i]] is the i-th cross-moment
72	#'
73	#' @export
74	computeMoments = function(X, Y)
75	list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
76
4263503b BA	77	# Computes the Omega matrix for generalized least square method
	78	#
	79	# @param X matrix of covariates (of size n*d)
	80	# @param Y vector of responses (of size n)
	81	# @param theta list with p, beta, b
	82	#
	83	# @return Matrix of size dimxdim where dim=d+d^2+d^3
	84	#
	85	.Moments_M3 = function(X, Y)
	86	{
	87	n = nrow(X)
	88	d = ncol(X)
	89	M3 = array(0,dim=c(d,d,d))
	90	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	91	pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
	92	}
	93
cbd88fe5 BA	94	# Find the optimal assignment (permutation) between two sets (minimize cost)
	95	#
	96	# @param distances The distances matrix, in columns (distances[i,j] is distance between i
	97	# and j)
	98	#
	99	# @return A permutation minimizing cost
	100	#
	101	.hungarianAlgorithm = function(distances)
	102	{
	103	n = nrow(distances)
	104	.C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
	105	assignment=integer(n), PACKAGE="morpheus")$assignment
	106	}
	107
	108	#' alignMatrices
	109	#'
	110	#' Align a set of parameters matrices, with potential permutations.
	111	#'
	112	#' @param Ms A list of matrices, all of same size DxK
	113	#' @param ref Either a reference matrix or "mean" to align on empirical mean
	114	#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
	115	#' (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
	116	#' "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
	117	#' reference (resp. in current row) compare to all unassigned columns in current row
	118	#' (resp. in reference)
	119	#'
	120	#' @return The aligned list (of matrices), of same size as Ms
	121	#'
	122	#' @export
	123	alignMatrices = function(Ms, ref, ls_mode)
	124	{
	125	if (!is.matrix(ref) && ref != "mean")
	126	stop("ref: matrix or 'mean'")
	127	if (!ls_mode %in% c("exact","approx1","approx2"))
	128	stop("ls_mode in {'exact','approx1','approx2'}")
	129
	130	K <- ncol(Ms[[1]])
	131	if (is.character(ref)) #ref=="mean"
	132	m_sum = Ms[[1]]
	133	L <- length(Ms)
	134	for (i in ifelse(is.character(ref),2,1):L)
	135	{
	136	m_ref = if (is.character(ref)) m_sum / (i-1) else ref
	137	m = Ms[[i]] #shorthand
	138
	139	if (ls_mode == "exact")
	140	{
	141	#distances[i,j] = distance between m column i and ref column j
	142	distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
	143	assignment = .hungarianAlgorithm(distances)
	144	col <- m[,assignment]
	145	if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
	146	}
	147	else
	148	{
	149	# Greedy matching:
	150	# approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
	151	# approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
	152	available_indices = 1:K
	153	for (j in 1:K)
	154	{
	155	distances =
	156	if (ls_mode == "approx1")
	157	{
158	apply(as.matrix(m[,available_indices]), 2,
159	function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
160	}
161	else #approx2
162	{
163	apply(as.matrix(m_ref[,available_indices]), 2,
164	function(col) ( sqrt(sum((col - m[,j])^2)) ) )
165	}
166	indMin = which.min(distances)
167	if (ls_mode == "approx1")
168	{
169	col <- m[ , available_indices[indMin] ]
170	if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
171	}
172	else #approx2
173	{
174	col <- available_indices[indMin]
175	if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
176	}
177	available_indices = available_indices[-indMin]
178	}
179	}
180
181	# Update current sum with "label-switched" li[[i]]
182	if (is.character(ref)) #ref=="mean"
183	m_sum = m_sum + Ms[[i]]
184	}
185	Ms
186	}