[morpheus.git] / pkg / R / utils.R

#' normalize
#'
#' Normalize a vector or a matrix (by columns), using euclidian norm
#'
#' @param X Vector or matrix to be normalized
#'
#' @return The normalized matrix (1 column if X is a vector)
#'
#' @export
normalize = function(X)
{
	X = as.matrix(X)
	norm2 = sqrt( colSums(X^2) )
	sweep(X, 2, norm2, '/')
}

# Computes a tensor-vector product
#
# @param Te third-order tensor (size dxdxd)
# @param w vector of size d
#
# @return Matrix of size dxd
#
.T_I_I_w = function(Te, w)
{
	d = length(w)
	Ma = matrix(0,nrow=d,ncol=d)
	for (j in 1:d)
		Ma = Ma + w[j] * Te[,,j]
	Ma
}

# Computes the second-order empirical moment between input X and output Y
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Matrix of size dxd
#
.Moments_M2 = function(X, Y)
{
	n = nrow(X)
	d = ncol(X)
	M2 = matrix(0,nrow=d,ncol=d)
	matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
		pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
}

# Computes the third-order empirical moment between input X and output Y
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Array of size dxdxd
#
.Moments_M3 = function(X, Y)
{
	n = nrow(X)
	d = ncol(X)
	M3 = array(0,dim=c(d,d,d))
	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
		pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
}

#' computeMoments
#'
#' Compute cross-moments of order 1,2,3 from X,Y
#'
#' @inheritParams computeMu
#'
#' @return A list L where L[[i]] is the i-th cross-moment
#'
#' @export
computeMoments = function(X, Y)
	list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )

# Find the optimal assignment (permutation) between two sets (minimize cost)
#
# @param distances The distances matrix, in columns (distances[i,j] is distance between i
#   and j)
#
# @return A permutation minimizing cost
#
.hungarianAlgorithm = function(distances)
{
	n = nrow(distances)
	.C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
		assignment=integer(n), PACKAGE="morpheus")$assignment
}

#' alignMatrices
#'
#' Align a set of parameters matrices, with potential permutations.
#'
#' @param Ms A list of matrices, all of same size DxK
#' @param ref Either a reference matrix or "mean" to align on empirical mean
#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
#'   (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
#'   "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
#'   reference (resp. in current row) compare to all unassigned columns in current row
#'   (resp. in reference)
#'
#' @return The aligned list (of matrices), of same size as Ms
#'
#' @export
alignMatrices = function(Ms, ref, ls_mode)
{
	if (!is.matrix(ref) && ref != "mean")
		stop("ref: matrix or 'mean'")
	if (!ls_mode %in% c("exact","approx1","approx2"))
		stop("ls_mode in {'exact','approx1','approx2'}")

	K <- ncol(Ms[[1]])
	if (is.character(ref)) #ref=="mean"
		m_sum = Ms[[1]]
	L <- length(Ms)
	for (i in ifelse(is.character(ref),2,1):L)
	{
		m_ref = if (is.character(ref)) m_sum / (i-1) else ref
		m = Ms[[i]] #shorthand

		if (ls_mode == "exact")
		{
			#distances[i,j] = distance between m column i and ref column j
			distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
			assignment = .hungarianAlgorithm(distances)
			col <- m[,assignment]
			if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
		}
		else
		{
			# Greedy matching:
			#   approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
			#   approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
			available_indices = 1:K
			for (j in 1:K)
			{
				distances =
					if (ls_mode == "approx1")
					{
						apply(as.matrix(m[,available_indices]), 2,
							function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
					}
					else #approx2
					{
						apply(as.matrix(m_ref[,available_indices]), 2,
							function(col) ( sqrt(sum((col - m[,j])^2)) ) )
					}
				indMin = which.min(distances)
				if (ls_mode == "approx1")
				{
					col <- m[ , available_indices[indMin] ]
					if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
				}
				else #approx2
				{
					col <- available_indices[indMin]
					if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
				}
				available_indices = available_indices[-indMin]
			}
		}

		# Update current sum with "label-switched" li[[i]]
		if (is.character(ref)) #ref=="mean"
			m_sum = m_sum + Ms[[i]]
	}
	Ms
}
Commit	Line	Data
	1	#' normalize
	2	#'
	3	#' Normalize a vector or a matrix (by columns), using euclidian norm
	4	#'
	5	#' @param X Vector or matrix to be normalized
	6	#'
	7	#' @return The normalized matrix (1 column if X is a vector)
	8	#'
	9	#' @export
	10	normalize = function(X)
	11	{
	12	X = as.matrix(X)
	13	norm2 = sqrt( colSums(X^2) )
	14	sweep(X, 2, norm2, '/')
	15	}
	16
	17	# Computes a tensor-vector product
	18	#
	19	# @param Te third-order tensor (size dxdxd)
	20	# @param w vector of size d
	21	#
	22	# @return Matrix of size dxd
	23	#
	24	.T_I_I_w = function(Te, w)
	25	{
	26	d = length(w)
	27	Ma = matrix(0,nrow=d,ncol=d)
	28	for (j in 1:d)
	29	Ma = Ma + w[j] * Te[,,j]
	30	Ma
	31	}
	32
	33	# Computes the second-order empirical moment between input X and output Y
	34	#
	35	# @param X matrix of covariates (of size n*d)
	36	# @param Y vector of responses (of size n)
	37	#
	38	# @return Matrix of size dxd
	39	#
	40	.Moments_M2 = function(X, Y)
	41	{
	42	n = nrow(X)
	43	d = ncol(X)
	44	M2 = matrix(0,nrow=d,ncol=d)
	45	matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	46	pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
	47	}
	48
	49	# Computes the third-order empirical moment between input X and output Y
	50	#
	51	# @param X matrix of covariates (of size n*d)
	52	# @param Y vector of responses (of size n)
	53	#
	54	# @return Array of size dxdxd
	55	#
	56	.Moments_M3 = function(X, Y)
	57	{
	58	n = nrow(X)
	59	d = ncol(X)
	60	M3 = array(0,dim=c(d,d,d))
	61	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	62	pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
	63	}
	64
	65	#' computeMoments
	66	#'
	67	#' Compute cross-moments of order 1,2,3 from X,Y
	68	#'
	69	#' @inheritParams computeMu
	70	#'
	71	#' @return A list L where L[[i]] is the i-th cross-moment
	72	#'
	73	#' @export
	74	computeMoments = function(X, Y)
	75	list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
	76
	77	# Find the optimal assignment (permutation) between two sets (minimize cost)
	78	#
	79	# @param distances The distances matrix, in columns (distances[i,j] is distance between i
	80	# and j)
	81	#
	82	# @return A permutation minimizing cost
	83	#
	84	.hungarianAlgorithm = function(distances)
	85	{
	86	n = nrow(distances)
	87	.C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
	88	assignment=integer(n), PACKAGE="morpheus")$assignment
	89	}
	90
	91	#' alignMatrices
	92	#'
	93	#' Align a set of parameters matrices, with potential permutations.
	94	#'
	95	#' @param Ms A list of matrices, all of same size DxK
	96	#' @param ref Either a reference matrix or "mean" to align on empirical mean
	97	#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
	98	#' (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
	99	#' "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
	100	#' reference (resp. in current row) compare to all unassigned columns in current row
	101	#' (resp. in reference)
	102	#'
	103	#' @return The aligned list (of matrices), of same size as Ms
	104	#'
	105	#' @export
	106	alignMatrices = function(Ms, ref, ls_mode)
	107	{
	108	if (!is.matrix(ref) && ref != "mean")
	109	stop("ref: matrix or 'mean'")
	110	if (!ls_mode %in% c("exact","approx1","approx2"))
	111	stop("ls_mode in {'exact','approx1','approx2'}")
	112
	113	K <- ncol(Ms[[1]])
	114	if (is.character(ref)) #ref=="mean"
	115	m_sum = Ms[[1]]
	116	L <- length(Ms)
	117	for (i in ifelse(is.character(ref),2,1):L)
	118	{
	119	m_ref = if (is.character(ref)) m_sum / (i-1) else ref
	120	m = Ms[[i]] #shorthand
	121
	122	if (ls_mode == "exact")
	123	{
	124	#distances[i,j] = distance between m column i and ref column j
	125	distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
	126	assignment = .hungarianAlgorithm(distances)
	127	col <- m[,assignment]
	128	if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
	129	}
	130	else
	131	{
	132	# Greedy matching:
	133	# approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
	134	# approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
	135	available_indices = 1:K
	136	for (j in 1:K)
	137	{
	138	distances =
	139	if (ls_mode == "approx1")
	140	{
	141	apply(as.matrix(m[,available_indices]), 2,
	142	function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
	143	}
	144	else #approx2
	145	{
	146	apply(as.matrix(m_ref[,available_indices]), 2,
	147	function(col) ( sqrt(sum((col - m[,j])^2)) ) )
	148	}
	149	indMin = which.min(distances)
	150	if (ls_mode == "approx1")
	151	{
	152	col <- m[ , available_indices[indMin] ]
	153	if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
	154	}
	155	else #approx2
	156	{
	157	col <- available_indices[indMin]
	158	if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
	159	}
	160	available_indices = available_indices[-indMin]
	161	}
	162	}
	163
	164	# Update current sum with "label-switched" li[[i]]
	165	if (is.character(ref)) #ref=="mean"
	166	m_sum = m_sum + Ms[[i]]
	167	}
	168	Ms
	169	}