[morpheus.git] / pkg / R / utils.R

#' normalize
#'
#' Normalize a vector or a matrix (by columns), using euclidian norm
#'
#' @param X Vector or matrix to be normalized
#'
#' @return The normalized matrix (1 column if X is a vector)
#'
#' @export
normalize = function(X)
{
  X = as.matrix(X)
  norm2 = sqrt( colSums(X^2) )
  sweep(X, 2, norm2, '/')
}

# Computes a tensor-vector product
#
# @param Te third-order tensor (size dxdxd)
# @param w vector of size d
#
# @return Matrix of size dxd
#
.T_I_I_w = function(Te, w)
{
  d = length(w)
  Ma = matrix(0,nrow=d,ncol=d)
  for (j in 1:d)
    Ma = Ma + w[j] * Te[,,j]
  Ma
}

# Computes the second-order empirical moment between input X and output Y
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Matrix of size dxd
#
.Moments_M2 = function(X, Y)
{
  n = nrow(X)
  d = ncol(X)
  M2 = matrix(0,nrow=d,ncol=d)
  matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
    pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
}

# Computes the third-order empirical moment between input X and output Y
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Array of size dxdxd
#
.Moments_M3 = function(X, Y)
{
  n = nrow(X)
  d = ncol(X)
  M3 = array(0,dim=c(d,d,d))
  array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
    pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
}

#' computeMoments
#'
#' Compute cross-moments of order 1,2,3 from X,Y
#'
#' @inheritParams computeMu
#'
#' @return A list L where L[[i]] is the i-th cross-moment
#'
#' @export
computeMoments = function(X, Y)
  list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )

# Find the optimal assignment (permutation) between two sets (minimize cost)
#
# @param distances The distances matrix, in columns (distances[i,j] is distance between i
#   and j)
#
# @return A permutation minimizing cost
#
.hungarianAlgorithm = function(distances)
{
  n = nrow(distances)
  .C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
    assignment=integer(n), PACKAGE="morpheus")$assignment
}

#' alignMatrices
#'
#' Align a set of parameters matrices, with potential permutations.
#'
#' @param Ms A list of matrices, all of same size DxK
#' @param ref Either a reference matrix or "mean" to align on empirical mean
#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
#'   (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
#'   "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
#'   reference (resp. in current row) compare to all unassigned columns in current row
#'   (resp. in reference)
#'
#' @return The aligned list (of matrices), of same size as Ms
#'
#' @export
alignMatrices = function(Ms, ref, ls_mode)
{
  if (!is.matrix(ref) && ref != "mean")
    stop("ref: matrix or 'mean'")
  if (!ls_mode %in% c("exact","approx1","approx2"))
    stop("ls_mode in {'exact','approx1','approx2'}")

  K <- ncol(Ms[[1]])
  if (is.character(ref)) #ref=="mean"
    m_sum = Ms[[1]]
  L <- length(Ms)
  for (i in ifelse(is.character(ref),2,1):L)
  {
    m_ref = if (is.character(ref)) m_sum / (i-1) else ref
    m = Ms[[i]] #shorthand

    if (ls_mode == "exact")
    {
      #distances[i,j] = distance between m column i and ref column j
      distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
      assignment = .hungarianAlgorithm(distances)
      col <- m[,assignment]
      if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
    }
    else
    {
      # Greedy matching:
      #   approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
      #   approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
      available_indices = 1:K
      for (j in 1:K)
      {
        distances =
          if (ls_mode == "approx1")
          {
            apply(as.matrix(m[,available_indices]), 2,
              function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
          }
          else #approx2
          {
            apply(as.matrix(m_ref[,available_indices]), 2,
              function(col) ( sqrt(sum((col - m[,j])^2)) ) )
          }
        indMin = which.min(distances)
        if (ls_mode == "approx1")
        {
          col <- m[ , available_indices[indMin] ]
          if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
        }
        else #approx2
        {
          col <- available_indices[indMin]
          if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
        }
        available_indices = available_indices[-indMin]
      }
    }

    # Update current sum with "label-switched" li[[i]]
    if (is.character(ref)) #ref=="mean"
      m_sum = m_sum + Ms[[i]]
  }
  Ms
}
Commit	Line	Data
cbd88fe5 BA	1	#' normalize
	2	#'
	3	#' Normalize a vector or a matrix (by columns), using euclidian norm
	4	#'
	5	#' @param X Vector or matrix to be normalized
	6	#'
	7	#' @return The normalized matrix (1 column if X is a vector)
	8	#'
	9	#' @export
	10	normalize = function(X)
	11	{
6dd5c2ac BA	12	X = as.matrix(X)
	13	norm2 = sqrt( colSums(X^2) )
	14	sweep(X, 2, norm2, '/')
cbd88fe5 BA	15	}
	16
	17	# Computes a tensor-vector product
	18	#
	19	# @param Te third-order tensor (size dxdxd)
	20	# @param w vector of size d
	21	#
	22	# @return Matrix of size dxd
	23	#
	24	.T_I_I_w = function(Te, w)
	25	{
6dd5c2ac BA	26	d = length(w)
	27	Ma = matrix(0,nrow=d,ncol=d)
	28	for (j in 1:d)
	29	Ma = Ma + w[j] * Te[,,j]
	30	Ma
cbd88fe5 BA	31	}
	32
	33	# Computes the second-order empirical moment between input X and output Y
	34	#
	35	# @param X matrix of covariates (of size n*d)
	36	# @param Y vector of responses (of size n)
	37	#
	38	# @return Matrix of size dxd
	39	#
	40	.Moments_M2 = function(X, Y)
	41	{
6dd5c2ac BA	42	n = nrow(X)
	43	d = ncol(X)
	44	M2 = matrix(0,nrow=d,ncol=d)
	45	matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	46	pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
cbd88fe5 BA	47	}
	48
	49	# Computes the third-order empirical moment between input X and output Y
	50	#
	51	# @param X matrix of covariates (of size n*d)
	52	# @param Y vector of responses (of size n)
	53	#
	54	# @return Array of size dxdxd
	55	#
	56	.Moments_M3 = function(X, Y)
	57	{
6dd5c2ac BA	58	n = nrow(X)
	59	d = ncol(X)
	60	M3 = array(0,dim=c(d,d,d))
	61	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	62	pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
cbd88fe5 BA	63	}
	64
	65	#' computeMoments
	66	#'
	67	#' Compute cross-moments of order 1,2,3 from X,Y
	68	#'
	69	#' @inheritParams computeMu
	70	#'
	71	#' @return A list L where L[[i]] is the i-th cross-moment
	72	#'
	73	#' @export
	74	computeMoments = function(X, Y)
6dd5c2ac	75	list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
cbd88fe5 BA	76
	77	# Find the optimal assignment (permutation) between two sets (minimize cost)
	78	#
	79	# @param distances The distances matrix, in columns (distances[i,j] is distance between i
	80	# and j)
	81	#
	82	# @return A permutation minimizing cost
	83	#
	84	.hungarianAlgorithm = function(distances)
	85	{
6dd5c2ac BA	86	n = nrow(distances)
	87	.C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
	88	assignment=integer(n), PACKAGE="morpheus")$assignment
cbd88fe5 BA	89	}
	90
	91	#' alignMatrices
	92	#'
	93	#' Align a set of parameters matrices, with potential permutations.
	94	#'
	95	#' @param Ms A list of matrices, all of same size DxK
	96	#' @param ref Either a reference matrix or "mean" to align on empirical mean
	97	#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
	98	#' (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
	99	#' "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
	100	#' reference (resp. in current row) compare to all unassigned columns in current row
	101	#' (resp. in reference)
	102	#'
	103	#' @return The aligned list (of matrices), of same size as Ms
	104	#'
	105	#' @export
	106	alignMatrices = function(Ms, ref, ls_mode)
	107	{
6dd5c2ac BA	108	if (!is.matrix(ref) && ref != "mean")
	109	stop("ref: matrix or 'mean'")
	110	if (!ls_mode %in% c("exact","approx1","approx2"))
	111	stop("ls_mode in {'exact','approx1','approx2'}")
cbd88fe5	112
6dd5c2ac BA	113	K <- ncol(Ms[[1]])
	114	if (is.character(ref)) #ref=="mean"
	115	m_sum = Ms[[1]]
	116	L <- length(Ms)
	117	for (i in ifelse(is.character(ref),2,1):L)
	118	{
	119	m_ref = if (is.character(ref)) m_sum / (i-1) else ref
	120	m = Ms[[i]] #shorthand
cbd88fe5	121
6dd5c2ac BA	122	if (ls_mode == "exact")
	123	{
	124	#distances[i,j] = distance between m column i and ref column j
	125	distances = apply( m_ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
	126	assignment = .hungarianAlgorithm(distances)
	127	col <- m[,assignment]
	128	if (is.list(Ms)) Ms[[i]] <- col else Ms[,,i] <- col
	129	}
	130	else
	131	{
	132	# Greedy matching:
	133	# approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
	134	# approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
	135	available_indices = 1:K
	136	for (j in 1:K)
	137	{
	138	distances =
	139	if (ls_mode == "approx1")
	140	{
	141	apply(as.matrix(m[,available_indices]), 2,
	142	function(col) ( sqrt(sum((col - m_ref[,j])^2)) ) )
	143	}
	144	else #approx2
	145	{
	146	apply(as.matrix(m_ref[,available_indices]), 2,
	147	function(col) ( sqrt(sum((col - m[,j])^2)) ) )
	148	}
	149	indMin = which.min(distances)
	150	if (ls_mode == "approx1")
	151	{
	152	col <- m[ , available_indices[indMin] ]
	153	if (is.list(Ms)) Ms[[i]][,j] <- col else Ms[,j,i] <- col
	154	}
	155	else #approx2
	156	{
	157	col <- available_indices[indMin]
	158	if (is.list(Ms)) Ms[[i]][,col] <- m[,j] else Ms[,col,i] <- m[,j]
	159	}
	160	available_indices = available_indices[-indMin]
	161	}
	162	}
cbd88fe5	163
6dd5c2ac BA	164	# Update current sum with "label-switched" li[[i]]
	165	if (is.character(ref)) #ref=="mean"
	166	m_sum = m_sum + Ms[[i]]
	167	}
	168	Ms
cbd88fe5	169	}