[morpheus.git] / pkg / R / utils.R

#' normalize
#'
#' Rescale a vector, or every column of a matrix, to unit Euclidean norm
#'
#' @param x Vector or matrix to be normalized (a vector is handled as a
#'   one-column matrix)
#'
#' @return A matrix with the dimensions of \code{as.matrix(x)}, where each column
#'   has been divided by its Euclidean norm
#'
#' @examples
#' x <- matrix(c(1,2,-1,3), ncol=2)
#' normalize(x) #column 1 is 1/sqrt(5) (1 2),
#'              #and column 2 is 1/sqrt(10) (-1, 3)
#' @export
normalize <- function(x)
{
  mat <- as.matrix(x)
  # Euclidean norm of each column
  col_norms <- sqrt( colSums(mat^2) )
  # Divide column j by col_norms[j]
  sweep(mat, MARGIN=2, STATS=col_norms, FUN="/")
}

# Computes a tensor-vector product: the sum over j of w[j] * Te[,,j]
#
# @param Te third-order tensor (size dxdxd)
# @param w vector of size d
#
# @return Matrix of size dxd (a 0x0 matrix when w is empty)
#
.T_I_I_w <- function(Te, w)
{
  d <- length(w)
  Ma <- matrix(0,nrow=d,ncol=d)
  # seq_len(d) is empty when d == 0, whereas 1:d would iterate over c(1,0)
  # and fail on Te[,,1]
  for (j in seq_len(d))
    Ma <- Ma + w[j] * Te[,,j]
  Ma
}

# Second-order empirical moment between input X and output Y, delegated to the
# compiled routine "Moments_M2" of package morpheus
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Matrix of size dxd
#
.Moments_M2 <- function(X, Y)
{
  n_obs <- nrow(X)
  n_cov <- ncol(X)
  # Zero-filled dxd buffer passed as the M2 argument; the M2 element of the
  # list returned by .C() is reshaped into the result
  buffer <- matrix(0, nrow=n_cov, ncol=n_cov)
  res <- .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n_obs),
    pd=as.integer(n_cov), M2=as.double(buffer), PACKAGE="morpheus")
  matrix(res$M2, nrow=n_cov, ncol=n_cov)
}

# Third-order empirical moment between input X and output Y, delegated to the
# compiled routine "Moments_M3" of package morpheus
#
# @param X matrix of covariates (of size n*d)
# @param Y vector of responses (of size n)
#
# @return Array of size dxdxd
#
.Moments_M3 <- function(X, Y)
{
  n_obs <- nrow(X)
  n_cov <- ncol(X)
  out_dims <- c(n_cov, n_cov, n_cov)
  # Zero-filled dxdxd buffer passed as the M3 argument; the M3 element of the
  # list returned by .C() is reshaped into the result
  buffer <- array(0, dim=out_dims)
  res <- .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n_obs),
    pd=as.integer(n_cov), M3=as.double(buffer), PACKAGE="morpheus")
  array(res$M3, dim=out_dims)
}

#' computeMoments
#'
#' Compute the cross-moments of order 1, 2 and 3 between X and Y
#'
#' @inheritParams computeMu
#'
#' @return A list L of length 3 where L[[i]] is the i-th cross-moment
#'
#' @examples
#' X <- matrix(rnorm(100), ncol=2)
#' Y <- rbinom(100, 1, .5)
#' M <- computeMoments(X, Y)
#'
#' @export
computeMoments <- function(X, Y)
{
  # Y is recycled down the columns of X, so row i of X is scaled by Y[i]
  first_order <- colMeans(Y * X)
  second_order <- .Moments_M2(X,Y)
  third_order <- .Moments_M3(X,Y)
  list(first_order, second_order, third_order)
}

# Find the optimal assignment (permutation) between two sets (minimize cost),
# delegated to the compiled routine "hungarianAlgorithm" of package morpheus
#
# @param distances The distances matrix, in columns
#   (distances[i,j] is distance between i and j)
#
# @return A permutation minimizing cost
#
.hungarianAlgorithm <- function(distances)
{
  size <- nrow(distances)
  # The assignment argument is an integer buffer of length nrow(distances);
  # its content after the call is the result
  res <- .C("hungarianAlgorithm", distances=as.double(distances),
    pn=as.integer(size), assignment=integer(size), PACKAGE="morpheus")
  res$assignment
}

#' alignMatrices
#'
#' Align a set of parameters matrices, with potential permutations: the columns of
#' each matrix are reordered to match the columns of a reference matrix as closely
#' as possible (euclidean distance between columns).
#'
#' @param Ms A list of matrices, all of same size DxK
#' @param ref A reference matrix to align other matrices with
#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
#'   (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
#'   "approx2" to apply a greedy matching algorithm (heuristic). "approx1" takes each
#'   column of the reference in turn and picks the closest still unassigned column of
#'   the current matrix; "approx2" takes each column of the current matrix in turn and
#'   picks the closest still unassigned column of the reference.
#'
#' @return The aligned list (of matrices), of same size as Ms. An empty list is
#'   returned unchanged.
#'
#' @examples
#' m1 <- matrix(c(1,1,0,0),ncol=2)
#' m2 <- matrix(c(0,0,1,1),ncol=2)
#' ref <- m1
#' Ms <- list(m1, m2, m1, m2)
#' a <- alignMatrices(Ms, ref, "exact")
#' # a[[i]] is expected to contain m1 for all i
#'
#' @export
alignMatrices <- function(Ms, ref, ls_mode=c("exact","approx1","approx2"))
{
  if (!is.matrix(ref) || any(is.na(ref)))
    stop("ref: matrix, no NAs")
  ls_mode <- match.arg(ls_mode)

  # An empty list has nothing to align (and no Ms[[1]] to read K from)
  if (length(Ms) == 0)
    return (Ms)

  K <- ncol(Ms[[1]])
  for (i in seq_along(Ms))
  {
    m <- Ms[[i]] #shorthand; also a snapshot, since Ms[[i]] is overwritten below

    if (ls_mode == "exact")
    {
      #distances[a,b] = distance between m column a and ref column b
      distances <- apply( ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
      # apply() returns a plain vector when m has a single column:
      # rebuild the matrix so that nrow(distances) is defined
      distances <- matrix(distances, nrow=ncol(m))
      assignment <- .hungarianAlgorithm(distances)
      # NOTE(review): assumes assignment holds 1-based indices such that column k
      # of the aligned matrix is m[,assignment[k]] -- TODO confirm against the
      # compiled routine (the inverse permutation would be order(assignment))
      # drop=FALSE keeps a matrix when D or K equals 1
      Ms[[i]] <- m[, assignment, drop=FALSE]
    }
    else
    {
      # Greedy matching:
      #   approx1: ref[,j] is matched with the closest unassigned column of m
      #   approx2: m[,j] is matched with the closest unassigned column of ref
      available_indices <- seq_len(K)
      for (j in seq_len(K))
      {
        # drop=FALSE keeps one column per candidate, even with a single row (D=1)
        # or a single remaining index
        if (ls_mode == "approx1")
        {
          candidates <- m[, available_indices, drop=FALSE]
          target <- ref[,j]
        }
        else #approx2
        {
          candidates <- ref[, available_indices, drop=FALSE]
          target <- m[,j]
        }
        distances <- apply(candidates, 2,
          function(col) ( sqrt(sum((col - target)^2)) ) )
        indMin <- which.min(distances)
        if (ls_mode == "approx1")
          Ms[[i]][,j] <- m[ , available_indices[indMin] ]
        else #approx2
          Ms[[i]][ , available_indices[indMin] ] <- m[,j]
        # The chosen index cannot be matched again
        available_indices <- available_indices[-indMin]
      }
    }
  }
  Ms
}
Commit	Line	Data
	1	#' normalize
	2	#'
	3	#' Normalize a vector or a matrix (by columns), using euclidian norm
	4	#'
	5	#' @param x Vector or matrix to be normalized
	6	#'
	7	#' @return The normalized matrix (1 column if x is a vector)
	8	#'
	9	#' @examples
	10	#' x <- matrix(c(1,2,-1,3), ncol=2)
	11	#' normalize(x) #column 1 is 1/sqrt(5) (1 2),
	12	#' #and column 2 is 1/sqrt(10) (-1, 3)
	13	#' @export
	14	normalize <- function(x)
	15	{
	16	x <- as.matrix(x)
	17	norm2 <- sqrt( colSums(x^2) )
	18	sweep(x, 2, norm2, '/')
	19	}
	20
	21	# Computes a tensor-vector product
	22	#
	23	# @param Te third-order tensor (size dxdxd)
	24	# @param w vector of size d
	25	#
	26	# @return Matrix of size dxd
	27	#
	28	.T_I_I_w <- function(Te, w)
	29	{
	30	d <- length(w)
	31	Ma <- matrix(0,nrow=d,ncol=d)
	32	for (j in 1:d)
	33	Ma <- Ma + w[j] * Te[,,j]
	34	Ma
	35	}
	36
	37	# Computes the second-order empirical moment between input X and output Y
	38	#
	39	# @param X matrix of covariates (of size n*d)
	40	# @param Y vector of responses (of size n)
	41	#
	42	# @return Matrix of size dxd
	43	#
	44	.Moments_M2 <- function(X, Y)
	45	{
	46	n <- nrow(X)
	47	d <- ncol(X)
	48	M2 <- matrix(0,nrow=d,ncol=d)
	49	matrix( .C("Moments_M2", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	50	pd=as.integer(d), M2=as.double(M2), PACKAGE="morpheus")$M2, nrow=d, ncol=d)
	51	}
	52
	53	# Computes the third-order empirical moment between input X and output Y
	54	#
	55	# @param X matrix of covariates (of size n*d)
	56	# @param Y vector of responses (of size n)
	57	#
	58	# @return Array of size dxdxd
	59	#
	60	.Moments_M3 <- function(X, Y)
	61	{
	62	n <- nrow(X)
	63	d <- ncol(X)
	64	M3 <- array(0,dim=c(d,d,d))
	65	array( .C("Moments_M3", X=as.double(X), Y=as.double(Y), pn=as.integer(n),
	66	pd=as.integer(d), M3=as.double(M3), PACKAGE="morpheus")$M3, dim=c(d,d,d) )
	67	}
	68
	69	#' computeMoments
	70	#'
	71	#' Compute cross-moments of order 1,2,3 from X,Y
	72	#'
	73	#' @inheritParams computeMu
	74	#'
	75	#' @return A list L where L[[i]] is the i-th cross-moment
	76	#'
	77	#' @examples
	78	#' X <- matrix(rnorm(100), ncol=2)
	79	#' Y <- rbinom(100, 1, .5)
	80	#' M <- computeMoments(X, Y)
	81	#'
	82	#' @export
	83	computeMoments = function(X, Y)
	84	list( colMeans(Y * X), .Moments_M2(X,Y), .Moments_M3(X,Y) )
	85
	86	# Find the optimal assignment (permutation) between two sets (minimize cost)
	87	#
	88	# @param distances The distances matrix, in columns
	89	# (distances[i,j] is distance between i and j)
	90	#
	91	# @return A permutation minimizing cost
	92	#
	93	.hungarianAlgorithm <- function(distances)
	94	{
	95	n <- nrow(distances)
	96	.C("hungarianAlgorithm", distances=as.double(distances), pn=as.integer(n),
	97	assignment=integer(n), PACKAGE="morpheus")$assignment
	98	}
	99
	100	#' alignMatrices
	101	#'
	102	#' Align a set of parameters matrices, with potential permutations.
	103	#'
	104	#' @param Ms A list of matrices, all of same size DxK
	105	#' @param ref A reference matrix to align other matrices with
	106	#' @param ls_mode How to compute the labels assignment: "exact" for exact algorithm
	107	#' (default, but might be time-consuming, complexity is O(K^3) ), or "approx1", or
	108	#' "approx2" to apply a greedy matching algorithm (heuristic) which for each column in
	109	#' reference (resp. in current row) compare to all unassigned columns in current row
	110	#' (resp. in reference)
	111	#'
	112	#' @return The aligned list (of matrices), of same size as Ms
	113	#'
	114	#' @examples
	115	#' m1 <- matrix(c(1,1,0,0),ncol=2)
	116	#' m2 <- matrix(c(0,0,1,1),ncol=2)
	117	#' ref <- m1
	118	#' Ms <- list(m1, m2, m1, m2)
	119	#' a <- alignMatrices(Ms, ref, "exact")
	120	#' # a[[i]] is expected to contain m1 for all i
	121	#'
	122	#' @export
	123	alignMatrices <- function(Ms, ref, ls_mode=c("exact","approx1","approx2"))
	124	{
	125	if (!is.matrix(ref) || any(is.na(ref)))
	126	stop("ref: matrix, no NAs")
	127	ls_mode <- match.arg(ls_mode)
	128
	129	K <- ncol(Ms[[1]])
	130	L <- length(Ms)
	131	for (i in 1:L)
	132	{
	133	m <- Ms[[i]] #shorthand
	134
	135	if (ls_mode == "exact")
	136	{
	137	#distances[i,j] = distance between m column i and ref column j
	138	distances = apply( ref, 2, function(col) ( sqrt(colSums((m-col)^2)) ) )
	139	assignment = .hungarianAlgorithm(distances)
	140	col <- m[,assignment]
	141	Ms[[i]] <- col
	142	}
	143	else
	144	{
	145	# Greedy matching:
	146	# approx1: li[[i]][,j] is assigned to m[,k] minimizing dist(li[[i]][,j],m[,k'])
	147	# approx2: m[,j] is assigned to li[[i]][,k] minimizing dist(m[,j],li[[i]][,k'])
	148	available_indices = 1:K
	149	for (j in 1:K)
	150	{
	151	distances =
	152	if (ls_mode == "approx1")
	153	{
	154	apply(as.matrix(m[,available_indices]), 2,
	155	function(col) ( sqrt(sum((col - ref[,j])^2)) ) )
	156	}
	157	else #approx2
	158	{
	159	apply(as.matrix(ref[,available_indices]), 2,
	160	function(col) ( sqrt(sum((col - m[,j])^2)) ) )
	161	}
	162	indMin = which.min(distances)
	163	if (ls_mode == "approx1")
	164	{
	165	col <- m[ , available_indices[indMin] ]
	166	Ms[[i]][,j] <- col
	167	}
	168	else #approx2
	169	{
	170	col <- available_indices[indMin]
	171	Ms[[i]][,col] <- m[,j]
	172	}
	173	available_indices = available_indices[-indMin]
	174	}
	175	}
	176	}
	177	Ms
	178	}