[epclust.git] / epclust / R / utils.R

# Check integer arguments with functional conditions.
#
# x: value to validate; if it is not already of integer type it is coerced with
#    as.integer() and only its first element is kept.
# condition: function taking the (converted) value and returning TRUE when the
#    value is acceptable.
#
# Returns the integer value. Raises an error if the coercion emits a warning,
# fails, or yields NA, and also if condition(x) is not satisfied.
.toInteger <- function(x, condition)
{
	# Capture the caller's expression now: once x is reassigned below,
	# substitute(x) would return the value instead of the expression.
	arg_name <- paste(deparse(substitute(x)), collapse=" ")
	# Handler shared by warnings and errors: a failed conversion must abort,
	# not merely build a message that nobody reads.
	failConversion <- function(ignored)
		stop(paste0("Cannot convert argument '", arg_name, "' to integer"), call.=FALSE)
	if (!is.integer(x))
	{
		x <- tryCatch({
			converted <- as.integer(x)[1]
			if (is.na(converted)) stop()
			converted
		}, warning=failConversion, error=failConversion)
	}
	if (!condition(x))
	{
		# deparse() gives a single readable string; pasting the raw call would
		# split it into its components.
		stop(paste0("Argument '", arg_name, "' does not verify condition ",
			paste(deparse(body(condition)), collapse=" ")), call.=FALSE)
	}
	x
}

# Check logical arguments.
#
# x: value to validate; if it is not already of logical type it is coerced with
#    as.logical() and only its first element is kept.
#
# Returns the logical value. Raises an error if the coercion emits a warning,
# fails, or yields NA. A value that is already logical is returned untouched.
.toLogical <- function(x)
{
	# Capture the caller's expression before x can be reassigned
	arg_name <- paste(deparse(substitute(x)), collapse=" ")
	# Handler shared by warnings and errors: a failed conversion must abort
	failConversion <- function(ignored)
		stop(paste0("Cannot convert argument '", arg_name, "' to logical"), call.=FALSE)
	if (!is.logical(x))
	{
		x <- tryCatch({
			converted <- as.logical(x)[1]
			if (is.na(converted)) stop()
			converted
		}, warning=failConversion, error=failConversion)
	}
	x
}

#' curvesToContribs
#'
#' Turn each series into a vector of per-scale wavelet "energies": the series is
#' resampled by spline interpolation, decomposed with a discrete wavelet transform,
#' and the Euclidean norm of the coefficients is taken at every scale, as described
#' in https://arxiv.org/abs/1101.4744v2
#'
#' @param curves [big.]matrix of series (in columns), of size L x n
#' @inheritParams claws
#'
#' @return A matrix with one column of contributions per series (n columns); the
#'   number of rows is the number of wavelet scales, ceiling(log2(L))
#'
#' @export
curvesToContribs <- function(curves, wav_filt, contrib_type)
{
	series <- as.matrix(curves)
	L <- nrow(series)
	# Number of decomposition levels: every series is resampled on 2^D points
	D <- ceiling( log2(L) )
	nb_sample_points <- 2^D
	abscissa <- 1:L

	# Contributions of one single series (one column of the input)
	contribsOneSeries <- function(x)
	{
		resampled <- spline(abscissa, x, n=nb_sample_points)$y
		decomposition <- wavelets::dwt(resampled, filter=wav_filt, D)
		# Euclidean norm of the wavelet coefficients at each scale, in the reverse
		# of the order in which the scales are stored in the W slot
		nrj <- rev( sapply( decomposition@W, function(coefs) sqrt( sum(coefs^2) ) ) )
		# Any type other than "absolute" works on relative contributions
		if (contrib_type != "absolute")
			nrj <- nrj / sum(nrj)
		# "logit" additionally transforms the relative contributions
		if (contrib_type == "logit")
			nrj <- - log(1 - nrj)
		unname(nrj)
	}

	apply(series, 2, contribsOneSeries)
}

# Helper function to divide indices into balanced sets.
# Ensure that all indices sets have at least min_size elements.
#
# indices: vector of items to distribute (not necessarily 1:length(indices)).
# nb_per_set: target number of items in each set.
# min_size: minimal acceptable number of items in any returned set.
#
# Returns a list of vectors taken from indices: floor(L / nb_per_set) consecutive
# chunks of nb_per_set items, followed (if L is not a multiple of nb_per_set) by
# one extra set holding the remaining items. Raises an error when the extra set
# cannot reach min_size without shrinking another set down to min_size or less.
.splitIndices <- function(indices, nb_per_set, min_size=1)
{
	L <- length(indices)
	nb_workers <- floor( L / nb_per_set )
	rem <- L %% nb_per_set #number of remaining unassigned items
	if (nb_workers == 0 || (nb_workers==1 && rem==0))
	{
		# L <= nb_per_set, simple case
		return (list(indices))
	}

	indices_workers <- lapply( seq_len(nb_workers), function(i)
		indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )

	if (rem == 0)
		return (indices_workers)

	# Take the trailing *elements* of indices, not their positions: the two only
	# coincide when indices is exactly 1:L.
	rem <- indices[(L-rem+1):L]
	# If remainder is smaller than min_size, feed it with indices from other sets
	# until either its size reaches min_size (success) or a donor set would get
	# smaller than min_size (failure).
	while (length(rem) < min_size)
	{
		# Donor sets are visited in turn, depending on the current remainder size
		index <- length(rem) %% nb_workers + 1
		if (length(indices_workers[[index]]) <= min_size)
		{
			stop("Impossible to split indices properly for clustering.
				Try increasing nb_items_clust or decreasing K1")
		}
		# Move the last item of the donor set to the remainder
		rem <- c(rem, tail(indices_workers[[index]],1))
		indices_workers[[index]] <- head( indices_workers[[index]], -1)
	}
	return ( c(indices_workers, list(rem) ) )
}

#' assignMedoids
#'
#' Find the closest medoid for each curve in input, closeness being measured by
#' the sum of squared differences between a curve and a medoid.
#'
#' @param curves (Chunk) of series whose medoids indices must be found; one series
#'   per column
#' @param medoids Matrix of medoids (in columns)
#'
#' @return The vector of integer assignments: for each column of \code{curves},
#'   the column index of the nearest medoid (the first one in case of ties).
#'   An input without any column gives \code{integer(0)}.
#' @export
assignMedoids <- function(curves, medoids)
{
	nb_series <- ncol(curves)
	# Typed preallocation, so that the result is integer even for an empty input
	mi <- rep(NA_integer_, nb_series)
	for (i in seq_len(nb_series))
	{
		# Squared Euclidean distance from series i to every medoid (one per column)
		sq_dists <- colSums( sweep(medoids, 1, curves[,i], '-')^2 )
		mi[i] <- which.min(sq_dists)
	}
	mi
}

#' filterMA
#'
#' Filter [time-]series by replacing all values by the moving average of values
#' centered around current one. Border values are averaged with available data.
#'
#' The computation is not done in R: this function only forwards its two
#' arguments, unchanged, to the native routine "filterMA" of package epclust.
#'
#' @param M_ A real matrix of size LxD
#' @param w_ The (odd) number of values to average
#'
#' @return The filtered matrix (in columns), of same size as the input
#' @export
filterMA <- function(M_, w_)
	# Thin wrapper around compiled code; no argument checking is done here
	.Call("filterMA", M_, w_, PACKAGE="epclust")

#' cleanBin
#'
#' Remove binary files to re-generate them at next run of \code{claws()}.
#' To be run in the folder where computations occurred (or no effect).
#'
#' Only files of the current working directory whose name contains the literal
#' text ".epclust.bin" are removed; hidden files are included in the search.
#'
#' @return \code{NULL}, invisibly
#' @export
cleanBin <- function()
{
	# 'pattern' is a regular expression, not a glob: the dots must be escaped,
	# otherwise they match any character and unrelated files could be deleted.
	bin_files <- list.files(pattern="\\.epclust\\.bin", all.files=TRUE)
	# unlink() is vectorized and does nothing on an empty vector
	unlink(bin_files)
	invisible(NULL)
}
Commit	Line	Data
	1	# Check integer arguments with functional conditions
	2	.toInteger <- function(x, condition)
	3	{
	4	errWarn <- function(ignored)
	5	paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
	6	if (!is.integer(x))
	7	tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
	8	warning=errWarn, error=errWarn)
	9	if (!condition(x))
	10	{
	11	stop(paste("Argument '",substitute(x),
	12	"' does not verify condition ",body(condition), sep=""))
	13	}
	14	x
	15	}
	16
	17	# Check logical arguments
	18	.toLogical <- function(x)
	19	{
	20	errWarn <- function(ignored)
	21	paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
	22	if (!is.logical(x))
	23	tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
	24	warning=errWarn, error=errWarn)
	25	x
	26	}
	27
	28	#' curvesToContribs
	29	#'
	30	#' Compute the discrete wavelet coefficients for each series, and aggregate them in
	31	#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
	32	#'
	33	#' @param curves [big.]matrix of series (in columns), of size L x n
	34	#' @inheritParams claws
	35	#'
	36	#' @return A matrix of size log(L) x n containing contributions in columns
	37	#'
	38	#' @export
	39	curvesToContribs <- function(curves, wav_filt, contrib_type)
	40	{
	41	series <- as.matrix(curves)
	42	L <- nrow(series)
	43	D <- ceiling( log2(L) )
	44	# Series are interpolated to all have length 2^D
	45	nb_sample_points <- 2^D
	46	apply(series, 2, function(x) {
	47	interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
	48	W <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
	49	# Compute the sum of squared discrete wavelet coefficients, for each scale
	50	nrj <- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
	51	if (contrib_type!="absolute")
	52	nrj <- nrj / sum(nrj)
	53	if (contrib_type=="logit")
	54	nrj <- - log(1 - nrj)
	55	unname( nrj )
	56	})
	57	}
	58
	59	# Helper function to divide indices into balanced sets.
	60	# Ensure that all indices sets have at least min_size elements.
	61	.splitIndices <- function(indices, nb_per_set, min_size=1)
	62	{
	63	L <- length(indices)
	64	nb_workers <- floor( L / nb_per_set )
	65	rem <- L %% nb_per_set
	66	if (nb_workers == 0 || (nb_workers==1 && rem==0))
	67	{
	68	# L <= nb_per_set, simple case
	69	return (list(indices))
	70	}
	71
	72	indices_workers <- lapply( seq_len(nb_workers), function(i)
	73	indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
	74
	75	rem <- L %% nb_per_set #number of remaining unassigned items
	76	if (rem == 0)
	77	return (indices_workers)
	78
	79	rem <- (L-rem+1):L
	80	# If remainder is smaller than min_size, feed it with indices from other sets
	81	# until either its size exceed min_size (success) or other sets' size
	82	# get lower min_size (failure).
	83	while (length(rem) < min_size)
	84	{
	85	index <- length(rem) %% nb_workers + 1
	86	if (length(indices_workers[[index]]) <= min_size)
	87	{
	88	stop("Impossible to split indices properly for clustering.
	89	Try increasing nb_items_clust or decreasing K1")
	90	}
	91	rem <- c(rem, tail(indices_workers[[index]],1))
	92	indices_workers[[index]] <- head( indices_workers[[index]], -1)
	93	}
	94	return ( c(indices_workers, list(rem) ) )
	95	}
	96
	97	#' assignMedoids
	98	#'
	99	#' Find the closest medoid for each curve in input
	100	#'
	101	#' @param curves (Chunk) of series whose medoids indices must be found
	102	#' @param medoids Matrix of medoids (in columns)
	103	#'
	104	#' @return The vector of integer assignments
	105	#' @export
	106	assignMedoids <- function(curves, medoids)
	107	{
	108	nb_series <- ncol(curves)
	109	mi <- rep(NA,nb_series)
	110	for (i in seq_len(nb_series))
	111	mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) )
	112	mi
	113	}
	114
	115	#' filterMA
	116	#'
	117	#' Filter [time-]series by replacing all values by the moving average of values
	118	#' centered around current one. Border values are averaged with available data.
	119	#'
	120	#' @param M_ A real matrix of size LxD
	121	#' @param w_ The (odd) number of values to average
	122	#'
	123	#' @return The filtered matrix (in columns), of same size as the input
	124	#' @export
	125	filterMA <- function(M_, w_)
	126	.Call("filterMA", M_, w_, PACKAGE="epclust")
	127
	128	#' cleanBin
	129	#'
	130	#' Remove binary files to re-generate them at next run of \code{claws()}.
	131	#' To be run in the folder where computations occurred (or no effect).
	132	#'
	133	#' @export
	134	cleanBin <- function()
	135	{
	136	bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE)
	137	for (file in bin_files)
	138	unlink(file)
	139	}