# epclust/R/utils.R
# Check integer arguments with functional conditions
.toInteger <- function(x, condition)
{
  errWarn <- function(ignored)
    stop(paste("Cannot convert argument '",substitute(x),"' to integer", sep=""))
  if (!is.integer(x))
    tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
      warning = errWarn, error = errWarn)
  if (!condition(x))
  {
    stop(paste("Argument '",substitute(x),
      "' does not verify condition ",body(condition), sep=""))
  }
  x
}

# Check logical arguments
.toLogical <- function(x)
{
  errWarn <- function(ignored)
    stop(paste("Cannot convert argument '",substitute(x),"' to logical", sep=""))
  if (!is.logical(x))
    tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
      warning = errWarn, error = errWarn)
  x
}
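
# Illustrative usage of the two checkers above (kept as comments so nothing runs
# when the package is loaded; the variable names are hypothetical):
#   K1 <- .toInteger(15.0, function(x) x >= 2)  # returns 15L
#   .toInteger("a", function(x) x >= 2)         # stops: cannot convert to integer
#   verbose <- .toLogical(1)                    # returns TRUE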

#' curvesToContribs
#'
#' Compute the discrete wavelet coefficients of each series, and aggregate them into
#' energy contributions per scale, as described in https://arxiv.org/abs/1101.4744v2
#'
#' @param series [big.]matrix of series (in columns), of size L x n
#' @inheritParams claws
#'
#' @return A matrix of size ceiling(log2(L)) x n containing the contributions in columns
#'
#' @export
curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
{
  L = nrow(series)
  D = ceiling( log2(L) )
  # Series are interpolated so that they all have length 2^D
  nb_sample_points = 2^D
  apply(series, 2, function(x) {
    interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
    W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
    # Take the L2 norm (square root of the sum of squared coefficients) at each scale
    nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
    if (contrib_type!="absolute")
      nrj = nrj / sum(nrj)
    if (contrib_type=="logit")
      nrj = - log(1 - nrj)
    nrj
  })
}
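
# Illustrative call (commented out; assumes the 'wavelets' package is installed
# and uses hypothetical toy data -- "d8" is just one valid filter name):
#   series <- matrix(rnorm(200 * 10), nrow=200)  # 10 series of length L = 200
#   ctb <- curvesToContribs(series, wav_filt="d8", contrib_type="absolute")
#   dim(ctb)  # ceiling(log2(200)) = 8 rows, 10 columns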

# Helper function to divide indices into balanced sets.
# If max == TRUE, set sizes cannot exceed nb_per_set
.splitIndices = function(indices, nb_per_set, max=FALSE)
{
  L = length(indices)
  nb_workers = floor( L / nb_per_set )
  rem = L %% nb_per_set
  if (nb_workers == 0 || (nb_workers==1 && rem==0))
  {
    # L <= nb_per_set: simple case, a single set
    indices_workers = list(indices)
  }
  else
  {
    indices_workers = lapply( seq_len(nb_workers), function(i)
      indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )

    if (max)
    {
      # Sets are less balanced here, but the size cap is assumed to be critical
      return ( c( indices_workers, if (rem>0) list(indices[(L-rem+1):L]) else NULL ) )
    }

    # Spread the remaining load among the workers
    while (rem > 0)
    {
      index = rem%%nb_workers + 1
      indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1])
      rem = rem - 1
    }
  }
  indices_workers
}
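
# Illustrative behaviour of .splitIndices (kept as comments, nothing is executed;
# the results below follow from the logic above with 11 indices and nb_per_set = 3):
#   .splitIndices(1:11, 3)            # list(1:3, c(4:6, 11), c(7:9, 10)) -- balanced sizes 3,4,4
#   .splitIndices(1:11, 3, max=TRUE)  # list(1:3, 4:6, 7:9, 10:11) -- sizes capped at 3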

#' filterMA
#'
#' Filter [time-]series by replacing each value with the moving average of the values
#' in a window centered on it. Border values are averaged over the available data only.
#'
#' @param M_ A real matrix of size L x D
#' @param w_ The (odd) number of values to average
#'
#' @return The filtered matrix, of the same size as the input
#' @export
filterMA = function(M_, w_)
  .Call("filterMA", M_, w_, PACKAGE="epclust")
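
# A rough pure-R reference of what the compiled "filterMA" routine is documented
# to do above (centered moving average, window truncated at the borders). The name
# .filterMA_R_ref and this implementation are illustrative only; the package itself
# calls the C code through .Call() as shown above.
.filterMA_R_ref = function(M_, w_)
{
  L = nrow(M_)
  half = (w_ - 1) %/% 2
  apply(M_, 2, function(col)
    sapply(seq_len(L), function(i)
      # Average over the part of the window that falls inside [1, L]
      mean(col[max(1, i-half):min(L, i+half)])))
}
# Could be used to cross-check the compiled version on toy data, e.g.:
#   M <- matrix(rnorm(40), ncol=4); all.equal(filterMA(M, 3), .filterMA_R_ref(M, 3))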

#' cleanBin
#'
#' Remove binary files so that they are re-generated at the next run of \code{claws()}.
#' Note: run it from the folder where the computations took place, otherwise it has no effect.
#'
#' @export
cleanBin <- function()
{
  bin_files = list.files(pattern="\\.epclust\\.bin$", all.files=TRUE)
  for (file in bin_files)
    unlink(file)
}