X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=pkg%2FR%2Futils.R;fp=pkg%2FR%2Futils.R;h=12fada2da00918639c0eb98ef6104839a501031c;hp=0000000000000000000000000000000000000000;hb=e906736ea27105237e84c904dce6170353726292;hpb=57f337af19cd6251815bb1ff2d62f4c58e8b6078

diff --git a/pkg/R/utils.R b/pkg/R/utils.R
new file mode 100644
index 0000000..12fada2
--- /dev/null
+++ b/pkg/R/utils.R
@@ -0,0 +1,140 @@
+# Check integer arguments with functional conditions
+.toInteger <- function(x, condition)
+{
+	errWarn <- function(ignored)
+		paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
+	if (!is.integer(x))
+		tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
+			warning=errWarn, error=errWarn)
+	if (!condition(x))
+	{
+		stop(paste("Argument '",substitute(x),
+			"' does not verify condition ",body(condition), sep=""))
+	}
+	x
+}
+
+# Check logical arguments
+.toLogical <- function(x)
+{
+	errWarn <- function(ignored)
+		paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
+	if (!is.logical(x))
+		tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
+			warning=errWarn, error=errWarn)
+	x
+}
+
+#' curvesToContribs
+#'
+#' Compute the discrete wavelet coefficients for each series, and aggregate them in
+#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
+#'
+#' @param curves [big.]matrix of series (in columns), of size L x n
+#' @param wav_filt Wavelet transform filter, as a vector c(Family,FilterNumber)
+#' @inheritParams claws
+#'
+#' @return A matrix of size log(L) x n containing contributions in columns
+#'
+#' @export
+curvesToContribs <- function(curves, wav_filt, contrib_type)
+{
+	series <- as.matrix(curves)
+	L <- nrow(series)
+	D <- ceiling( log2(L) )
+	# Series are interpolated to all have length 2^D
+	nb_sample_points <- 2^D
+	apply(series, 2, function(x) {
+		interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
+		W <- wavethresh::wd(interpolated_curve, wav_filt[2], wav_filt[1])$D
+		# Compute the sum of squared discrete wavelet coefficients, for each scale
+		nrj <- sapply( 1:D, function(i) ( sqrt( sum(W[(2^D-(2^i-1)):(2^D-2^(i-1))]^2) ) ) )
+		if (contrib_type!="absolute")
+			nrj <- nrj / sum(nrj)
+		if (contrib_type=="logit")
+			nrj <- - log(1 - nrj)
+		unname( nrj )
+	})
+}
+
+# Helper function to divide indices into balanced sets.
+# Ensure that all indices sets have at least min_size elements.
+.splitIndices <- function(indices, nb_per_set, min_size=1)
+{
+	L <- length(indices)
+	nb_workers <- floor( L / nb_per_set )
+	rem <- L %% nb_per_set
+	if (nb_workers == 0 || (nb_workers==1 && rem==0))
+	{
+		# L <= nb_per_set, simple case
+		return (list(indices))
+	}
+
+	indices_workers <- lapply( seq_len(nb_workers), function(i)
+		indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
+
+	rem <- L %% nb_per_set #number of remaining unassigned items
+	if (rem == 0)
+		return (indices_workers)
+
+	rem <- (L-rem+1):L
+	# If remainder is smaller than min_size, feed it with indices from other sets
+	# until either its size exceed min_size (success) or other sets' size
+	# get lower min_size (failure).
+	while (length(rem) < min_size)
+	{
+		index <- length(rem) %% nb_workers + 1
+		if (length(indices_workers[[index]]) <= min_size)
+		{
+			stop("Impossible to split indices properly for clustering.
+				Try increasing nb_items_clust or decreasing K1")
+		}
+		rem <- c(rem, tail(indices_workers[[index]],1))
+		indices_workers[[index]] <- head( indices_workers[[index]], -1)
+	}
+	return ( c(indices_workers, list(rem) ) )
+}
+
+#' assignMedoids
+#'
+#' Find the closest medoid for each curve in input
+#'
+#' @param curves (Chunk) of series whose medoids indices must be found
+#' @param medoids Matrix of medoids (in columns)
+#'
+#' @return The vector of integer assignments
+#' @export
+assignMedoids <- function(curves, medoids)
+{
+	nb_series <- ncol(curves)
+	mi <- rep(NA,nb_series)
+	for (i in seq_len(nb_series))
+		mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) )
+	mi
+}
+
+#' filterMA
+#'
+#' Filter [time-]series by replacing all values by the moving average of values
+#' centered around current one. Border values are averaged with available data.
+#'
+#' @param M_ A real matrix of size LxD
+#' @param w_ The (odd) number of values to average
+#'
+#' @return The filtered matrix (in columns), of same size as the input
+#' @export
+filterMA <- function(M_, w_)
+	.Call("filterMA", M_, w_, PACKAGE="epclust")
+
+#' cleanBin
+#'
+#' Remove binary files to re-generate them at next run of \code{claws()}.
+#' To be run in the folder where computations occurred (or no effect).
+#'
+#' @export
+cleanBin <- function()
+{
+	bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE)
+	for (file in bin_files)
+		unlink(file)
+}