load-balancing parLapply for WER dists computations
[epclust.git] / epclust / R / utils.R
CommitLineData
40f12a2f
BA
1# Check integer arguments with functional conditions
2.toInteger <- function(x, condition)
3{
4 errWarn <- function(ignored)
5 paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
6 if (!is.integer(x))
282342ba
BA
7 tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
8 warning=errWarn, error=errWarn)
40f12a2f
BA
9 if (!condition(x))
10 {
11 stop(paste("Argument '",substitute(x),
12 "' does not verify condition ",body(condition), sep=""))
13 }
14 x
15}
16
17# Check logical arguments
18.toLogical <- function(x)
19{
20 errWarn <- function(ignored)
21 paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
22 if (!is.logical(x))
282342ba
BA
23 tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
24 warning=errWarn, error=errWarn)
40f12a2f
BA
25 x
26}
27
28#' curvesToContribs
29#'
30#' Compute the discrete wavelet coefficients for each series, and aggregate them in
31#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
32#'
3c5a4b08 33#' @param curves [big.]matrix of series (in columns), of size L x n
40f12a2f
BA
34#' @inheritParams claws
35#'
36#' @return A matrix of size log(L) x n containing contributions in columns
37#'
38#' @export
3fb6e823 39curvesToContribs <- function(curves, wav_filt, contrib_type)
40f12a2f 40{
3fb6e823 41 series <- as.matrix(curves)
282342ba
BA
42 L <- nrow(series)
43 D <- ceiling( log2(L) )
40f12a2f 44 # Series are interpolated to all have length 2^D
282342ba 45 nb_sample_points <- 2^D
40f12a2f 46 apply(series, 2, function(x) {
282342ba
BA
47 interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
48 W <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
40f12a2f 49 # Compute the sum of squared discrete wavelet coefficients, for each scale
282342ba 50 nrj <- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
40f12a2f 51 if (contrib_type!="absolute")
282342ba 52 nrj <- nrj / sum(nrj)
40f12a2f 53 if (contrib_type=="logit")
282342ba
BA
54 nrj <- - log(1 - nrj)
55 unname( nrj )
40f12a2f
BA
56 })
57}
58
3c5a4b08
BA
59# Helper function to divide indices into balanced sets.
60# Ensure that all indices sets have at least min_size elements.
282342ba 61.splitIndices <- function(indices, nb_per_set, min_size=1)
40f12a2f 62{
282342ba
BA
63 L <- length(indices)
64 nb_workers <- floor( L / nb_per_set )
65 rem <- L %% nb_per_set
40f12a2f
BA
66 if (nb_workers == 0 || (nb_workers==1 && rem==0))
67 {
68 # L <= nb_per_set, simple case
3c5a4b08 69 return (list(indices))
40f12a2f 70 }
40f12a2f 71
282342ba 72 indices_workers <- lapply( seq_len(nb_workers), function(i)
3c5a4b08 73 indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
40f12a2f 74
282342ba 75 rem <- L %% nb_per_set #number of remaining unassigned items
3c5a4b08
BA
76 if (rem == 0)
77 return (indices_workers)
78
79 rem <- (L-rem+1):L
80 # If remainder is smaller than min_size, feed it with indices from other sets
81 # until either its size exceed min_size (success) or other sets' size
82 # get lower min_size (failure).
83 while (length(rem) < min_size)
84 {
282342ba 85 index <- length(rem) %% nb_workers + 1
3c5a4b08 86 if (length(indices_workers[[index]]) <= min_size)
40f12a2f 87 {
3c5a4b08
BA
88 stop("Impossible to split indices properly for clustering.
89 Try increasing nb_items_clust or decreasing K1")
40f12a2f 90 }
282342ba
BA
91 rem <- c(rem, tail(indices_workers[[index]],1))
92 indices_workers[[index]] <- head( indices_workers[[index]], -1)
40f12a2f 93 }
3c5a4b08 94 return ( c(indices_workers, list(rem) ) )
40f12a2f
BA
95}
96
282342ba
BA
97#' assignMedoids
98#'
3fb6e823 99#' Find the closest medoid for each curve in input
282342ba
BA
100#'
101#' @param curves (Chunk) of series whose medoids indices must be found
102#' @param medoids Matrix of medoids (in columns)
103#'
104#' @return The vector of integer assignments
105#' @export
106assignMedoids <- function(curves, medoids)
107{
108 nb_series <- ncol(curves)
109 mi <- rep(NA,nb_series)
110 for (i in seq_len(nb_series))
111 mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) )
112 mi
113}
114
40f12a2f
BA
115#' filterMA
116#'
117#' Filter [time-]series by replacing all values by the moving average of values
118#' centered around current one. Border values are averaged with available data.
119#'
120#' @param M_ A real matrix of size LxD
121#' @param w_ The (odd) number of values to average
122#'
3c5a4b08 123#' @return The filtered matrix (in columns), of same size as the input
40f12a2f 124#' @export
282342ba 125filterMA <- function(M_, w_)
40f12a2f
BA
126 .Call("filterMA", M_, w_, PACKAGE="epclust")
127
128#' cleanBin
129#'
130#' Remove binary files to re-generate them at next run of \code{claws()}.
3fb6e823 131#' To be run in the folder where computations occurred (or no effect).
40f12a2f
BA
132#'
133#' @export
134cleanBin <- function()
135{
282342ba 136 bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE)
40f12a2f
BA
137 for (file in bin_files)
138 unlink(file)
139}