'update'
[epclust.git] / epclust / R / utils.R
CommitLineData
40f12a2f
BA
1# Check integer arguments with functional conditions
2.toInteger <- function(x, condition)
3{
4 errWarn <- function(ignored)
5 paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
6 if (!is.integer(x))
7 tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
8 warning = errWarn, error = errWarn)
9 if (!condition(x))
10 {
11 stop(paste("Argument '",substitute(x),
12 "' does not verify condition ",body(condition), sep=""))
13 }
14 x
15}
16
17# Check logical arguments
18.toLogical <- function(x)
19{
20 errWarn <- function(ignored)
21 paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
22 if (!is.logical(x))
23 tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
24 warning = errWarn, error = errWarn)
25 x
26}
27
28#' curvesToContribs
29#'
30#' Compute the discrete wavelet coefficients for each series, and aggregate them in
31#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
32#'
3c5a4b08 33#' @param curves [big.]matrix of series (in columns), of size L x n
40f12a2f
BA
34#' @inheritParams claws
35#'
36#' @return A matrix of size log(L) x n containing contributions in columns
37#'
38#' @export
3c5a4b08 39curvesToContribs = function(series, wav_filt, contrib_type)
40f12a2f
BA
40{
41 L = nrow(series)
42 D = ceiling( log2(L) )
43 # Series are interpolated to all have length 2^D
44 nb_sample_points = 2^D
45 apply(series, 2, function(x) {
46 interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
47 W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
48 # Compute the sum of squared discrete wavelet coefficients, for each scale
49 nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
50 if (contrib_type!="absolute")
51 nrj = nrj / sum(nrj)
52 if (contrib_type=="logit")
53 nrj = - log(1 - nrj)
54 nrj
55 })
56}
57
3c5a4b08
BA
58# Helper function to divide indices into balanced sets.
59# Ensure that all indices sets have at least min_size elements.
60.splitIndices = function(indices, nb_per_set, min_size=1)
40f12a2f
BA
61{
62 L = length(indices)
63 nb_workers = floor( L / nb_per_set )
64 rem = L %% nb_per_set
65 if (nb_workers == 0 || (nb_workers==1 && rem==0))
66 {
67 # L <= nb_per_set, simple case
3c5a4b08 68 return (list(indices))
40f12a2f 69 }
40f12a2f 70
3c5a4b08
BA
71 indices_workers = lapply( seq_len(nb_workers), function(i)
72 indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
40f12a2f 73
3c5a4b08
BA
74 rem = L %% nb_per_set #number of remaining unassigned items
75 if (rem == 0)
76 return (indices_workers)
77
78 rem <- (L-rem+1):L
79 # If remainder is smaller than min_size, feed it with indices from other sets
80 # until either its size exceed min_size (success) or other sets' size
81 # get lower min_size (failure).
82 while (length(rem) < min_size)
83 {
84 index = length(rem) %% nb_workers + 1
85 if (length(indices_workers[[index]]) <= min_size)
40f12a2f 86 {
3c5a4b08
BA
87 stop("Impossible to split indices properly for clustering.
88 Try increasing nb_items_clust or decreasing K1")
40f12a2f 89 }
3c5a4b08
BA
90 rem = c(rem, tail(indices_workers[[index]],1))
91 indices_workers[[index]] = head( indices_workers[[index]], -1)
40f12a2f 92 }
3c5a4b08 93 return ( c(indices_workers, list(rem) ) )
40f12a2f
BA
94}
95
96#' filterMA
97#'
98#' Filter [time-]series by replacing all values by the moving average of values
99#' centered around current one. Border values are averaged with available data.
100#'
101#' @param M_ A real matrix of size LxD
102#' @param w_ The (odd) number of values to average
103#'
3c5a4b08 104#' @return The filtered matrix (in columns), of same size as the input
40f12a2f
BA
105#' @export
106filterMA = function(M_, w_)
107 .Call("filterMA", M_, w_, PACKAGE="epclust")
108
109#' cleanBin
110#'
111#' Remove binary files to re-generate them at next run of \code{claws()}.
112#' Note: run it in the folder where the computations occurred (or no effect).
113#'
114#' @export
115cleanBin <- function()
116{
117 bin_files = list.files(pattern = "*.epclust.bin", all.files=TRUE)
118 for (file in bin_files)
119 unlink(file)
120}