ba643d0b5ad198907e2b24c793244825a5e77446
[epclust.git] / epclust / R / utils.R
1 # Check integer arguments with functional conditions
2 .toInteger <- function(x, condition)
3 {
4 errWarn <- function(ignored)
5 paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
6 if (!is.integer(x))
7 tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
8 warning = errWarn, error = errWarn)
9 if (!condition(x))
10 {
11 stop(paste("Argument '",substitute(x),
12 "' does not verify condition ",body(condition), sep=""))
13 }
14 x
15 }
16
17 # Check logical arguments
18 .toLogical <- function(x)
19 {
20 errWarn <- function(ignored)
21 paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
22 if (!is.logical(x))
23 tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
24 warning = errWarn, error = errWarn)
25 x
26 }
27
28 #' curvesToContribs
29 #'
30 #' Compute the discrete wavelet coefficients for each series, and aggregate them in
31 #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
32 #'
33 #' @param series [big.]matrix of series (in columns), of size L x n
34 #' @inheritParams claws
35 #'
36 #' @return A matrix of size log(L) x n containing contributions in columns
37 #'
38 #' @export
39 curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
40 {
41 L = nrow(series)
42 D = ceiling( log2(L) )
43 # Series are interpolated to all have length 2^D
44 nb_sample_points = 2^D
45 apply(series, 2, function(x) {
46 interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
47 W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
48 # Compute the sum of squared discrete wavelet coefficients, for each scale
49 nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
50 if (contrib_type!="absolute")
51 nrj = nrj / sum(nrj)
52 if (contrib_type=="logit")
53 nrj = - log(1 - nrj)
54 nrj
55 })
56 }
57
58 # Helper function to divide indices into balanced sets
59 # If max == TRUE, sets sizes cannot exceed nb_per_set
60 .splitIndices = function(indices, nb_per_set, max=FALSE)
61 {
62 L = length(indices)
63 nb_workers = floor( L / nb_per_set )
64 rem = L %% nb_per_set
65 if (nb_workers == 0 || (nb_workers==1 && rem==0))
66 {
67 # L <= nb_per_set, simple case
68 indices_workers = list(indices)
69 }
70 else
71 {
72 indices_workers = lapply( seq_len(nb_workers), function(i)
73 indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
74
75 if (max)
76 {
77 # Sets are not so well balanced, but size is supposed to be critical
78 return ( c( indices_workers, if (rem>0) list((L-rem+1):L) else NULL ) )
79 }
80
81 # Spread the remaining load among the workers
82 rem = L %% nb_per_set
83 while (rem > 0)
84 {
85 index = rem%%nb_workers + 1
86 indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1])
87 rem = rem - 1
88 }
89 }
90 indices_workers
91 }
92
93 #' filterMA
94 #'
95 #' Filter [time-]series by replacing all values by the moving average of values
96 #' centered around current one. Border values are averaged with available data.
97 #'
98 #' @param M_ A real matrix of size LxD
99 #' @param w_ The (odd) number of values to average
100 #'
101 #' @return The filtered matrix, of same size as the input
102 #' @export
103 filterMA = function(M_, w_)
104 .Call("filterMA", M_, w_, PACKAGE="epclust")
105
106 #' cleanBin
107 #'
108 #' Remove binary files to re-generate them at next run of \code{claws()}.
109 #' Note: run it in the folder where the computations occurred (or no effect).
110 #'
111 #' @export
112 cleanBin <- function()
113 {
114 bin_files = list.files(pattern = "*.epclust.bin", all.files=TRUE)
115 for (file in bin_files)
116 unlink(file)
117 }