Commit | Line | Data |
---|---|---|
40f12a2f BA |
1 | # Check integer arguments with functional conditions |
2 | .toInteger <- function(x, condition) | |
3 | { | |
4 | errWarn <- function(ignored) | |
5 | paste("Cannot convert argument' ",substitute(x),"' to integer", sep="") | |
6 | if (!is.integer(x)) | |
282342ba BA |
7 | tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()}, |
8 | warning=errWarn, error=errWarn) | |
40f12a2f BA |
9 | if (!condition(x)) |
10 | { | |
11 | stop(paste("Argument '",substitute(x), | |
12 | "' does not verify condition ",body(condition), sep="")) | |
13 | } | |
14 | x | |
15 | } | |
16 | ||
17 | # Check logical arguments | |
18 | .toLogical <- function(x) | |
19 | { | |
20 | errWarn <- function(ignored) | |
21 | paste("Cannot convert argument' ",substitute(x),"' to logical", sep="") | |
22 | if (!is.logical(x)) | |
282342ba BA |
23 | tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()}, |
24 | warning=errWarn, error=errWarn) | |
40f12a2f BA |
25 | x |
26 | } | |
27 | ||
28 | #' curvesToContribs | |
29 | #' | |
30 | #' Compute the discrete wavelet coefficients for each series, and aggregate them in | |
31 | #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2 | |
32 | #' | |
3c5a4b08 | 33 | #' @param curves [big.]matrix of series (in columns), of size L x n |
40f12a2f BA |
34 | #' @inheritParams claws |
35 | #' | |
36 | #' @return A matrix of size log(L) x n containing contributions in columns | |
37 | #' | |
38 | #' @export | |
3fb6e823 | 39 | curvesToContribs <- function(curves, wav_filt, contrib_type) |
40f12a2f | 40 | { |
3fb6e823 | 41 | series <- as.matrix(curves) |
282342ba BA |
42 | L <- nrow(series) |
43 | D <- ceiling( log2(L) ) | |
40f12a2f | 44 | # Series are interpolated to all have length 2^D |
282342ba | 45 | nb_sample_points <- 2^D |
40f12a2f | 46 | apply(series, 2, function(x) { |
282342ba BA |
47 | interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y |
48 | W <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W | |
40f12a2f | 49 | # Compute the sum of squared discrete wavelet coefficients, for each scale |
282342ba | 50 | nrj <- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) |
40f12a2f | 51 | if (contrib_type!="absolute") |
282342ba | 52 | nrj <- nrj / sum(nrj) |
40f12a2f | 53 | if (contrib_type=="logit") |
282342ba BA |
54 | nrj <- - log(1 - nrj) |
55 | unname( nrj ) | |
40f12a2f BA |
56 | }) |
57 | } | |
58 | ||
3c5a4b08 BA |
59 | # Helper function to divide indices into balanced sets. |
60 | # Ensure that all indices sets have at least min_size elements. | |
282342ba | 61 | .splitIndices <- function(indices, nb_per_set, min_size=1) |
40f12a2f | 62 | { |
282342ba BA |
63 | L <- length(indices) |
64 | nb_workers <- floor( L / nb_per_set ) | |
65 | rem <- L %% nb_per_set | |
40f12a2f BA |
66 | if (nb_workers == 0 || (nb_workers==1 && rem==0)) |
67 | { | |
68 | # L <= nb_per_set, simple case | |
3c5a4b08 | 69 | return (list(indices)) |
40f12a2f | 70 | } |
40f12a2f | 71 | |
282342ba | 72 | indices_workers <- lapply( seq_len(nb_workers), function(i) |
3c5a4b08 | 73 | indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] ) |
40f12a2f | 74 | |
282342ba | 75 | rem <- L %% nb_per_set #number of remaining unassigned items |
3c5a4b08 BA |
76 | if (rem == 0) |
77 | return (indices_workers) | |
78 | ||
79 | rem <- (L-rem+1):L | |
80 | # If remainder is smaller than min_size, feed it with indices from other sets | |
81 | # until either its size exceed min_size (success) or other sets' size | |
82 | # get lower min_size (failure). | |
83 | while (length(rem) < min_size) | |
84 | { | |
282342ba | 85 | index <- length(rem) %% nb_workers + 1 |
3c5a4b08 | 86 | if (length(indices_workers[[index]]) <= min_size) |
40f12a2f | 87 | { |
3c5a4b08 BA |
88 | stop("Impossible to split indices properly for clustering. |
89 | Try increasing nb_items_clust or decreasing K1") | |
40f12a2f | 90 | } |
282342ba BA |
91 | rem <- c(rem, tail(indices_workers[[index]],1)) |
92 | indices_workers[[index]] <- head( indices_workers[[index]], -1) | |
40f12a2f | 93 | } |
3c5a4b08 | 94 | return ( c(indices_workers, list(rem) ) ) |
40f12a2f BA |
95 | } |
96 | ||
282342ba BA |
97 | #' assignMedoids |
98 | #' | |
3fb6e823 | 99 | #' Find the closest medoid for each curve in input |
282342ba BA |
100 | #' |
101 | #' @param curves (Chunk) of series whose medoids indices must be found | |
102 | #' @param medoids Matrix of medoids (in columns) | |
103 | #' | |
104 | #' @return The vector of integer assignments | |
105 | #' @export | |
106 | assignMedoids <- function(curves, medoids) | |
107 | { | |
108 | nb_series <- ncol(curves) | |
109 | mi <- rep(NA,nb_series) | |
110 | for (i in seq_len(nb_series)) | |
111 | mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) ) | |
112 | mi | |
113 | } | |
114 | ||
40f12a2f BA |
115 | #' filterMA |
116 | #' | |
117 | #' Filter [time-]series by replacing all values by the moving average of values | |
118 | #' centered around current one. Border values are averaged with available data. | |
119 | #' | |
120 | #' @param M_ A real matrix of size LxD | |
121 | #' @param w_ The (odd) number of values to average | |
122 | #' | |
3c5a4b08 | 123 | #' @return The filtered matrix (in columns), of same size as the input |
40f12a2f | 124 | #' @export |
282342ba | 125 | filterMA <- function(M_, w_) |
40f12a2f BA |
126 | .Call("filterMA", M_, w_, PACKAGE="epclust") |
127 | ||
128 | #' cleanBin | |
129 | #' | |
130 | #' Remove binary files to re-generate them at next run of \code{claws()}. | |
3fb6e823 | 131 | #' To be run in the folder where computations occurred (or no effect). |
40f12a2f BA |
132 | #' |
133 | #' @export | |
134 | cleanBin <- function() | |
135 | { | |
282342ba | 136 | bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE) |
40f12a2f BA |
137 | for (file in bin_files) |
138 | unlink(file) | |
139 | } |