Commit | Line | Data |
---|---|---|
b7cd987d BA |
1 | # Check integer arguments with functional conditions |
2 | .toInteger <- function(x, condition) | |
3 | { | |
4 | errWarn <- function(ignored) | |
5 | paste("Cannot convert argument' ",substitute(x),"' to integer", sep="") | |
6 | if (!is.integer(x)) | |
7 | tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()}, | |
8 | warning=errWarn, error=errWarn) | |
9 | if (!condition(x)) | |
10 | { | |
11 | stop(paste("Argument '",substitute(x), | |
12 | "' does not verify condition ",body(condition), sep="")) | |
13 | } | |
14 | x | |
15 | } | |
16 | ||
17 | # Check logical arguments | |
18 | .toLogical <- function(x) | |
19 | { | |
20 | errWarn <- function(ignored) | |
21 | paste("Cannot convert argument' ",substitute(x),"' to logical", sep="") | |
22 | if (!is.logical(x)) | |
23 | tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()}, | |
24 | warning=errWarn, error=errWarn) | |
25 | x | |
26 | } | |
27 | ||
28 | #' curvesToContribs | |
29 | #' | |
30 | #' Compute the discrete wavelet coefficients for each series, and aggregate them in | |
31 | #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2 | |
32 | #' | |
33 | #' @param curves [big.]matrix of series (in columns), of size L x n | |
34 | #' @param wav_filt Wavelet transform filter, as a vector c(Family,FilterNumber) | |
35 | #' @inheritParams claws | |
36 | #' | |
37 | #' @return A matrix of size log(L) x n containing contributions in columns | |
38 | #' | |
39 | #' @export | |
40 | curvesToContribs <- function(curves, wav_filt, contrib_type) | |
41 | { | |
42 | series <- as.matrix(curves) | |
43 | L <- nrow(series) | |
44 | D <- ceiling( log2(L) ) | |
45 | # Series are interpolated to all have length 2^D | |
46 | nb_sample_points <- 2^D | |
47 | apply(series, 2, function(x) { | |
48 | interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y | |
49 | W <- wavethresh::wd(interpolated_curve, wav_filt[2], wav_filt[1])$D | |
50 | # Compute the sum of squared discrete wavelet coefficients, for each scale | |
51 | nrj <- sapply( 1:D, function(i) ( sqrt( sum(W[(2^D-(2^i-1)):(2^D-2^(i-1))]^2) ) ) ) | |
52 | if (contrib_type!="absolute") | |
53 | nrj <- nrj / sum(nrj) | |
54 | if (contrib_type=="logit") | |
55 | nrj <- - log(1 - nrj) | |
56 | unname( nrj ) | |
57 | }) | |
58 | } | |
59 | ||
60 | # Helper function to divide indices into balanced sets. | |
61 | # Ensure that all indices sets have at least min_size elements. | |
62 | .splitIndices <- function(indices, nb_per_set, min_size=1) | |
63 | { | |
64 | L <- length(indices) | |
65 | nb_workers <- floor( L / nb_per_set ) | |
66 | rem <- L %% nb_per_set | |
67 | if (nb_workers == 0 || (nb_workers==1 && rem==0)) | |
68 | { | |
69 | # L <= nb_per_set, simple case | |
70 | return (list(indices)) | |
71 | } | |
72 | ||
73 | indices_workers <- lapply( seq_len(nb_workers), function(i) | |
74 | indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] ) | |
75 | ||
76 | rem <- L %% nb_per_set #number of remaining unassigned items | |
77 | if (rem == 0) | |
78 | return (indices_workers) | |
79 | ||
80 | rem <- (L-rem+1):L | |
81 | # If remainder is smaller than min_size, feed it with indices from other sets | |
82 | # until either its size exceed min_size (success) or other sets' size | |
83 | # get lower min_size (failure). | |
84 | while (length(rem) < min_size) | |
85 | { | |
86 | index <- length(rem) %% nb_workers + 1 | |
87 | if (length(indices_workers[[index]]) <= min_size) | |
88 | { | |
89 | stop("Impossible to split indices properly for clustering. | |
90 | Try increasing nb_items_clust or decreasing K1") | |
91 | } | |
92 | rem <- c(rem, tail(indices_workers[[index]],1)) | |
93 | indices_workers[[index]] <- head( indices_workers[[index]], -1) | |
94 | } | |
95 | return ( c(indices_workers, list(rem) ) ) | |
96 | } | |
97 | ||
98 | #' assignMedoids | |
99 | #' | |
100 | #' Find the closest medoid for each curve in input | |
101 | #' | |
102 | #' @param curves (Chunk) of series whose medoids indices must be found | |
103 | #' @param medoids Matrix of medoids (in columns) | |
104 | #' | |
105 | #' @return The vector of integer assignments | |
106 | #' @export | |
107 | assignMedoids <- function(curves, medoids) | |
108 | { | |
109 | nb_series <- ncol(curves) | |
110 | mi <- rep(NA,nb_series) | |
111 | for (i in seq_len(nb_series)) | |
112 | mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) ) | |
113 | mi | |
114 | } | |
115 | ||
116 | #' filterMA | |
117 | #' | |
118 | #' Filter [time-]series by replacing all values by the moving average of values | |
119 | #' centered around current one. Border values are averaged with available data. | |
120 | #' | |
121 | #' @param M_ A real matrix of size LxD | |
122 | #' @param w_ The (odd) number of values to average | |
123 | #' | |
124 | #' @return The filtered matrix (in columns), of same size as the input | |
125 | #' @export | |
126 | filterMA <- function(M_, w_) | |
127 | .Call("filterMA", M_, w_, PACKAGE="epclust") | |
128 | ||
129 | #' cleanBin | |
130 | #' | |
131 | #' Remove binary files to re-generate them at next run of \code{claws()}. | |
132 | #' To be run in the folder where computations occurred (or no effect). | |
133 | #' | |
134 | #' @export | |
135 | cleanBin <- function() | |
136 | { | |
137 | bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE) | |
138 | for (file in bin_files) | |
139 | unlink(file) | |
140 | } |