| 1 | # Check integer arguments with functional conditions |
| 2 | .toInteger <- function(x, condition) |
| 3 | { |
| 4 | errWarn <- function(ignored) |
| 5 | paste("Cannot convert argument' ",substitute(x),"' to integer", sep="") |
| 6 | if (!is.integer(x)) |
| 7 | tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()}, |
| 8 | warning=errWarn, error=errWarn) |
| 9 | if (!condition(x)) |
| 10 | { |
| 11 | stop(paste("Argument '",substitute(x), |
| 12 | "' does not verify condition ",body(condition), sep="")) |
| 13 | } |
| 14 | x |
| 15 | } |
| 16 | |
| 17 | # Check logical arguments |
| 18 | .toLogical <- function(x) |
| 19 | { |
| 20 | errWarn <- function(ignored) |
| 21 | paste("Cannot convert argument' ",substitute(x),"' to logical", sep="") |
| 22 | if (!is.logical(x)) |
| 23 | tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()}, |
| 24 | warning=errWarn, error=errWarn) |
| 25 | x |
| 26 | } |
| 27 | |
| 28 | #' curvesToContribs |
| 29 | #' |
| 30 | #' Compute the discrete wavelet coefficients for each series, and aggregate them in |
| 31 | #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2 |
| 32 | #' |
| 33 | #' @param curves [big.]matrix of series (in columns), of size L x n |
| 34 | #' @inheritParams claws |
| 35 | #' |
| 36 | #' @return A matrix of size log(L) x n containing contributions in columns |
| 37 | #' |
| 38 | #' @export |
| 39 | curvesToContribs <- function(curves, wav_filt, contrib_type) |
| 40 | { |
| 41 | series <- as.matrix(curves) |
| 42 | L <- nrow(series) |
| 43 | D <- ceiling( log2(L) ) |
| 44 | # Series are interpolated to all have length 2^D |
| 45 | nb_sample_points <- 2^D |
| 46 | apply(series, 2, function(x) { |
| 47 | interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y |
| 48 | W <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W |
| 49 | # Compute the sum of squared discrete wavelet coefficients, for each scale |
| 50 | nrj <- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) |
| 51 | if (contrib_type!="absolute") |
| 52 | nrj <- nrj / sum(nrj) |
| 53 | if (contrib_type=="logit") |
| 54 | nrj <- - log(1 - nrj) |
| 55 | unname( nrj ) |
| 56 | }) |
| 57 | } |
| 58 | |
| 59 | # Helper function to divide indices into balanced sets. |
| 60 | # Ensure that all indices sets have at least min_size elements. |
| 61 | .splitIndices <- function(indices, nb_per_set, min_size=1) |
| 62 | { |
| 63 | L <- length(indices) |
| 64 | nb_workers <- floor( L / nb_per_set ) |
| 65 | rem <- L %% nb_per_set |
| 66 | if (nb_workers == 0 || (nb_workers==1 && rem==0)) |
| 67 | { |
| 68 | # L <= nb_per_set, simple case |
| 69 | return (list(indices)) |
| 70 | } |
| 71 | |
| 72 | indices_workers <- lapply( seq_len(nb_workers), function(i) |
| 73 | indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] ) |
| 74 | |
| 75 | rem <- L %% nb_per_set #number of remaining unassigned items |
| 76 | if (rem == 0) |
| 77 | return (indices_workers) |
| 78 | |
| 79 | rem <- (L-rem+1):L |
| 80 | # If remainder is smaller than min_size, feed it with indices from other sets |
| 81 | # until either its size exceed min_size (success) or other sets' size |
| 82 | # get lower min_size (failure). |
| 83 | while (length(rem) < min_size) |
| 84 | { |
| 85 | index <- length(rem) %% nb_workers + 1 |
| 86 | if (length(indices_workers[[index]]) <= min_size) |
| 87 | { |
| 88 | stop("Impossible to split indices properly for clustering. |
| 89 | Try increasing nb_items_clust or decreasing K1") |
| 90 | } |
| 91 | rem <- c(rem, tail(indices_workers[[index]],1)) |
| 92 | indices_workers[[index]] <- head( indices_workers[[index]], -1) |
| 93 | } |
| 94 | return ( c(indices_workers, list(rem) ) ) |
| 95 | } |
| 96 | |
| 97 | #' assignMedoids |
| 98 | #' |
| 99 | #' Find the closest medoid for each curve in input |
| 100 | #' |
| 101 | #' @param curves (Chunk) of series whose medoids indices must be found |
| 102 | #' @param medoids Matrix of medoids (in columns) |
| 103 | #' |
| 104 | #' @return The vector of integer assignments |
| 105 | #' @export |
| 106 | assignMedoids <- function(curves, medoids) |
| 107 | { |
| 108 | nb_series <- ncol(curves) |
| 109 | mi <- rep(NA,nb_series) |
| 110 | for (i in seq_len(nb_series)) |
| 111 | mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) ) |
| 112 | mi |
| 113 | } |
| 114 | |
| 115 | #' filterMA |
| 116 | #' |
| 117 | #' Filter [time-]series by replacing all values by the moving average of values |
| 118 | #' centered around current one. Border values are averaged with available data. |
| 119 | #' |
| 120 | #' @param M_ A real matrix of size LxD |
| 121 | #' @param w_ The (odd) number of values to average |
| 122 | #' |
| 123 | #' @return The filtered matrix (in columns), of same size as the input |
| 124 | #' @export |
| 125 | filterMA <- function(M_, w_) |
| 126 | .Call("filterMA", M_, w_, PACKAGE="epclust") |
| 127 | |
| 128 | #' cleanBin |
| 129 | #' |
| 130 | #' Remove binary files to re-generate them at next run of \code{claws()}. |
| 131 | #' To be run in the folder where computations occurred (or no effect). |
| 132 | #' |
| 133 | #' @export |
| 134 | cleanBin <- function() |
| 135 | { |
| 136 | bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE) |
| 137 | for (file in bin_files) |
| 138 | unlink(file) |
| 139 | } |