improve/fix comments - TODO: debug examples, CSV and after
[epclust.git] / epclust / R / utils.R
... / ...
CommitLineData
# Check integer arguments with functional conditions
#
# Coerce 'x' to a single integer (unless it already has integer type), then make
# sure the result satisfies 'condition'.
#
# x: value to check; only its first element is kept when a conversion happens
# condition: function applied to the integer value, returning TRUE if acceptable
#
# Returns the (possibly converted) value. Stops with an error naming the
# caller's argument if the conversion fails or the condition is not verified.
.toInteger <- function(x, condition)
{
  # Capture the caller's expression right away: once 'x' is reassigned below,
  # substitute(x) would give its value instead of the argument's name.
  arg_name <- paste(deparse(substitute(x)), collapse=" ")
  if (!is.integer(x))
  {
    # Both warnings (e.g. NAs introduced by coercion) and errors raised by the
    # conversion are treated as failures and mapped to NA.
    x <- tryCatch(as.integer(x)[1],
      warning=function(w) NA_integer_, error=function(e) NA_integer_)
    if (is.na(x))
      stop(paste0("Cannot convert argument '", arg_name, "' to integer"))
  }
  if (!condition(x))
  {
    # deparse() turns the condition body into readable text (pasting the call
    # object directly would split it into its components).
    stop(paste0("Argument '", arg_name, "' does not verify condition ",
      paste(deparse(body(condition)), collapse=" ")))
  }
  x
}
16
# Check logical arguments
#
# Coerce 'x' to a single logical value unless it already has logical type.
#
# x: value to check; only its first element is kept when a conversion happens
#
# Returns the (possibly converted) value. Stops with an error naming the
# caller's argument if the conversion fails or yields NA.
.toLogical <- function(x)
{
  # Capture the caller's expression before 'x' is reassigned below
  arg_name <- paste(deparse(substitute(x)), collapse=" ")
  if (!is.logical(x))
  {
    # Warnings and errors raised by the conversion both count as failures
    x <- tryCatch(as.logical(x)[1],
      warning=function(w) NA, error=function(e) NA)
    if (is.na(x))
      stop(paste0("Cannot convert argument '", arg_name, "' to logical"))
  }
  x
}
27
#' curvesToContribs
#'
#' Compute the discrete wavelet coefficients of each series, then summarize them
#' as one energy contribution per scale, as described in
#' https://arxiv.org/abs/1101.4744v2
#'
#' @param curves [big.]matrix of series (in columns), of size L x n
#' @inheritParams claws
#'
#' @return The per-series contributions, one column per series; each series
#'   yields one value per wavelet level, with D = ceiling(log2(L)) levels
#'   requested from the transform
#'
#' @export
curvesToContribs <- function(curves, wav_filt, contrib_type)
{
  series <- as.matrix(curves)
  L <- nrow(series)
  # Number of decomposition levels; every series is resampled to length 2^D
  D <- ceiling( log2(L) )
  nb_sample_points <- 2^D

  # Contributions of one series (a column of 'series')
  contribsOneCurve <- function(x)
  {
    interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
    decomposition <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)
    # Euclidean norm of the wavelet coefficients at each level, listed in the
    # reverse of the order in which the transform returns the levels
    nrj <- rev( sapply( decomposition@W, function(v) sqrt( sum(v^2) ) ) )
    if (contrib_type == "absolute")
      return ( unname(nrj) )
    # Any other type: express each energy as a share of the total...
    nrj <- nrj / sum(nrj)
    # ...and "logit" additionally applies -log(1 - share)
    if (contrib_type == "logit")
      nrj <- - log(1 - nrj)
    unname(nrj)
  }

  apply(series, 2, contribsOneCurve)
}
58
# Helper function to divide indices into balanced sets.
# Ensure that all indices sets have at least min_size elements.
#
# indices: vector of indices to split
# nb_per_set: number of elements in each full set
# min_size: minimum number of elements required in every returned set
#
# Returns a list of vectors which together contain every element of 'indices':
# floor(L / nb_per_set) sets of nb_per_set consecutive elements, plus one last
# set holding the leftover elements (if any). Stops with an error when the
# leftover set cannot reach min_size without shrinking another set to min_size
# or below.
.splitIndices <- function(indices, nb_per_set, min_size=1)
{
  L <- length(indices)
  nb_workers <- floor( L / nb_per_set )
  nb_rem <- L %% nb_per_set #number of items left once the full sets are filled
  if (nb_workers == 0 || (nb_workers==1 && nb_rem==0))
  {
    # L <= nb_per_set, simple case
    return (list(indices))
  }

  indices_workers <- lapply( seq_len(nb_workers), function(i)
    indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )

  if (nb_rem == 0)
    return (indices_workers)

  # Leftover elements: take the values of 'indices' (not their positions), so
  # that the last set is consistent with the other ones
  rem <- indices[(L-nb_rem+1):L]
  # If remainder is smaller than min_size, feed it with indices from other sets
  # until either its size reaches min_size (success) or another set would get
  # smaller than min_size (failure).
  while (length(rem) < min_size)
  {
    # Cycle over the full sets as the remainder grows
    index <- length(rem) %% nb_workers + 1
    if (length(indices_workers[[index]]) <= min_size)
    {
      stop("Impossible to split indices properly for clustering.\n",
        "Try increasing nb_items_clust or decreasing K1")
    }
    rem <- c(rem, tail(indices_workers[[index]],1))
    indices_workers[[index]] <- head( indices_workers[[index]], -1)
  }
  c(indices_workers, list(rem))
}
96
#' assignMedoids
#'
#' For each input curve, find the medoid at the smallest squared Euclidean
#' distance
#'
#' @param curves (Chunk) of series (in columns) whose medoids indices must be found
#' @param medoids Matrix of medoids (in columns)
#'
#' @return The vector of integer assignments: for each curve, the column index
#'   of its closest medoid
#' @export
assignMedoids <- function(curves, medoids)
{
  assignments <- rep(NA, ncol(curves))
  for (col in seq_along(assignments))
  {
    # Subtract the current curve from every medoid (column-wise)
    centered <- sweep(medoids, 1, curves[,col], '-')
    # One squared distance per medoid; keep the index of the smallest
    sq_dists <- colSums(centered^2)
    assignments[col] <- which.min(sq_dists)
  }
  assignments
}
114
#' filterMA
#'
#' Filter [time-]series: each value is replaced by the moving average of the
#' values centered around it. Border values are averaged with available data.
#' The computation is delegated to the compiled routine "filterMA" of the
#' epclust package.
#'
#' @param M_ A real matrix of size LxD
#' @param w_ The (odd) number of values to average
#'
#' @return The filtered matrix (in columns), of same size as the input
#' @export
filterMA <- function(M_, w_)
{
  .Call("filterMA", M_, w_, PACKAGE="epclust")
}
127
#' cleanBin
#'
#' Remove binary files to re-generate them at next run of \code{claws()}.
#' To be run in the folder where computations occurred (or no effect).
#' Only files of the current working directory whose name ends with
#' ".epclust.bin" (hidden ones included) are deleted.
#'
#' @return NULL, invisibly
#'
#' @export
cleanBin <- function()
{
  # 'pattern' is a regular expression, not a glob: escape the dots and anchor
  # the end, so that only names ending with ".epclust.bin" are selected
  bin_files <- list.files(pattern="\\.epclust\\.bin$", all.files=TRUE)
  unlink(bin_files)
  invisible(NULL)
}