72f59ecec977b0fe69aa892cf749b7bb0a9feb4b
[epclust.git] / epclust / R / utils.R
1 # Check integer arguments with functional conditions
2 .toInteger <- function(x, condition)
3 {
4 errWarn <- function(ignored)
5 paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
6 if (!is.integer(x))
7 tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
8 warning=errWarn, error=errWarn)
9 if (!condition(x))
10 {
11 stop(paste("Argument '",substitute(x),
12 "' does not verify condition ",body(condition), sep=""))
13 }
14 x
15 }
16
17 # Check logical arguments
18 .toLogical <- function(x)
19 {
20 errWarn <- function(ignored)
21 paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
22 if (!is.logical(x))
23 tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
24 warning=errWarn, error=errWarn)
25 x
26 }
27
28 #' curvesToContribs
29 #'
30 #' Compute the discrete wavelet coefficients for each series, and aggregate them in
31 #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
32 #'
33 #' @param curves [big.]matrix of series (in columns), of size L x n
34 #' @inheritParams claws
35 #'
36 #' @return A matrix of size log(L) x n containing contributions in columns
37 #'
38 #' @export
39 curvesToContribs <- function(curves, wav_filt, contrib_type)
40 {
41 series <- as.matrix(curves)
42 L <- nrow(series)
43 D <- ceiling( log2(L) )
44 # Series are interpolated to all have length 2^D
45 nb_sample_points <- 2^D
46 apply(series, 2, function(x) {
47 interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
48 W <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
49 # Compute the sum of squared discrete wavelet coefficients, for each scale
50 nrj <- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
51 if (contrib_type!="absolute")
52 nrj <- nrj / sum(nrj)
53 if (contrib_type=="logit")
54 nrj <- - log(1 - nrj)
55 unname( nrj )
56 })
57 }
58
59 # Helper function to divide indices into balanced sets.
60 # Ensure that all indices sets have at least min_size elements.
61 .splitIndices <- function(indices, nb_per_set, min_size=1)
62 {
63 L <- length(indices)
64 nb_workers <- floor( L / nb_per_set )
65 rem <- L %% nb_per_set
66 if (nb_workers == 0 || (nb_workers==1 && rem==0))
67 {
68 # L <= nb_per_set, simple case
69 return (list(indices))
70 }
71
72 indices_workers <- lapply( seq_len(nb_workers), function(i)
73 indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
74
75 rem <- L %% nb_per_set #number of remaining unassigned items
76 if (rem == 0)
77 return (indices_workers)
78
79 rem <- (L-rem+1):L
80 # If remainder is smaller than min_size, feed it with indices from other sets
81 # until either its size exceed min_size (success) or other sets' size
82 # get lower min_size (failure).
83 while (length(rem) < min_size)
84 {
85 index <- length(rem) %% nb_workers + 1
86 if (length(indices_workers[[index]]) <= min_size)
87 {
88 stop("Impossible to split indices properly for clustering.
89 Try increasing nb_items_clust or decreasing K1")
90 }
91 rem <- c(rem, tail(indices_workers[[index]],1))
92 indices_workers[[index]] <- head( indices_workers[[index]], -1)
93 }
94 return ( c(indices_workers, list(rem) ) )
95 }
96
97 #' assignMedoids
98 #'
99 #' Find the closest medoid for each curve in input
100 #'
101 #' @param curves (Chunk) of series whose medoids indices must be found
102 #' @param medoids Matrix of medoids (in columns)
103 #'
104 #' @return The vector of integer assignments
105 #' @export
106 assignMedoids <- function(curves, medoids)
107 {
108 nb_series <- ncol(curves)
109 mi <- rep(NA,nb_series)
110 for (i in seq_len(nb_series))
111 mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) )
112 mi
113 }
114
115 #' filterMA
116 #'
117 #' Filter [time-]series by replacing all values by the moving average of values
118 #' centered around current one. Border values are averaged with available data.
119 #'
120 #' @param M_ A real matrix of size LxD
121 #' @param w_ The (odd) number of values to average
122 #'
123 #' @return The filtered matrix (in columns), of same size as the input
124 #' @export
125 filterMA <- function(M_, w_)
126 .Call("filterMA", M_, w_, PACKAGE="epclust")
127
128 #' cleanBin
129 #'
130 #' Remove binary files to re-generate them at next run of \code{claws()}.
131 #' To be run in the folder where computations occurred (or no effect).
132 #'
133 #' @export
134 cleanBin <- function()
135 {
136 bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE)
137 for (file in bin_files)
138 unlink(file)
139 }