First commit
[epclust.git] / pkg / R / utils.R
1 # Check integer arguments with functional conditions
2 .toInteger <- function(x, condition)
3 {
4 errWarn <- function(ignored)
5 paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
6 if (!is.integer(x))
7 tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
8 warning=errWarn, error=errWarn)
9 if (!condition(x))
10 {
11 stop(paste("Argument '",substitute(x),
12 "' does not verify condition ",body(condition), sep=""))
13 }
14 x
15 }
16
17 # Check logical arguments
18 .toLogical <- function(x)
19 {
20 errWarn <- function(ignored)
21 paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
22 if (!is.logical(x))
23 tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
24 warning=errWarn, error=errWarn)
25 x
26 }
27
28 #' curvesToContribs
29 #'
30 #' Compute the discrete wavelet coefficients for each series, and aggregate them in
31 #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
32 #'
33 #' @param curves [big.]matrix of series (in columns), of size L x n
34 #' @param wav_filt Wavelet transform filter, as a vector c(Family,FilterNumber)
35 #' @inheritParams claws
36 #'
37 #' @return A matrix of size log(L) x n containing contributions in columns
38 #'
39 #' @export
40 curvesToContribs <- function(curves, wav_filt, contrib_type)
41 {
42 series <- as.matrix(curves)
43 L <- nrow(series)
44 D <- ceiling( log2(L) )
45 # Series are interpolated to all have length 2^D
46 nb_sample_points <- 2^D
47 apply(series, 2, function(x) {
48 interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
49 W <- wavethresh::wd(interpolated_curve, wav_filt[2], wav_filt[1])$D
50 # Compute the sum of squared discrete wavelet coefficients, for each scale
51 nrj <- sapply( 1:D, function(i) ( sqrt( sum(W[(2^D-(2^i-1)):(2^D-2^(i-1))]^2) ) ) )
52 if (contrib_type!="absolute")
53 nrj <- nrj / sum(nrj)
54 if (contrib_type=="logit")
55 nrj <- - log(1 - nrj)
56 unname( nrj )
57 })
58 }
59
60 # Helper function to divide indices into balanced sets.
61 # Ensure that all indices sets have at least min_size elements.
62 .splitIndices <- function(indices, nb_per_set, min_size=1)
63 {
64 L <- length(indices)
65 nb_workers <- floor( L / nb_per_set )
66 rem <- L %% nb_per_set
67 if (nb_workers == 0 || (nb_workers==1 && rem==0))
68 {
69 # L <= nb_per_set, simple case
70 return (list(indices))
71 }
72
73 indices_workers <- lapply( seq_len(nb_workers), function(i)
74 indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
75
76 rem <- L %% nb_per_set #number of remaining unassigned items
77 if (rem == 0)
78 return (indices_workers)
79
80 rem <- (L-rem+1):L
81 # If remainder is smaller than min_size, feed it with indices from other sets
82 # until either its size exceed min_size (success) or other sets' size
83 # get lower min_size (failure).
84 while (length(rem) < min_size)
85 {
86 index <- length(rem) %% nb_workers + 1
87 if (length(indices_workers[[index]]) <= min_size)
88 {
89 stop("Impossible to split indices properly for clustering.
90 Try increasing nb_items_clust or decreasing K1")
91 }
92 rem <- c(rem, tail(indices_workers[[index]],1))
93 indices_workers[[index]] <- head( indices_workers[[index]], -1)
94 }
95 return ( c(indices_workers, list(rem) ) )
96 }
97
98 #' assignMedoids
99 #'
100 #' Find the closest medoid for each curve in input
101 #'
102 #' @param curves (Chunk) of series whose medoids indices must be found
103 #' @param medoids Matrix of medoids (in columns)
104 #'
105 #' @return The vector of integer assignments
106 #' @export
107 assignMedoids <- function(curves, medoids)
108 {
109 nb_series <- ncol(curves)
110 mi <- rep(NA,nb_series)
111 for (i in seq_len(nb_series))
112 mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) )
113 mi
114 }
115
116 #' filterMA
117 #'
118 #' Filter [time-]series by replacing all values by the moving average of values
119 #' centered around current one. Border values are averaged with available data.
120 #'
121 #' @param M_ A real matrix of size LxD
122 #' @param w_ The (odd) number of values to average
123 #'
124 #' @return The filtered matrix (in columns), of same size as the input
125 #' @export
126 filterMA <- function(M_, w_)
127 .Call("filterMA", M_, w_, PACKAGE="epclust")
128
129 #' cleanBin
130 #'
131 #' Remove binary files to re-generate them at next run of \code{claws()}.
132 #' To be run in the folder where computations occurred (or no effect).
133 #'
134 #' @export
135 cleanBin <- function()
136 {
137 bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE)
138 for (file in bin_files)
139 unlink(file)
140 }