'update'
[epclust.git] / pkg / R / utils.R
CommitLineData
b7cd987d
BA
1# Check integer arguments with functional conditions
2.toInteger <- function(x, condition)
3{
4 errWarn <- function(ignored)
5 paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
6 if (!is.integer(x))
7 tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
8 warning=errWarn, error=errWarn)
9 if (!condition(x))
10 {
11 stop(paste("Argument '",substitute(x),
12 "' does not verify condition ",body(condition), sep=""))
13 }
14 x
15}
16
17# Check logical arguments
18.toLogical <- function(x)
19{
20 errWarn <- function(ignored)
21 paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
22 if (!is.logical(x))
23 tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
24 warning=errWarn, error=errWarn)
25 x
26}
27
28#' curvesToContribs
29#'
30#' Compute the discrete wavelet coefficients for each series, and aggregate them in
31#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
32#'
33#' @param curves [big.]matrix of series (in columns), of size L x n
34#' @param wav_filt Wavelet transform filter, as a vector c(Family,FilterNumber)
35#' @inheritParams claws
36#'
37#' @return A matrix of size log(L) x n containing contributions in columns
38#'
39#' @export
40curvesToContribs <- function(curves, wav_filt, contrib_type)
41{
42 series <- as.matrix(curves)
43 L <- nrow(series)
44 D <- ceiling( log2(L) )
45 # Series are interpolated to all have length 2^D
46 nb_sample_points <- 2^D
47 apply(series, 2, function(x) {
48 interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y
49 W <- wavethresh::wd(interpolated_curve, wav_filt[2], wav_filt[1])$D
50 # Compute the sum of squared discrete wavelet coefficients, for each scale
51 nrj <- sapply( 1:D, function(i) ( sqrt( sum(W[(2^D-(2^i-1)):(2^D-2^(i-1))]^2) ) ) )
52 if (contrib_type!="absolute")
53 nrj <- nrj / sum(nrj)
54 if (contrib_type=="logit")
55 nrj <- - log(1 - nrj)
56 unname( nrj )
57 })
58}
59
60# Helper function to divide indices into balanced sets.
61# Ensure that all indices sets have at least min_size elements.
62.splitIndices <- function(indices, nb_per_set, min_size=1)
63{
64 L <- length(indices)
65 nb_workers <- floor( L / nb_per_set )
66 rem <- L %% nb_per_set
67 if (nb_workers == 0 || (nb_workers==1 && rem==0))
68 {
69 # L <= nb_per_set, simple case
70 return (list(indices))
71 }
72
73 indices_workers <- lapply( seq_len(nb_workers), function(i)
74 indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
75
76 rem <- L %% nb_per_set #number of remaining unassigned items
77 if (rem == 0)
78 return (indices_workers)
79
80 rem <- (L-rem+1):L
81 # If remainder is smaller than min_size, feed it with indices from other sets
82 # until either its size exceed min_size (success) or other sets' size
83 # get lower min_size (failure).
84 while (length(rem) < min_size)
85 {
86 index <- length(rem) %% nb_workers + 1
87 if (length(indices_workers[[index]]) <= min_size)
88 {
89 stop("Impossible to split indices properly for clustering.
90 Try increasing nb_items_clust or decreasing K1")
91 }
92 rem <- c(rem, tail(indices_workers[[index]],1))
93 indices_workers[[index]] <- head( indices_workers[[index]], -1)
94 }
95 return ( c(indices_workers, list(rem) ) )
96}
97
98#' assignMedoids
99#'
100#' Find the closest medoid for each curve in input
101#'
102#' @param curves (Chunk) of series whose medoids indices must be found
103#' @param medoids Matrix of medoids (in columns)
104#'
105#' @return The vector of integer assignments
106#' @export
107assignMedoids <- function(curves, medoids)
108{
109 nb_series <- ncol(curves)
110 mi <- rep(NA,nb_series)
111 for (i in seq_len(nb_series))
112 mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) )
113 mi
114}
115
116#' filterMA
117#'
118#' Filter [time-]series by replacing all values by the moving average of values
119#' centered around current one. Border values are averaged with available data.
120#'
121#' @param M_ A real matrix of size LxD
122#' @param w_ The (odd) number of values to average
123#'
124#' @return The filtered matrix (in columns), of same size as the input
125#' @export
126filterMA <- function(M_, w_)
127 .Call("filterMA", M_, w_, PACKAGE="epclust")
128
129#' cleanBin
130#'
131#' Remove binary files to re-generate them at next run of \code{claws()}.
132#' To be run in the folder where computations occurred (or no effect).
133#'
134#' @export
135cleanBin <- function()
136{
137 bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE)
138 for (file in bin_files)
139 unlink(file)
140}