-toInteger <- function(x, condition)
+# Check integer arguments with functional conditions.
+#
+# Coerces x to a single integer (first element only) when it is not already of
+# integer type, then checks it with condition(), a function of one argument.
+# Stops with an error naming the argument if the coercion fails, warns or yields
+# NA, or if condition(x) is FALSE. Returns the (possibly converted) value.
+.toInteger <- function(x, condition)
{
+ # Record the caller's expression before x is reassigned: once x holds a plain
+ # value, substitute(x) no longer yields the expression supplied by the caller.
+ argName <- paste(deparse(substitute(x)), collapse=" ")
+ # The handler must signal the failure itself: a string returned by a tryCatch
+ # handler is only the (discarded) value of tryCatch and would hide the problem.
+ errWarn <- function(ignored)
+ stop(paste("Cannot convert argument '",argName,"' to integer", sep=""), call.=FALSE)
if (!is.integer(x))
- tryCatch(
- {x = as.integer(x)[1]},
- error = function(e) paste("cannot convert argument",substitute(x),"to integer")
- )
+ tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
+ warning = errWarn, error = errWarn)
if (!condition(x))
- stop(paste("argument",substitute(x),"does not verify condition",body(condition)))
+ {
+ # deparse() turns the body of condition into text; pasting the raw call object
+ # would produce one string per component of the call.
+ stop(paste("Argument '",argName,
+ "' does not verify condition ",
+ paste(deparse(body(condition)), collapse=" "), sep=""))
+ }
x
}
-curvesToCoeffs = function(series, wf)
+# Check logical arguments.
+#
+# Coerces x to a single logical (first element only) when it is not already of
+# logical type. Stops with an error naming the argument if the coercion fails,
+# warns or yields NA. Returns the (possibly converted) value.
+.toLogical <- function(x)
{
- L = length(series[1,])
+ # Record the caller's expression before x is reassigned: once x holds a plain
+ # value, substitute(x) no longer yields the expression supplied by the caller.
+ argName <- paste(deparse(substitute(x)), collapse=" ")
+ # The handler must signal the failure itself: a string returned by a tryCatch
+ # handler is only the (discarded) value of tryCatch and would hide the problem.
+ errWarn <- function(ignored)
+ stop(paste("Cannot convert argument '",argName,"' to logical", sep=""), call.=FALSE)
+ if (!is.logical(x))
+ tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
+ warning = errWarn, error = errWarn)
+ x
+}
+
+#' curvesToContribs
+#'
+#' Compute the discrete wavelet coefficients for each series, and aggregate them in
+#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
+#'
+#' @param series [big.]matrix of series (in columns), of size L x n
+#' @inheritParams claws
+#'
+#' @return A matrix of size D x n, with D = ceiling(log2(L)), containing contributions in columns
+#'
+#' @export
+curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
+{
+ L = nrow(series)
D = ceiling( log2(L) )
+ # Series are interpolated to all have length 2^D
nb_sample_points = 2^D
- apply(series, 1, function(x) {
+ apply(series, 2, function(x) {
interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
- W = wavelets::dwt(interpolated_curve, filter=wf, D)@W
- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
+ W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
+ # Compute the square root of the sum of squared wavelet coefficients, for each scale
+ nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
+ if (contrib_type!="absolute")
+ nrj = nrj / sum(nrj)
+ if (contrib_type=="logit")
+ nrj = - log(1 - nrj)
+ nrj
})
}
-#data: matrix of double or connection
-serialize = function(data, file, type, nb_per_chunk)
+# Helper function to divide indices into balanced sets.
+# If max == TRUE, sets sizes cannot exceed nb_per_set: the leftover elements form
+# an additional (smaller) last set. Otherwise the leftovers are appended one by
+# one to the existing sets, which may then exceed nb_per_set.
+# Returns a list of vectors, all of them made of elements of indices.
+.splitIndices = function(indices, nb_per_set, max=FALSE)
{
- bin_data = file(file, "ab")
- #write data length on first call
- nbytes = ifelse(type=="double",8,4)
- first_write = FALSE
- if (file.info(file)$size == 0)
+ L = length(indices)
+ nb_workers = floor( L / nb_per_set )
+ rem = L %% nb_per_set
+ if (nb_workers == 0 || (nb_workers==1 && rem==0))
{
- #number of items always on 8 bytes
- writeBin(0L, bin_data, size=8) #,endian="little")
- first_write = TRUE
+ # L <= nb_per_set, simple case
+ indices_workers = list(indices)
}
- if (is.matrix(data))
+ else
{
- writeBin(t(data), bin_data, size=nbytes)
- data_length = ncol(data)
- }
- else #if (is(data, "connection"))
- {
- if (first_write)
+ indices_workers = lapply( seq_len(nb_workers), function(i)
+ indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
+
+ if (max)
{
- data_line = scan(data, double(), sep=",", nlines=1)
- writeBin(data_line, bin_data, size=nbytes)
- data_length = length(data_line)
+ # Sets are not so well balanced, but size is supposed to be critical.
+ # The last set must hold elements of indices, not their positions.
+ return ( c( indices_workers, if (rem>0) list(indices[(L-rem+1):L]) else NULL ) )
}
- repeat
+
+ # Spread the remaining load among the workers (rem is still L %% nb_per_set)
+ while (rem > 0)
{
- data_chunk = scan(data, double(), sep=",", nlines=nb_per_chunk)
- if (length(data_chunk)==0)
- break
- writeBin(data_chunk, bin_data, size=nbytes)
+ index = rem%%nb_workers + 1
+ indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1])
+ rem = rem - 1
}
}
- if (first_write)
- {
- #ecrire file_size-1 / (nbytes*nbWritten) en 0 dans bin_data ! ignored == file_size
- ignored = seek(bin_data, 0)
- writeBin(data_length, bin_data, size=8)
- }
- close(bin_data)
+ indices_workers
}
-#TODO: read in binary file, always same structure
-getDataFromFile(indices, file, type)
+#' filterMA
+#'
+#' Smooth [time-]series with a centered moving average: every value is replaced by
+#' the mean of the values around it. Near the borders, only available data is averaged.
+#' The computation is delegated to the native routine "filterMA" of package epclust.
+#'
+#' @param M_ A real matrix of size LxD
+#' @param w_ The (odd) number of values to average
+#'
+#' @return The filtered matrix, of same size as the input
+#' @export
+filterMA <- function(M_, w_)
+{
+ .Call("filterMA", M_, w_, PACKAGE="epclust")
+}
+
+#' cleanBin
+#'
+#' Remove binary files to re-generate them at next run of \code{claws()}.
+#' Only files of the current working directory whose name ends with
+#' ".epclust.bin" (hidden files included) are deleted.
+#' Note: run it in the folder where the computations occurred (or no effect).
+#'
+#' @return NULL, invisibly
+#'
+#' @export
+cleanBin <- function()
{
- bin_data = file(file, "rb")
- nbytes = ifelse(type=="double",8,4)
- data_length = readBin(bin_data,"double",1,nbytes) #,endian="little")
- t(sapply(indices, function(i) readBin(bin_data,"double",n=data_length,size=nbytes)))
+ # list.files() expects a regular expression, not a glob: escape the dots and
+ # anchor at the end so that unrelated files are never matched (and deleted).
+ bin_files = list.files(pattern = "\\.epclust\\.bin$", all.files=TRUE)
+ # unlink() is vectorized, and a no-op on an empty vector
+ unlink(bin_files)
+ invisible(NULL)
}