'update'

[epclust.git] / epclust / R / utils.R
diff --git a/epclust/R/utils.R b/epclust/R/utils.R

index 8f7da38..e79c009 100644 (file)
--- a/epclust/R/utils.R
+++ b/epclust/R/utils.R
@@ -1,44 +1,120 @@
-toInteger <- function(x, condition)
+# Check integer arguments with functional conditions
+.toInteger <- function(x, condition)
  {
+       errWarn <- function(ignored)
+               paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
         if (!is.integer(x))
-               tryCatch(
-                       {x = as.integer(x)[1]},
-                       error = function(e) paste("cannot convert argument",substitute(x),"to integer")
-               )
+               tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
+                       warning = errWarn, error = errWarn)
         if (!condition(x))
-               stop(paste("argument",substitute(x),"does not verify condition",body(condition)))
+       {
+               stop(paste("Argument '",substitute(x),
+                       "' does not verify condition ",body(condition), sep=""))
+       }
         x
  }
  
-#TODO: merge these 2 next ?!
-serialize = function(coeffs)
+# Check logical arguments
+.toLogical <- function(x)
  {
-       #.........
-       #C function (from data.frame, type of IDs ??! force integers ? [yes])
-       #return raw vector
-}
-appendBinary = function(.......)
-{
-       #take raw vector, append it (binary mode) to a file
+       errWarn <- function(ignored)
+               paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
+       if (!is.logical(x))
+               tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
+                       warning = errWarn, error = errWarn)
+       x
  }
  
-#finalizeSerialization = function(...)
-#{
-#      #write number of series, and length of each...
-#}
-
-deserialize = function(coeffs, range)
+#' curvesToContribs
+#'
+#' Compute the discrete wavelet coefficients for each series, and aggregate them in
+#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
+#'
+#' @param curves [big.]matrix of series (in columns), of size L x n
+#' @inheritParams claws
+#'
+#' @return A matrix of size log(L) x n containing contributions in columns
+#'
+#' @export
+curvesToContribs = function(series, wav_filt, contrib_type)
  {
-       #......
-       #C function (from file name)
+       L = nrow(series)
+       D = ceiling( log2(L) )
+       # Series are interpolated to all have length 2^D
+       nb_sample_points = 2^D
+       apply(series, 2, function(x) {
+               interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
+               W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
+               # Compute the sum of squared discrete wavelet coefficients, for each scale
+               nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
+               if (contrib_type!="absolute")
+                       nrj = nrj / sum(nrj)
+               if (contrib_type=="logit")
+                       nrj = - log(1 - nrj)
+               nrj
+       })
  }
  
-getSeries(data, rank=NULL, id=NULL)
+# Helper function to divide indices into balanced sets.
+# Ensure that all indices sets have at least min_size elements.
+.splitIndices = function(indices, nb_per_set, min_size=1)
  {
-       #TODO:
+       L = length(indices)
+       nb_workers = floor( L / nb_per_set )
+       rem = L %% nb_per_set
+       if (nb_workers == 0 || (nb_workers==1 && rem==0))
+       {
+               # L <= nb_per_set, simple case
+               return (list(indices))
+       }
+
+       indices_workers = lapply( seq_len(nb_workers), function(i)
+               indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
+
+       rem = L %% nb_per_set #number of remaining unassigned items
+       if (rem == 0)
+               return (indices_workers)
+
+       rem <- (L-rem+1):L
+       # If remainder is smaller than min_size, feed it with indices from other sets
+       # until either its size exceed min_size (success) or other sets' size
+       # get lower min_size (failure).
+       while (length(rem) < min_size)
+       {
+               index = length(rem) %% nb_workers + 1
+               if (length(indices_workers[[index]]) <= min_size)
+               {
+                       stop("Impossible to split indices properly for clustering.
+                               Try increasing nb_items_clust or decreasing K1")
+               }
+               rem = c(rem, tail(indices_workers[[index]],1))
+               indices_workers[[index]] = head( indices_workers[[index]], -1)
+       }
+       return ( c(indices_workers, list(rem) ) )
  }
  
-getCoeffs(.....) #FROM BINARY FILE !!!
-{
+#' filterMA
+#'
+#' Filter [time-]series by replacing all values by the moving average of values
+#' centered around current one. Border values are averaged with available data.
+#'
+#' @param M_ A real matrix of size LxD
+#' @param w_ The (odd) number of values to average
+#'
+#' @return The filtered matrix (in columns), of same size as the input
+#' @export
+filterMA = function(M_, w_)
+       .Call("filterMA", M_, w_, PACKAGE="epclust")
  
+#' cleanBin
+#'
+#' Remove binary files to re-generate them at next run of \code{claws()}.
+#' Note: run it in the folder where the computations occurred (or no effect).
+#'
+#' @export
+cleanBin <- function()
+{
+       bin_files = list.files(pattern = "*.epclust.bin", all.files=TRUE)
+       for (file in bin_files)
+               unlink(file)
  }