epclust/R/utils.R

   1 # Check integer arguments with functional conditions
   2 .toInteger <- function(x, condition)
   3 {
   4     errWarn <- function(ignored)
   5         paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
   6     if (!is.integer(x))
   7         tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
   8             warning = errWarn, error = errWarn)
   9     if (!condition(x))
  10     {
  11         stop(paste("Argument '",substitute(x),
  12             "' does not verify condition ",body(condition), sep=""))
  13     }
  14     x
  15 }
  16
  17 # Check logical arguments
  18 .toLogical <- function(x)
  19 {
  20     errWarn <- function(ignored)
  21         paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
  22     if (!is.logical(x))
  23         tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
  24             warning = errWarn, error = errWarn)
  25     x
  26 }
  27
  28 #' curvesToContribs
  29 #'
  30 #' Compute the discrete wavelet coefficients for each series, and aggregate them in
  31 #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
  32 #'
  33 #' @param curves [big.]matrix of series (in columns), of size L x n
  34 #' @inheritParams claws
  35 #'
  36 #' @return A matrix of size log(L) x n containing contributions in columns
  37 #'
  38 #' @export
  39 curvesToContribs = function(series, wav_filt, contrib_type)
  40 {
  41     L = nrow(series)
  42     D = ceiling( log2(L) )
  43     # Series are interpolated to all have length 2^D
  44     nb_sample_points = 2^D
  45     apply(series, 2, function(x) {
  46         interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
  47         W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
  48         # Compute the sum of squared discrete wavelet coefficients, for each scale
  49         nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
  50         if (contrib_type!="absolute")
  51             nrj = nrj / sum(nrj)
  52         if (contrib_type=="logit")
  53             nrj = - log(1 - nrj)
  54         nrj
  55     })
  56 }
  57
  58 # Helper function to divide indices into balanced sets.
  59 # Ensure that all indices sets have at least min_size elements.
  60 .splitIndices = function(indices, nb_per_set, min_size=1)
  61 {
  62     L = length(indices)
  63     nb_workers = floor( L / nb_per_set )
  64     rem = L %% nb_per_set
  65     if (nb_workers == 0 || (nb_workers==1 && rem==0))
  66     {
  67         # L <= nb_per_set, simple case
  68         return (list(indices))
  69     }
  70
  71     indices_workers = lapply( seq_len(nb_workers), function(i)
  72         indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
  73
  74     rem = L %% nb_per_set #number of remaining unassigned items
  75     if (rem == 0)
  76         return (indices_workers)
  77
  78     rem <- (L-rem+1):L
  79     # If remainder is smaller than min_size, feed it with indices from other sets
  80     # until either its size exceed min_size (success) or other sets' size
  81     # get lower min_size (failure).
  82     while (length(rem) < min_size)
  83     {
  84         index = length(rem) %% nb_workers + 1
  85         if (length(indices_workers[[index]]) <= min_size)
  86         {
  87             stop("Impossible to split indices properly for clustering.
  88                 Try increasing nb_items_clust or decreasing K1")
  89         }
  90         rem = c(rem, tail(indices_workers[[index]],1))
  91         indices_workers[[index]] = head( indices_workers[[index]], -1)
  92     }
  93     return ( c(indices_workers, list(rem) ) )
  94 }
  95
  96 #' filterMA
  97 #'
  98 #' Filter [time-]series by replacing all values by the moving average of values
  99 #' centered around current one. Border values are averaged with available data.
 100 #'
 101 #' @param M_ A real matrix of size LxD
 102 #' @param w_ The (odd) number of values to average
 103 #'
 104 #' @return The filtered matrix (in columns), of same size as the input
 105 #' @export
 106 filterMA = function(M_, w_)
 107     .Call("filterMA", M_, w_, PACKAGE="epclust")
 108
 109 #' cleanBin
 110 #'
 111 #' Remove binary files to re-generate them at next run of \code{claws()}.
 112 #' Note: run it in the folder where the computations occurred (or no effect).
 113 #'
 114 #' @export
 115 cleanBin <- function()
 116 {
 117     bin_files = list.files(pattern = "*.epclust.bin", all.files=TRUE)
 118     for (file in bin_files)
 119         unlink(file)
 120 }