X-Git-Url: https://git.auder.net/doc/html/img/rock_paper_scissors_lizard_spock.gif?a=blobdiff_plain;f=epclust%2FR%2Futils.R;h=e79c00943bc89febca3e2018dbb7e5a33890f2df;hb=3c5a4b0880db63367a474a568e1322b3999932fe;hp=ba643d0b5ad198907e2b24c793244825a5e77446;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c;p=epclust.git
diff --git a/epclust/R/utils.R b/epclust/R/utils.R
index ba643d0..e79c009 100644
--- a/epclust/R/utils.R
+++ b/epclust/R/utils.R
@@ -30,13 +30,13 @@
#' Compute the discrete wavelet coefficients for each series, and aggregate them in
#' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
#'
-#' @param series [big.]matrix of series (in columns), of size L x n
+#' @param series [big.]matrix of series (in columns), of size L x n
#' @inheritParams claws
#'
#' @return A matrix of size log(L) x n containing contributions in columns
#'
#' @export
-curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
+curvesToContribs = function(series, wav_filt, contrib_type)
{
L = nrow(series)
D = ceiling( log2(L) )
@@ -55,9 +55,9 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
})
}
-# Helper function to divide indices into balanced sets
-# If max == TRUE, sets sizes cannot exceed nb_per_set
-.splitIndices = function(indices, nb_per_set, max=FALSE)
+# Helper function to divide indices into balanced sets.
+# Ensures that every set of indices has at least min_size elements.
+.splitIndices = function(indices, nb_per_set, min_size=1)
{
L = length(indices)
nb_workers = floor( L / nb_per_set )
@@ -65,29 +65,32 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
if (nb_workers == 0 || (nb_workers==1 && rem==0))
{
# L <= nb_per_set, simple case
- indices_workers = list(indices)
+ return (list(indices))
}
- else
- {
- indices_workers = lapply( seq_len(nb_workers), function(i)
- indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
- if (max)
- {
- # Sets are not so well balanced, but size is supposed to be critical
- return ( c( indices_workers, if (rem>0) list((L-rem+1):L) else NULL ) )
- }
+ indices_workers = lapply( seq_len(nb_workers), function(i)
+ indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
- # Spread the remaining load among the workers
- rem = L %% nb_per_set
- while (rem > 0)
+ rem = L %% nb_per_set #number of remaining unassigned items
+ if (rem == 0)
+ return (indices_workers)
+
+ rem <- (L-rem+1):L
+ # If the remainder is smaller than min_size, feed it with indices taken from
+ # the other sets until either its size reaches min_size (success) or another
+ # set's size would drop below min_size (failure).
+ while (length(rem) < min_size)
+ {
+ index = length(rem) %% nb_workers + 1
+ if (length(indices_workers[[index]]) <= min_size)
{
- index = rem%%nb_workers + 1
- indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1])
- rem = rem - 1
+ stop("Impossible to split indices properly for clustering.
+ Try increasing nb_items_clust or decreasing K1")
}
+ rem = c(rem, tail(indices_workers[[index]],1))
+ indices_workers[[index]] = head( indices_workers[[index]], -1)
}
- indices_workers
+ return ( c(indices_workers, list(rem) ) )
}
#' filterMA
@@ -98,7 +101,7 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
#' @param M_ A real matrix of size LxD
#' @param w_ The (odd) number of values to average
#'
-#' @return The filtered matrix, of same size as the input
+#' @return The filtered matrix (in columns), of same size as the input
#' @export
filterMA = function(M_, w_)
.Call("filterMA", M_, w_, PACKAGE="epclust")