X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Futils.R;h=e79c00943bc89febca3e2018dbb7e5a33890f2df;hp=ba643d0b5ad198907e2b24c793244825a5e77446;hb=3c5a4b0880db63367a474a568e1322b3999932fe;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c diff --git a/epclust/R/utils.R b/epclust/R/utils.R index ba643d0..e79c009 100644 --- a/epclust/R/utils.R +++ b/epclust/R/utils.R @@ -30,13 +30,13 @@ #' Compute the discrete wavelet coefficients for each series, and aggregate them in #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2 #' -#' @param series [big.]matrix of series (in columns), of size L x n +#' @param curves [big.]matrix of series (in columns), of size L x n #' @inheritParams claws #' #' @return A matrix of size log(L) x n containing contributions in columns #' #' @export -curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE) +curvesToContribs = function(series, wav_filt, contrib_type) { L = nrow(series) D = ceiling( log2(L) ) @@ -55,9 +55,9 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE) }) } -# Helper function to divide indices into balanced sets -# If max == TRUE, sets sizes cannot exceed nb_per_set -.splitIndices = function(indices, nb_per_set, max=FALSE) +# Helper function to divide indices into balanced sets. +# Ensure that all indices sets have at least min_size elements. +.splitIndices = function(indices, nb_per_set, min_size=1) { L = length(indices) nb_workers = floor( L / nb_per_set ) @@ -65,29 +65,32 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE) if (nb_workers == 0 || (nb_workers==1 && rem==0)) { # L <= nb_per_set, simple case - indices_workers = list(indices) + return (list(indices)) } - else - { - indices_workers = lapply( seq_len(nb_workers), function(i) - indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] ) - if (max) - { - # Sets are not so well balanced, but size is supposed to be critical - return ( c( indices_workers, if (rem>0) list((L-rem+1):L) else NULL ) ) - } + indices_workers = lapply( seq_len(nb_workers), function(i) + indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] ) - # Spread the remaining load among the workers - rem = L %% nb_per_set - while (rem > 0) + rem = L %% nb_per_set #number of remaining unassigned items + if (rem == 0) + return (indices_workers) + + rem <- (L-rem+1):L + # If remainder is smaller than min_size, feed it with indices from other sets + # until either its size exceed min_size (success) or other sets' size + # get lower min_size (failure). + while (length(rem) < min_size) + { + index = length(rem) %% nb_workers + 1 + if (length(indices_workers[[index]]) <= min_size) { - index = rem%%nb_workers + 1 - indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1]) - rem = rem - 1 + stop("Impossible to split indices properly for clustering. + Try increasing nb_items_clust or decreasing K1") } + rem = c(rem, tail(indices_workers[[index]],1)) + indices_workers[[index]] = head( indices_workers[[index]], -1) } - indices_workers + return ( c(indices_workers, list(rem) ) ) } #' filterMA @@ -98,7 +101,7 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE) #' @param M_ A real matrix of size LxD #' @param w_ The (odd) number of values to average #' -#' @return The filtered matrix, of same size as the input +#' @return The filtered matrix (in columns), of same size as the input #' @export filterMA = function(M_, w_) .Call("filterMA", M_, w_, PACKAGE="epclust")