X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Futils.R;h=e79c00943bc89febca3e2018dbb7e5a33890f2df;hp=ba643d0b5ad198907e2b24c793244825a5e77446;hb=3c5a4b0880db63367a474a568e1322b3999932fe;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c

diff --git a/epclust/R/utils.R b/epclust/R/utils.R
index ba643d0..e79c009 100644
--- a/epclust/R/utils.R
+++ b/epclust/R/utils.R
@@ -30,13 +30,13 @@
 #' Compute the discrete wavelet coefficients for each series, and aggregate them in
 #' energy contribution across scales as described in https://arxiv.org/abs/1101.4744v2
 #'
-#' @param series [big.]matrix of series (in columns), of size L x n
+#' @param series [big.]matrix of series (in columns), of size L x n
 #' @inheritParams claws
 #'
 #' @return A matrix of size log(L) x n containing contributions in columns
 #'
 #' @export
-curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
+curvesToContribs = function(series, wav_filt, contrib_type)
 {
 	L = nrow(series)
 	D = ceiling( log2(L) )
@@ -55,9 +55,9 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
 	})
 }
 
-# Helper function to divide indices into balanced sets
-# If max == TRUE, sets sizes cannot exceed nb_per_set
-.splitIndices = function(indices, nb_per_set, max=FALSE)
+# Helper function to divide indices into balanced sets.
+# Ensure that every set of indices has at least min_size elements.
+.splitIndices = function(indices, nb_per_set, min_size=1)
 {
 	L = length(indices)
 	nb_workers = floor( L / nb_per_set )
@@ -65,29 +65,32 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
 	if (nb_workers == 0 || (nb_workers==1 && rem==0))
 	{
 		# L <= nb_per_set, simple case
-		indices_workers = list(indices)
+		return (list(indices))
 	}
-	else
-	{
-		indices_workers = lapply( seq_len(nb_workers), function(i)
-			indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
 
-		if (max)
-		{
-			# Sets are not so well balanced, but size is supposed to be critical
-			return ( c( indices_workers, if (rem>0) list((L-rem+1):L) else NULL ) )
-		}
+	indices_workers = lapply( seq_len(nb_workers), function(i)
+		indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
 
-		# Spread the remaining load among the workers
-		rem = L %% nb_per_set
-		while (rem > 0)
+	rem = L %% nb_per_set #number of remaining unassigned items
+	if (rem == 0)
+		return (indices_workers)
+
+	rem <- (L-rem+1):L
+	# If remainder is smaller than min_size, feed it with indices from other sets
+	# until either its size reaches min_size (success) or another set's size
+	# would drop below min_size (failure).
+	while (length(rem) < min_size)
+	{
+		index = length(rem) %% nb_workers + 1
+		if (length(indices_workers[[index]]) <= min_size)
 		{
-			index = rem%%nb_workers + 1
-			indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1])
-			rem = rem - 1
+			stop("Impossible to split indices properly for clustering.
+				Try increasing nb_items_clust or decreasing K1")
 		}
+		rem = c(rem, tail(indices_workers[[index]],1))
+		indices_workers[[index]] = head( indices_workers[[index]], -1)
 	}
-	indices_workers
+	return ( c(indices_workers, list(rem) ) )
 }
 
 #' filterMA
@@ -98,7 +101,7 @@ curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
 #' @param M_ A real matrix of size LxD
 #' @param w_ The (odd) number of values to average
 #'
-#' @return The filtered matrix, of same size as the input
+#' @return The filtered matrix (in columns), of same size as the input
 #' @export
 filterMA = function(M_, w_)
 	.Call("filterMA", M_, w_, PACKAGE="epclust")