X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=f5e497fa0784078768e9ab7bf9d530817510a661;hb=bf5c08443087a23ea3d1a7ab993568e608a8b5dd;hp=3993e7685c97b194644d3600ceeea6b7bdac54ac;hpb=24ed5d835e2eebaaa4d5f8296f8d2e2132cc6398;p=epclust.git

diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R
index 3993e76..f5e497f 100644
--- a/epclust/R/clustering.R
+++ b/epclust/R/clustering.R
@@ -6,11 +6,13 @@
 #'
 #' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
 #'   iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
-#'   through discrete wavelets coefficients). \code{computeClusters1()} and
-#'   \code{computeClusters2()} correspond to the atomic clustering procedures respectively
-#'   for stage 1 and 2. The former applies the clustering algorithm (PAM) on a
-#'   contributions matrix, while the latter clusters a chunk of series inside one task
-#'   (~max nb_series_per_chunk)
+#'   through discrete wavelets coefficients).
+#'   \code{clusteringTask2()} runs a full stage-2 task: it computes the synchrones,
+#'   then the WER distances between them, and finally applies the clustering algorithm.
+#'   \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
+#'   clustering procedures respectively for stage 1 and 2. The former applies the
+#'   clustering algorithm (PAM) on a contributions matrix, while the latter applies
+#'   it on a matrix of pairwise distances (\code{diss=TRUE}), as computed in stage 2.
 #'
 #' @param indices Range of series indices to cluster in parallel (initial data)
 #' @param getContribs Function to retrieve contributions from initial series indices:
@@ -62,21 +64,31 @@ clusteringTask1 = function(
 
 #' @rdname clustering
 #' @export
-computeClusters1 = function(contribs, K1)
-	cluster::pam(contribs, K1, diss=FALSE)$id.med
-
-#' @rdname clustering
-#' @export
-computeClusters2 = function(medoids, K2,
+clusteringTask2 = function(medoids, K2,
 	getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
 {
+	if (nrow(medoids) <= K2) # already at most K2 medoids: nothing left to cluster
+		return (medoids)
 	synchrones = computeSynchrones(medoids,
 		getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
 	distances = computeWerDists(synchrones, ncores_clust, verbose, parll)
-	#TODO: if PAM cannot take big.matrix in input, cast it before... (more than OK in RAM)
-	medoids[ cluster::pam(distances, K2, diss=TRUE)$medoids , ]
+	# PAM ('cluster') cannot take a big.matrix: copy it, sized from 'distances' (no K1 in scope here)
+	mat_dists = matrix(nrow=nrow(distances), ncol=ncol(distances))
+	for (i in seq_len(nrow(distances))) # row-by-row copy into a plain R matrix
+		mat_dists[i,] = distances[i,]
+	medoids[ computeClusters2(mat_dists,K2), ] # keep only the rows elected as stage-2 medoids
 }
 
+#' @rdname clustering
+#' @export
+computeClusters1 = function(contribs, K1) # stage 1: PAM with K1 clusters on the contributions matrix
+	cluster::pam(contribs, K1, diss=FALSE)$id.med # diss=FALSE: rows are observations; returns medoid indices
+
+#' @rdname clustering
+#' @export
+computeClusters2 = function(distances, K2) # stage 2: PAM with K2 clusters on a dissimilarity matrix
+	cluster::pam(distances, K2, diss=TRUE)$id.med # diss=TRUE: input is pairwise distances; returns medoid indices
+
 #' computeSynchrones
 #'
 #' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,