#'
#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
#' iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
-#' through discrete wavelets coefficients). \code{computeClusters1()} and
-#' \code{computeClusters2()} correspond to the atomic clustering procedures respectively
-#' for stage 1 and 2. The former applies the clustering algorithm (PAM) on a
-#' contributions matrix, while the latter clusters a chunk of series inside one task
-#' (~max nb_series_per_chunk)
+#' through discrete wavelet coefficients).
+#' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing
+#' synchrones and then WER distances, before applying the clustering algorithm.
+#' \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
+#' clustering procedures for stages 1 and 2, respectively. The former applies the
+#' clustering algorithm (PAM) on a contributions matrix, while the latter applies
+#' it on a matrix of precomputed (WER) distances
#'
#' @param indices Range of series indices to cluster in parallel (initial data)
#' @param getContribs Function to retrieve contributions from initial series indices:
#' @rdname clustering
#' @export
-computeClusters1 = function(contribs, K1)
- cluster::pam(contribs, K1, diss=FALSE)$id.med
-
-#' @rdname clustering
-#' @export
-computeClusters2 = function(medoids, K2,
+clusteringTask2 = function(medoids, K2,
 	getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
 {
+	# Nothing to cluster if there are already at most K2 medoids
+	if (nrow(medoids) <= K2)
+		return (medoids)
 	synchrones = computeSynchrones(medoids,
 		getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
 	distances = computeWerDists(synchrones, ncores_clust, verbose, parll)
-	#TODO: if PAM cannot take big.matrix in input, cast it before... (more than OK in RAM)
-	medoids[ cluster::pam(distances, K2, diss=TRUE)$medoids , ]
+	# PAM in package 'cluster' cannot take big.matrix in input: need to cast it.
+	# NOTE: 'K1' is not in scope here; the WER distance matrix is square with one
+	# row/column per medoid, so its dimension is nrow(medoids).
+	nb_meds = nrow(medoids)
+	mat_dists = matrix(nrow=nb_meds, ncol=nb_meds)
+	for (i in seq_len(nb_meds))
+		mat_dists[i,] = distances[i,]
+	medoids[ computeClusters2(mat_dists,K2), ]
 }
+#' @rdname clustering
+#' @export
+computeClusters1 = function(contribs, K1)
+{
+	# Stage-1 atomic clustering: run PAM on the contributions matrix
+	# (diss=FALSE: rows are observations, not dissimilarities) and
+	# return the indices of the K1 medoids.
+	pam_result = cluster::pam(contribs, K1, diss=FALSE)
+	pam_result$id.med
+}
+
+#' @rdname clustering
+#' @export
+computeClusters2 = function(distances, K2)
+{
+	# Stage-2 atomic clustering: PAM on a precomputed dissimilarity
+	# matrix (diss=TRUE); return the indices of the K2 medoids.
+	cluster::pam(distances, K2, diss=TRUE)$id.med
+}
+
#' computeSynchrones
#'
#' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,
computeSynchrones = function(medoids, getRefSeries,
nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
{
-
-
-
-#TODO: si parll, getMedoids + serialization, pass only getMedoids to nodes
-# --> BOF... chaque node chargera tous les medoids (efficacité) :/ ==> faut que ça tienne en RAM
-#au pire :: C-ifier et charger medoids 1 by 1...
-
- #MIEUX :: medoids DOIT etre une big.matrix partagée !
-
computeSynchronesChunk = function(indices)
{
if (verbose)