X-Git-Url: https://git.auder.net/doc/html/img/rock_paper_scissors_lizard_spock.gif?a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=b91d512b23b6b1cd82eb4f8689e96ea44448b420;hb=3c5a4b0880db63367a474a568e1322b3999932fe;hp=bea073a660e3c4f201546caa87d539522d4671a9;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c;p=epclust.git
diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R
index bea073a..b91d512 100644
--- a/epclust/R/clustering.R
+++ b/epclust/R/clustering.R
@@ -1,30 +1,28 @@
-#' @name clustering
-#' @rdname clustering
-#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
+#' Two-stage clustering, within one task (see \code{claws()})
#'
-#' @title Two-stage clustering, withing one task (see \code{claws()})
+#' \code{clusteringTask1()} runs one full stage-1 task, which consists of iterated
+#' stage-1 clustering on nb_curves / ntasks energy contributions, computed through
+#' discrete wavelet coefficients.
+#' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing WER
+#' distances between the medoids (indices) output by stage 1, before applying
+#' the second clustering algorithm to the distance matrix.
#'
-#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
-#' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed
-#' through discrete wavelets coefficients.
-#' \code{clusteringTask2()} runs a full stage-2 task, which consists in
-#' WER distances computations between medoids indices output from stage 1,
-#' before applying the second clustering algorithm, on the distances matrix.
-#'
-#' @param indices Range of series indices to cluster
#' @param getContribs Function to retrieve contributions from initial series indices:
#' \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams claws
#' @inheritParams computeSynchrones
+#' @inheritParams computeWerDists
+#'
+#' @return The indices of the computed (resp. K1 and K2) medoids.
#'
-#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
-#' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()}
-#' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters)
+#' @name clustering
+#' @rdname clustering
+#' @aliases clusteringTask1 clusteringTask2
NULL
#' @rdname clustering
#' @export
-clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
+clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust,
ncores_clust=1, verbose=FALSE, parll=TRUE)
{
if (parll)
@@ -36,7 +34,7 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_c
while (length(indices) > K1)
{
# Balance tasks by splitting the indices set - as evenly as possible
- indices_workers = .splitIndices(indices, nb_items_clust1)
+ indices_workers = .splitIndices(indices, nb_items_clust, min_size=K1+1)
if (verbose)
cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
indices <-
@@ -66,22 +64,17 @@ clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chu
nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
if (verbose)
- cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))
-
- if (ncol(medoids) <= K2)
- return (medoids)
+ cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep=""))
- # A) Obtain synchrones, that is to say the cumulated power consumptions
- # for each of the K1 initial groups
- synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
- nb_series_per_chunk, ncores_clust, verbose, parll)
+ if (length(indices) <= K2)
+ return (indices)
- # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
- distances = computeWerDists(
- synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)
+ # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
+ distances = computeWerDists(indices, getSeries, nb_series_per_chunk,
+ nvoice, nbytes, endian, ncores_clust, verbose, parll)
- # C) Apply clustering algorithm 2 on the WER distances matrix
+ # B) Apply clustering algorithm 2 on the WER distances matrix
if (verbose)
cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
- medoids[ ,algoClust2(distances,K2) ]
+ indices[ algoClust2(distances,K2) ]
}