-#' @name clustering
-#' @rdname clustering
-#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
+#' Two-stage clustering, within one task (see \code{claws()})
#'
-#' @title Two-stage clustering, withing one task (see \code{claws()})
+#' \code{clusteringTask1()} runs one full stage-1 task, which consists of iterated
+#' clustering on nb_curves / ntasks energy contributions, computed from
+#' discrete wavelet coefficients.
+#' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing
+#' WER distances between the medoids (given as indices) output by stage 1, before
+#' applying the second clustering algorithm to the distance matrix.
#'
-#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
-#' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed
-#' through discrete wavelets coefficients.
-#' \code{clusteringTask2()} runs a full stage-2 task, which consists in
-#' WER distances computations between medoids indices output from stage 1,
-#' before applying the second clustering algorithm, on the distances matrix.
-#'
-#' @param indices Range of series indices to cluster
#' @param getContribs Function to retrieve contributions from initial series indices:
-#' \code{getContribs(indices)} outputs a contributions matrix
+#' \code{getContribs(indices)} outputs a contributions matrix, in columns
#' @inheritParams claws
#' @inheritParams computeSynchrones
+#' @inheritParams computeWerDists
+#'
+#' @return The indices of the computed medoids (K1 for stage 1, K2 for stage 2).
#'
-#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
-#' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()}
-#' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters)
+#' @name clustering
+#' @rdname clustering
+#' @aliases clusteringTask1 clusteringTask2
NULL
#' @rdname clustering
#' @export
-clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
- ncores_clust=1, verbose=FALSE, parll=TRUE)
+clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
+ ncores_clust=3, verbose=FALSE, parll=TRUE)
{
if (parll)
{
- cl = parallel::makeCluster(ncores_clust, outfile = "")
+ # outfile = "" (only when verbose) to see workers' stderr/stdout on the terminal
+ cl <-
+ if (verbose)
+ parallel::makeCluster(ncores_clust, outfile = "")
+ else
+ parallel::makeCluster(ncores_clust)
parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
}
# Iterate clustering algorithm 1 until K1 medoids are found
while (length(indices) > K1)
{
# Balance tasks by splitting the indices set - as evenly as possible
- indices_workers = .splitIndices(indices, nb_items_clust1)
+ indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
if (verbose)
cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
indices <-
#' @rdname clustering
#' @export
-clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
- nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
+clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
+ smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE, parll=TRUE)
{
 if (verbose)
- cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))
-
- if (ncol(medoids) <= K2)
- return (medoids)
+ cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep=""))
- # A) Obtain synchrones, that is to say the cumulated power consumptions
- # for each of the K1 initial groups
- synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
- nb_series_per_chunk, ncores_clust, verbose, parll)
+ if (length(indices) <= K2)
+ return (indices) # already K2 or fewer medoids: nothing left to cluster
- # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
- distances = computeWerDists(
- synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)
+ # A) Compute the WER (Wavelets Extended coefficient of deteRmination) distances for the series designated by indices
+ distances <- computeWerDists(indices, getSeries, nb_series_per_chunk,
+ smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll)
- # C) Apply clustering algorithm 2 on the WER distances matrix
+ # B) Apply clustering algorithm 2 on the WER distances matrix, asking for K2 clusters
 if (verbose)
 cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
- medoids[ ,algoClust2(distances,K2) ]
+ indices[ algoClust2(distances,K2) ] # algoClust2() output is used to subset indices, so series indices are returned
}