#' @name clustering
#' @rdname clustering
#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
#'
#' @title Two-stage clustering, within one task (see \code{claws()})
#'
#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists of
#'   iterated stage-1 clustering on nb_curves / ntasks energy contributions, computed
#'   through discrete wavelet coefficients.
#'   \code{clusteringTask2()} runs one full stage-2 task, which consists of computing
#'   WER distances between the synchrones derived from the K1 medoids output by stage 1,
#'   before applying the second clustering algorithm on the distances matrix.
#'
#' @param indices Range of series indices to cluster
#' @param getContribs Function to retrieve contributions from initial series indices:
#'   \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams claws
#' @inheritParams computeSynchrones
#'
#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
#'   Indices are irrelevant for stage-2 clustering, thus \code{clusteringTask2()}
#'   outputs a big.matrix of medoids (of size L x K2, K2 = final number of clusters)
NULL

#' @rdname clustering
#' @export
clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
	ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (parll)
	{
		# outfile="" lets workers' output appear on the master terminal
		cl = parallel::makeCluster(ncores_clust, outfile = "")
		parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
	}
	# Iterate clustering algorithm 1 until K1 medoids are found
	while (length(indices) > K1)
	{
		# Balance tasks by splitting the indices set into chunks - as evenly as possible
		indices_workers = .splitIndices(indices, nb_series_per_chunk)
		if (verbose)
			cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
		indices <-
			if (parll)
			{
				unlist( parallel::parLapply(cl, indices_workers, function(inds)
				{
					require("epclust", quietly=TRUE)
					inds[ algoClust1(getContribs(inds), K1) ]
				}) )
			}
			else
			{
				unlist( lapply(indices_workers, function(inds)
					inds[ algoClust1(getContribs(inds), K1) ]
				) )
			}
	}
	if (parll)
		parallel::stopCluster(cl)

	indices # indices of the K1 medoids
}

#' @rdname clustering
#' @export
clusteringTask2 = function(medoids, K2, algoClust2, getRefSeries, nb_ref_curves,
	nb_series_per_chunk, nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (verbose)
		cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))

	if (ncol(medoids) <= K2)
		return (medoids)

	# A) Obtain synchrones, that is to say the cumulated power consumptions
	#    for each of the K1 initial groups
	synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
		nb_series_per_chunk, ncores_clust, verbose, parll)

	# B) Compute the WER distances (Wavelet Extended coefficient of deteRmination)
	distances = computeWerDists(synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)

	# C) Apply clustering algorithm 2 on the WER distances matrix
	if (verbose)
		cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
	medoids[ ,algoClust2(distances,K2) ]
}
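
## Usage sketch (illustrative only, not part of the package API): a minimal way to
## drive clusteringTask1() sequentially on a toy contributions matrix. It assumes the
## installed epclust package (for the internal .splitIndices() helper), that
## contributions are stored one series per column, and borrows cluster::pam() as one
## possible stage-1 algorithm. The objects 'contribs', 'getContribs' and 'algoClust1'
## below are invented for this illustration.
if (FALSE)
{
	set.seed(42)
	# 200 series, 6 wavelet-contribution coefficients each (one series per column)
	contribs <- matrix(runif(6*200), nrow=6, ncol=200)
	getContribs <- function(inds) contribs[, inds, drop=FALSE]
	# Stage-1 algorithm: PAM on the series (columns of 'data'), returning K medoid ranks
	algoClust1 <- function(data, K) cluster::pam(t(data), K, diss=FALSE)$id.med
	med_indices <- clusteringTask1(indices=1:200, getContribs, K1=15, algoClust1,
		nb_series_per_chunk=50, ncores_clust=1, verbose=TRUE, parll=FALSE)
	length(med_indices) # expected: K1 = 15 medoid indices
}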