| 1 | #' @name clustering |
| 2 | #' @rdname clustering |
| 3 | #' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2 |
| 4 | #' |
| 5 | #' @title Two-stage clustering, within one task (see \code{claws()}) |
| 6 | #' |
| 7 | #' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in |
| 8 | #' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed |
| 9 | #' through discrete wavelets coefficients. |
| 10 | #' \code{clusteringTask2()} runs a full stage-2 task, which consists in |
| 11 | #' WER distances computations between medoids indices output from stage 1, |
| 12 | #' before applying the second clustering algorithm, on the distances matrix. |
| 13 | #' |
| 14 | #' @param indices Range of series indices to cluster |
| 15 | #' @param getContribs Function to retrieve contributions from initial series indices: |
| 16 | #' \code{getContribs(indices)} outputs a contributions matrix |
| 17 | #' @inheritParams claws |
| 18 | #' @inheritParams computeSynchrones |
| 19 | #' |
| 20 | #' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids. |
| 21 | #' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()} |
| 22 | #' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters) |
| 23 | NULL |
| 24 | |
| 25 | #' @rdname clustering |
| 26 | #' @export |
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
  ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-1 clustering task: repeatedly split `indices` into chunks, run
  # algoClust1() on the contributions of each chunk and keep only the selected
  # indices, until at most K1 indices remain.
  #
  # indices: series indices to cluster
  # getContribs: function; getContribs(inds) is given as first argument to algoClust1()
  # K1: target number of medoids; also given as second argument to algoClust1()
  # algoClust1: function(contribs, K1); its result is used to subset the chunk's indices
  # nb_series_per_chunk: forwarded as second argument to .splitIndices()
  # ncores_clust: number of workers in the cluster (only used when parll is TRUE)
  # verbose: if TRUE, print one progress line per iteration
  # parll: if TRUE, process the chunks on a parallel cluster; otherwise sequentially
  #
  # Returns the remaining indices (the medoids).

  if (parll)
  {
    cl <- parallel::makeCluster(ncores_clust, outfile = "")
    # Registered immediately so the workers are released even if a step below fails
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
  }

  # Iterate clustering algorithm 1 until K1 medoids are found
  while (length(indices) > K1)
  {
    # Balance tasks by splitting the indices set - as evenly as possible
    # (chunk size is driven by the nb_series_per_chunk argument)
    indices_workers <- .splitIndices(indices, nb_series_per_chunk)
    if (verbose)
      cat(paste0("*** [iterated] Clustering task 1 on ",length(indices)," series\n"))
    indices <-
      if (parll)
      {
        unlist( parallel::parLapply(cl, indices_workers, function(inds) {
          # Best-effort load of the package on each worker (require() does not
          # stop if it is unavailable) -- presumably needed by the user-supplied
          # getContribs() / algoClust1(); verify against callers
          require("epclust", quietly=TRUE)
          inds[ algoClust1(getContribs(inds), K1) ]
        }) )
      }
      else
      {
        unlist( lapply(indices_workers, function(inds)
          inds[ algoClust1(getContribs(inds), K1) ]
        ) )
      }
  }

  indices #medoids
}
| 62 | |
| 63 | #' @rdname clustering |
| 64 | #' @export |
clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
  nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-2 clustering task: build synchrones with computeSynchrones(), compute
  # their WER distances with computeWerDists(), then run algoClust2() on the
  # distances matrix and use its result to select columns of `medoids`.
  #
  # K2: threshold on ncol(medoids) for the early return; also second argument
  #     of algoClust2()
  # algoClust2: function(distances, K2); its result is used as a column index
  # nb_series_per_chunk, ncores_clust, verbose, parll: forwarded to computeSynchrones()
  # nvoice, nbytes, endian, ncores_clust, verbose, parll: forwarded to computeWerDists()
  #
  # Returns `medoids` unchanged if it has at most K2 columns, otherwise the
  # columns of `medoids` selected by algoClust2(distances, K2).
  #
  # NOTE(review): the body reads `medoids`, `getRefSeries` and `nb_ref_curves`,
  # none of which is a parameter or a local variable of this function, while the
  # parameters `indices` and `getSeries` are never used. Unless those three names
  # exist in an enclosing environment, the function fails at its first use of
  # `medoids`. Signature and body look out of sync -- TODO confirm the intended
  # interface against computeSynchrones() / computeWerDists() and the callers.

  if (verbose)
    cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))

  # Nothing to cluster when there are no more than K2 columns
  if (ncol(medoids) <= K2)
    return (medoids)

  # A) Obtain synchrones, that is to say the cumulated power consumptions
  # for each of the K1 initial groups
  synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
    nb_series_per_chunk, ncores_clust, verbose, parll)

  # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
  distances = computeWerDists(
    synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)

  # C) Apply clustering algorithm 2 on the WER distances matrix
  if (verbose)
    cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
  medoids[ ,algoClust2(distances,K2) ]
}