Commit | Line | Data |
---|---|---|
4bcfdbee BA |
1 | #' @name clustering |
2 | #' @rdname clustering | |
eef6f6c9 | 3 | #' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2 |
4bcfdbee | 4 | #' |
492cd9e7 | 5 | #' @title Two-stage clustering, within one task (see \code{claws()}) |
4bcfdbee | 6 | #' |
492cd9e7 | 7 | #' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in |
40f12a2f BA |
8 | #' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed |
9 | #' through discrete wavelets coefficients. | |
10 | #' \code{clusteringTask2()} runs a full stage-2 task, which consists in | |
11 | #' WER distances computations between medoids indices output from stage 1, | |
12 | #' before applying the second clustering algorithm, on the distances matrix. | |
4bcfdbee | 13 | #' |
40f12a2f | 14 | #' @param indices Range of series indices to cluster |
4bcfdbee | 15 | #' @param getContribs Function to retrieve contributions from initial series indices: |
40f12a2f | 16 | #' \code{getContribs(indices)} outputs a contributions matrix |
4bcfdbee | 17 | #' @inheritParams claws |
40f12a2f | 18 | #' @inheritParams computeSynchrones |
4bcfdbee | 19 | #' |
0486fbad BA |
20 | #' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids. |
21 | #' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()} | |
22 | #' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters) | |
4bcfdbee BA |
23 | NULL |
24 | ||
#' @rdname clustering
#' @export
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
  ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-1 task: repeatedly split `indices` into chunks, run algoClust1() on the
  # contributions of each chunk, and keep only the selected indices, until at
  # most K1 indices remain. Returns the surviving indices (the medoids).
  if (parll)
  {
    # outfile="" : workers' output is not redirected
    cl <- parallel::makeCluster(ncores_clust, outfile = "")
    # Always release the workers, including when a clustering step fails
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
  }

  # Iterate clustering algorithm 1 until K1 medoids are found
  # NOTE(review): the loop only terminates if each pass strictly reduces the
  # number of indices; this presumably relies on chunks holding more than K1
  # items -- confirm against .splitIndices().
  while (length(indices) > K1)
  {
    # Balance tasks by splitting the indices set - as evenly as possible.
    # The chunk size is the nb_series_per_chunk argument (the body previously
    # referenced an undefined 'nb_items_clust1').
    indices_workers <- .splitIndices(indices, nb_series_per_chunk)
    if (verbose)
      cat(paste0("*** [iterated] Clustering task 1 on ", length(indices), " series\n"))

    indices <-
      if (parll)
      {
        unlist( parallel::parLapply(cl, indices_workers, function(inds) {
          require("epclust", quietly=TRUE)
          # algoClust1() returns positions within the chunk; map back to series indices
          inds[ algoClust1(getContribs(inds), K1) ]
        }) )
      }
      else
      {
        unlist( lapply(indices_workers, function(inds)
          inds[ algoClust1(getContribs(inds), K1) ]
        ) )
      }
  }

  indices #medoids
}
62 | ||
#' @rdname clustering
#' @export
clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
  nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-2 task: build synchrones from the medoids, compute their pairwise WER
  # distances, then run algoClust2() on that matrix and return the selected
  # medoid columns.
  #
  # NOTE(review): `medoids`, `getRefSeries` and `nb_ref_curves` are read below but
  # are not arguments of this function, while `indices` and `getSeries` are never
  # used. This looks like an unfinished signature change -- confirm the intended
  # inputs before relying on this function.
  if (verbose)
  {
    cat(paste0("*** Clustering task 2 on ", ncol(medoids), " synchrones\n"))
  }

  # No more medoids than requested clusters: nothing left to do
  if (ncol(medoids) <= K2)
  {
    return(medoids)
  }

  # A) Obtain synchrones, that is to say the cumulated power consumptions
  #    for each of the K1 initial groups
  group_curves <- computeSynchrones(medoids, getRefSeries, nb_ref_curves,
    nb_series_per_chunk, ncores_clust, verbose, parll)

  # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
  wer_dists <- computeWerDists(
    group_curves, nvoice, nbytes, endian, ncores_clust, verbose, parll)

  # C) Apply clustering algorithm 2 on the WER distances matrix
  if (verbose)
  {
    cat(paste0("*** algoClust2() on ", nrow(wer_dists), " items\n"))
  }
  selected <- algoClust2(wer_dists, K2)
  medoids[ , selected ]
}
e161499b | 87 | } |