Commit | Line | Data |
---|---|---|
3c5a4b08 | 1 | #' Two-stage clustering, within one task (see \code{claws()}) |
4bcfdbee | 2 | #' |
3c5a4b08 | 3 | #' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated |
3fb6e823 | 4 | #' clustering on nb_curves / ntasks energy contributions, computed through |
3c5a4b08 BA |
5 | #' discrete wavelet coefficients.
6 | #' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing | |
7 | #' the WER distances between the medoids (given by their indices) output by stage 1, | |
8 | #' and then applying the second clustering algorithm to the distance matrix. | |
4bcfdbee | 9 | #' |
4bcfdbee | 10 | #' @param getContribs Function to retrieve contributions from initial series indices: |
3fb6e823 | 11 | #' \code{getContribs(indices)} outputs a contributions matrix, in columns |
4bcfdbee | 12 | #' @inheritParams claws |
40f12a2f | 13 | #' @inheritParams computeSynchrones |
3c5a4b08 BA |
14 | #' @inheritParams computeWerDists |
15 | #' | |
16 | #' @return The indices of the computed (resp. K1 and K2) medoids. | |
4bcfdbee | 17 | #' |
3c5a4b08 BA |
18 | #' @name clustering |
19 | #' @rdname clustering | |
20 | #' @aliases clusteringTask1 clusteringTask2 | |
4bcfdbee BA |
21 | NULL |
22 | ||
23 | #' @rdname clustering | |
24 | #' @export | |
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
  ncores_clust=3, verbose=FALSE, parll=TRUE)
{
  # Stage-1 task: repeatedly split `indices` into chunks, keep for each chunk
  # the entries selected by algoClust1() on that chunk's contributions, and
  # iterate on the surviving indices until at most K1 of them remain.
  #
  # indices        : indices of the series to cluster
  # getContribs    : function(indices) returning the contributions for these indices
  # K1             : number of medoids requested from algoClust1() on each chunk
  # algoClust1     : function(contribs, K1) returning positions (relative to the
  #                  chunk) of the selected medoids
  # nb_items_clust : chunk size forwarded to .splitIndices()
  # ncores_clust   : number of worker processes when parll is TRUE
  # verbose        : print progress messages (and worker output) if TRUE
  # parll          : process the chunks on a parallel cluster if TRUE
  #
  # Returns the medoid indices (a subset of `indices`); `indices` itself is
  # returned untouched when it has no more than K1 elements.
  if (verbose)
    cat(paste0("*** Clustering task 1 on ", length(indices), " series [start]\n"))

  # Nothing to do: there are already at most K1 candidates
  if (length(indices) <= K1)
    return(indices)

  if (parll) {
    # outfile = "" to see workers' stderr/stdout on the terminal
    cl <-
      if (verbose)
        parallel::makeCluster(ncores_clust, outfile = "")
      else
        parallel::makeCluster(ncores_clust)
    # Always release the workers, including when a clustering step below fails
    # (previously the cluster was only stopped on the normal exit path).
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
  }

  # Iterate clustering algorithm 1 until K1 medoids are found
  while (length(indices) > K1) {
    # Balance tasks by splitting the indices set - as evenly as possible.
    # min_size = K1+1 is requested so that every chunk holds more than K1 items.
    indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
    indices <-
      if (parll) {
        unlist(parallel::parLapply(cl, indices_workers, function(inds) {
          # Attach the package on the worker before running user functions
          require("epclust", quietly=TRUE)
          inds[algoClust1(getContribs(inds), K1)]
        }))
      } else {
        unlist(lapply(indices_workers, function(inds) {
          inds[algoClust1(getContribs(inds), K1)]
        }))
      }
    if (verbose)
      cat(paste0("*** Clustering task 1 on ", length(indices), " medoids [iter]\n"))
  }

  indices # medoids
}
73 | ||
4bcfdbee BA |
74 | #' @rdname clustering |
75 | #' @export | |
clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
  smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE, parll=TRUE)
{
  # Stage-2 task: compute the WER distances between the stage-1 medoids
  # (through computeWerDists()), then let algoClust2() select K2 of them
  # from the resulting distances matrix.
  #
  # Returns the selected medoid indices (a subset of `indices`); `indices`
  # itself is returned untouched when it has no more than K2 elements.
  if (verbose) {
    cat(paste0("*** Clustering task 2 on ", length(indices), " medoids\n"))
  }

  # Already at most K2 medoids: nothing to compute
  if (length(indices) <= K2) {
    return(indices)
  }

  # A) WER distances (Wavelets Extended coefficient of deteRmination)
  wer_dists <- computeWerDists(indices, getSeries, nb_series_per_chunk,
    smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll)

  # B) Second clustering algorithm, applied on the WER distances matrix
  if (verbose) {
    cat(paste0("*** algoClust2() on ", nrow(wer_dists), " items\n"))
  }
  selected <- algoClust2(wer_dists, K2)
  indices[selected]
}