Commit | Line | Data |
---|---|---|
b7cd987d BA |
1 | #' Two-stage clustering, within one task (see \code{claws()}) |
2 | #' | |
3 | #' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated | |
4 | #' clustering on nb_curves / ntasks energy contributions, computed through | |
5 | #' discrete wavelets coefficients. | |
6 | #' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances | |
7 | #' computations between medoids (indices) output from stage 1, before applying | |
8 | #' the second clustering algorithm on the distances matrix. | |
9 | #' | |
10 | #' @param getContribs Function to retrieve contributions from initial series indices: | |
11 | #' \code{getContribs(indices)} outputs a contributions matrix, in columns | |
12 | #' @inheritParams claws | |
13 | #' @inheritParams computeSynchrones | |
14 | #' @inheritParams computeWerDists | |
15 | #' | |
16 | #' @return The indices of the computed (resp. K1 and K2) medoids. | |
17 | #' | |
18 | #' @name clustering | |
19 | #' @rdname clustering | |
20 | #' @aliases clusteringTask1 clusteringTask2 | |
21 | NULL | |
22 | ||
23 | #' @rdname clustering | |
24 | #' @export | |
25 | clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust, | |
26 | ncores_clust=3, verbose=FALSE) | |
27 | { | |
28 | if (verbose) | |
29 | cat(paste("*** Clustering task 1 on ",length(indices)," series [start]\n", sep="")) | |
30 | ||
31 | if (length(indices) <= K1) | |
32 | return (indices) | |
33 | ||
34 | parll <- (ncores_clust > 1) | |
35 | if (parll) | |
36 | { | |
37 | # outfile=="" to see stderr/stdout on terminal | |
38 | cl <- | |
39 | if (verbose) | |
40 | parallel::makeCluster(ncores_clust, outfile = "") | |
41 | else | |
42 | parallel::makeCluster(ncores_clust) | |
43 | parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment()) | |
44 | } | |
45 | # Iterate clustering algorithm 1 until K1 medoids are found | |
46 | while (length(indices) > K1) | |
47 | { | |
48 | # Balance tasks by splitting the indices set - as evenly as possible | |
49 | indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1) | |
50 | indices <- | |
51 | if (parll) | |
52 | { | |
53 | unlist( parallel::parLapply(cl, indices_workers, function(inds) { | |
54 | require("epclust", quietly=TRUE) | |
55 | inds[ algoClust1(getContribs(inds), K1) ] | |
56 | }) ) | |
57 | } | |
58 | else | |
59 | { | |
60 | unlist( lapply(indices_workers, function(inds) | |
61 | inds[ algoClust1(getContribs(inds), K1) ] | |
62 | ) ) | |
63 | } | |
64 | if (verbose) | |
65 | { | |
66 | cat(paste("*** Clustering task 1 on ",length(indices)," medoids [iter]\n", sep="")) | |
67 | } | |
68 | } | |
69 | if (parll) | |
70 | parallel::stopCluster(cl) | |
71 | ||
72 | indices #medoids | |
73 | } | |
74 | ||
75 | #' @rdname clustering | |
76 | #' @export | |
77 | clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk, | |
78 | smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE) | |
79 | { | |
80 | if (verbose) | |
81 | cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep="")) | |
82 | ||
83 | if (length(indices) <= K2) | |
84 | return (indices) | |
85 | ||
86 | # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination) | |
87 | distances <- computeWerDists(indices, getSeries, nb_series_per_chunk, | |
88 | smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose) | |
89 | ||
90 | # B) Apply clustering algorithm 2 on the WER distances matrix | |
91 | if (verbose) | |
92 | cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep="")) | |
93 | indices[ algoClust2(distances,K2) ] | |
94 | } |