# epclust/R/clustering.R
#' @name clustering
#' @rdname clustering
#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
#'
#' @title Two-stage clustering, within one task (see \code{claws()})
#'
#' @description \code{clusteringTask1()} runs one full stage-1 task: it iterates the
#' first clustering algorithm on the energy contributions of nb_curves / ntasks series,
#' computed from their discrete wavelet coefficients, until only K1 medoids remain.
#' \code{clusteringTask2()} runs a full stage-2 task: it computes the WER distances
#' between the synchrones of the K1 medoids output by stage 1, then applies the second
#' clustering algorithm to the resulting distance matrix.
#'
#' @param indices Range of series indices to cluster
#' @param getContribs Function to retrieve contributions from initial series indices:
#' \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams claws
#' @inheritParams computeSynchrones
#'
#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
#' Indices are irrelevant for stage-2 clustering, so \code{clusteringTask2()} instead
#' outputs a big.matrix of medoids (of size L x K2, where K2 is the final number of clusters)
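#'
#' @examples
#' \dontrun{
#' # A hedged sketch, not the package defaults: PAM-based callbacks with the shapes
#' # expected by both tasks, assuming series (and contributions) are stored one per column.
#' algoClust1 <- function(contribs, K) cluster::pam(t(contribs), K, diss=FALSE)$id.med
#' algoClust2 <- function(dists, K) cluster::pam(dists, K, diss=TRUE)$id.med
#' # Toy stage-1 run on 100 in-memory series with 6 energy contributions each
#' contribs <- matrix(rnorm(600), nrow=6)
#' getContribs <- function(indices) contribs[, indices, drop=FALSE]
#' clusteringTask1(seq_len(100), getContribs, K1=4, algoClust1,
#'   nb_series_per_chunk=50, ncores_clust=1, verbose=TRUE, parll=FALSE)
#' }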
NULL

#' @rdname clustering
#' @export
clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
	ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (parll)
	{
		cl = parallel::makeCluster(ncores_clust, outfile = "")
		parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
	}
	# Iterate clustering algorithm 1 until K1 medoids are found
	while (length(indices) > K1)
	{
		# Balance tasks by splitting the indices set - as evenly as possible
		indices_workers = .splitIndices(indices, nb_series_per_chunk)
		if (verbose)
			cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
		indices <-
			if (parll)
			{
				unlist( parallel::parLapply(cl, indices_workers, function(inds) {
					require("epclust", quietly=TRUE)
					inds[ algoClust1(getContribs(inds), K1) ]
				}) )
			}
			else
			{
				unlist( lapply(indices_workers, function(inds)
					inds[ algoClust1(getContribs(inds), K1) ]
				) )
			}
	}
	if (parll)
		parallel::stopCluster(cl)

	indices # medoids
}
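
# Note on the balanced split above: .splitIndices() is a package-internal helper
# defined elsewhere; the loop only requires that it cuts `indices` into chunks of
# at most nb_series_per_chunk elements, as evenly as possible. A minimal stand-in
# with roughly that behaviour (consecutive chunks, the last one possibly smaller;
# for illustration only, not the package's implementation) would be:
#   split(indices, ceiling(seq_along(indices) / nb_series_per_chunk))
# Each chunk is clustered independently (in parallel when parll=TRUE) and only its
# K1 local medoid indices survive to the next iteration of the while loop.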

#' @rdname clustering
#' @export
clusteringTask2 = function(medoids, K2, algoClust2, getRefSeries, nb_ref_curves,
	nb_series_per_chunk, nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (verbose)
		cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))

	if (ncol(medoids) <= K2)
		return (medoids)

	# A) Obtain synchrones, i.e. the summed power consumptions of each of the
	# K1 initial groups
	synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
		nb_series_per_chunk, ncores_clust, verbose, parll)

	# B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
	distances = computeWerDists(
		synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)

	# C) Apply clustering algorithm 2 on the WER distances matrix
	if (verbose)
		cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
	medoids[ ,algoClust2(distances,K2) ]
}
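
# Hedged sketch of how the two tasks chain inside one claws() task (hypothetical
# glue code, for illustration only; the real orchestration lives in claws(), and the
# getSeries()/getRefSeries() accessors are assumed here, not defined in this file):
#
#   stage1_ids = clusteringTask1(indices, getContribs, K1, algoClust1,
#     nb_series_per_chunk, ncores_clust, verbose, parll)
#   medoids = bigmemory::as.big.matrix( getSeries(stage1_ids) )   # L x K1
#   final_medoids = clusteringTask2(medoids, K2, algoClust2, getRefSeries,
#     nb_ref_curves, nb_series_per_chunk, nvoice, nbytes, endian,
#     ncores_clust, verbose, parll)                               # L x K2 big.matrix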