'update'
[epclust.git] / epclust / R / clustering.R
CommitLineData
4bcfdbee
BA
1#' @name clustering
2#' @rdname clustering
eef6f6c9 3#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
4bcfdbee 4#'
492cd9e7 5#' @title Two-stage clustering, within one task (see \code{claws()})
4bcfdbee 6#'
492cd9e7 7#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
40f12a2f
BA
8#' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed
9#' through discrete wavelets coefficients.
10#' \code{clusteringTask2()} runs a full stage-2 task, which consists in
11#' WER distances computations between medoids indices output from stage 1,
12#' before applying the second clustering algorithm, on the distances matrix.
4bcfdbee 13#'
40f12a2f 14#' @param indices Range of series indices to cluster
4bcfdbee 15#' @param getContribs Function to retrieve contributions from initial series indices:
40f12a2f 16#' \code{getContribs(indices)} outputs a contributions matrix
4bcfdbee 17#' @inheritParams claws
40f12a2f 18#' @inheritParams computeSynchrones
4bcfdbee 19#'
0486fbad
BA
20#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
21#' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()}
22#' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters)
4bcfdbee
BA
23NULL
24
25#' @rdname clustering
26#' @export
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
  ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-1 task: repeatedly split `indices` into groups, run algoClust1() on the
  # contributions of each group and keep only the indices it selects, until at
  # most K1 indices remain. The surviving indices (the medoids) are returned.
  #
  # indices      series indices to cluster
  # getContribs  function(indices) giving the contributions passed to algoClust1()
  # K1           target number of medoids; also passed as 2nd argument to algoClust1()
  # algoClust1   function(contribs, K1) whose result is used to subset the group's
  #              indices (inds[ result ])
  # ncores_clust number of workers created when parll is TRUE
  # verbose      if TRUE, print one progress line per iteration
  # parll        if TRUE, process the groups on a cluster; otherwise sequentially
  #
  # NOTE(review): `nb_items_clust1` is used below but is not an argument of this
  # function, while `nb_series_per_chunk` is never read here. Unless
  # `nb_items_clust1` resolves from an enclosing scope, the first loop iteration
  # fails - confirm which name was intended.
  if (parll)
  {
    cl <- parallel::makeCluster(ncores_clust, outfile = "")
    # Stop the workers on every exit path (including errors raised inside the
    # loop), so that a failing task does not leave worker processes behind.
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
  }

  # Iterate clustering algorithm 1 until K1 medoids are found
  while (length(indices) > K1)
  {
    # Balance tasks by splitting the indices set - as evenly as possible
    indices_workers <- .splitIndices(indices, nb_items_clust1)
    if (verbose)
      cat(paste0("*** [iterated] Clustering task 1 on ", length(indices), " series\n"))

    indices <-
      if (parll)
      {
        unlist( parallel::parLapply(cl, indices_workers, function(inds) {
          # Best-effort load on the worker: require() does not stop on failure
          require("epclust", quietly=TRUE)
          inds[ algoClust1(getContribs(inds), K1) ]
        }) )
      }
      else
      {
        unlist( lapply(indices_workers, function(inds)
          inds[ algoClust1(getContribs(inds), K1) ]
        ) )
      }
  }

  indices #medoids
}
62
4bcfdbee
BA
63#' @rdname clustering
64#' @export
40f12a2f
BA
clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
  nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-2 task: build synchrones from the medoids, compute the WER distances
  # between them, then run algoClust2() on the distances matrix and return the
  # medoids columns it selects.
  #
  # NOTE(review): the body reads `medoids`, `getRefSeries` and `nb_ref_curves`,
  # none of which are arguments of this function, and never reads `indices` or
  # `getSeries`. These names must resolve from an enclosing scope for the call
  # to succeed - confirm the intended signature.
  nb_medoids <- ncol(medoids)
  if (verbose)
    cat(paste0("*** Clustering task 2 on ", nb_medoids, " synchrones\n"))

  # Nothing to cluster when there are already no more than K2 medoids
  if (nb_medoids <= K2)
    return(medoids)

  # A) Obtain synchrones, that is to say the cumulated power consumptions
  #    for each of the K1 initial groups
  synchrones <- computeSynchrones(
    medoids, getRefSeries, nb_ref_curves,
    nb_series_per_chunk, ncores_clust, verbose, parll)

  # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
  distances <- computeWerDists(
    synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)

  # C) Apply clustering algorithm 2 on the WER distances matrix
  if (verbose)
    cat(paste0("*** algoClust2() on ", nrow(distances), " items\n"))
  selected <- algoClust2(distances, K2)
  medoids[, selected]
}
e161499b 87}