improve/fix comments - TODO: debug examples, CSV and after
[epclust.git] / epclust / R / clustering.R
CommitLineData
3c5a4b08 1#' Two-stage clustering, within one task (see \code{claws()})
4bcfdbee 2#'
3c5a4b08 3#' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated
3fb6e823 4#' clustering on nb_curves / ntasks energy contributions, computed through
3c5a4b08
BA
5#' discrete wavelet coefficients.
6#' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distance
7#' computations between medoids (indices) output from stage 1, before applying
8#' the second clustering algorithm on the distances matrix.
4bcfdbee 9#'
4bcfdbee 10#' @param getContribs Function to retrieve contributions from initial series indices:
3fb6e823 11#' \code{getContribs(indices)} outputs a contributions matrix, in columns
4bcfdbee 12#' @inheritParams claws
40f12a2f 13#' @inheritParams computeSynchrones
3c5a4b08
BA
14#' @inheritParams computeWerDists
15#'
16#' @return The indices of the computed (resp. K1 and K2) medoids.
4bcfdbee 17#'
3c5a4b08
BA
18#' @name clustering
19#' @rdname clustering
20#' @aliases clusteringTask1 clusteringTask2
4bcfdbee
BA
21NULL
22
23#' @rdname clustering
24#' @export
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
	ncores_clust=3, verbose=FALSE, parll=TRUE)
{
	# Stage-1 task: repeatedly split 'indices' into chunks, run algoClust1() on the
	# contributions of each chunk and keep only the selected indices of each chunk,
	# until no more than K1 indices remain.
	#
	# indices:        series indices to reduce
	# getContribs:    function(inds), output is passed as first argument to algoClust1()
	# K1:             target number of medoids (second argument to algoClust1())
	# algoClust1:     function(contribs, K1), its output is used to subset the chunk
	# nb_items_clust: chunk size, forwarded to .splitIndices()
	# ncores_clust:   number of worker processes started when parll is TRUE
	# verbose:        if TRUE, print progress and let workers write on the terminal
	# parll:          if TRUE, chunks are processed with parallel::parLapply()
	#
	# Value: the remaining indices (medoids).

	# Nothing to cluster: return before spawning any worker process
	if (length(indices) <= K1)
		return (indices)

	if (parll)
	{
		# outfile=="" to see stderr/stdout on terminal
		cl <-
			if (verbose)
				parallel::makeCluster(ncores_clust, outfile = "")
			else
				parallel::makeCluster(ncores_clust)
		# Release the workers on every exit path, including errors raised by
		# .splitIndices(), getContribs() or algoClust1() during the iterations
		on.exit(parallel::stopCluster(cl), add=TRUE)
		parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
	}

	# Iterate clustering algorithm 1 until K1 medoids are found
	while (length(indices) > K1)
	{
		# Balance tasks by splitting the indices set - as evenly as possible
		indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
		if (verbose)
			cat(paste0("*** [iterated] Clustering task 1 on ",length(indices)," series\n"))
		indices <-
			if (parll)
			{
				unlist( parallel::parLapply(cl, indices_workers, function(inds) {
					# NOTE(review): presumably attaches epclust so that package functions
					# are reachable on the worker - TODO confirm before changing to library()
					require("epclust", quietly=TRUE)
					inds[ algoClust1(getContribs(inds), K1) ]
				}) )
			}
			else
			{
				unlist( lapply(indices_workers, function(inds)
					inds[ algoClust1(getContribs(inds), K1) ]
				) )
			}
	}

	indices #medoids
}
65
4bcfdbee
BA
66#' @rdname clustering
67#' @export
clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
	smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE, parll=TRUE)
{
	# Stage-2 task: compute the WER distances between the series designated by
	# 'indices', then let algoClust2() select K2 of them from the distances matrix.
	# When there are no more than K2 indices, they are returned unchanged.

	# Print a progress message, only in verbose mode
	say <- function(...)
	{
		if (verbose)
			cat(paste0(...))
	}

	nb_medoids <- length(indices)
	say("*** Clustering task 2 on ", nb_medoids, " medoids\n")

	if (nb_medoids > K2)
	{
		# A) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
		wer_dists <- computeWerDists(indices, getSeries, nb_series_per_chunk,
			smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll)

		# B) Apply clustering algorithm 2 on the WER distances matrix
		say("*** algoClust2() on ", nrow(wer_dists), " items\n")
		indices[ algoClust2(wer_dists, K2) ]
	}
	else
	{
		# Already few enough medoids: nothing to compute
		indices
	}
}