drop enercast submodule; drop Rcpp requirement; fix doc, complete code, fix fix fix
[epclust.git] / epclust / R / clustering.R
... / ...
CommitLineData
#' Two-stage clustering, within one task (see \code{claws()})
#'
#' \code{clusteringTask1()} runs one full stage-1 task, which consists of iterated
#' stage-1 clustering on nb_curves / ntasks energy contributions, computed through
#' discrete wavelet coefficients.
#' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing the
#' WER distances between the medoids (indices) output by stage 1, before applying
#' the second clustering algorithm to the distance matrix.
#'
#' @param getContribs Function to retrieve contributions from initial series indices:
#'   \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams claws
#' @inheritParams computeSynchrones
#' @inheritParams computeWerDists
#'
#' @return The indices of the computed medoids (K1 medoids for
#'   \code{clusteringTask1()}, K2 medoids for \code{clusteringTask2()}).
#'
#' @name clustering
#' @rdname clustering
#' @aliases clusteringTask1 clusteringTask2
21NULL
22
#' @rdname clustering
#' @export
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
  ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-1 task: repeatedly split `indices` into chunks, let algoClust1() pick
  # K1 medoids per chunk, and start again on the union of the selected medoids
  # until no more than K1 indices remain. Returns the remaining indices.
  if (parll)
  {
    # outfile=="" to see stderr/stdout on terminal
    cl <- parallel::makeCluster(ncores_clust, outfile = "")
    # Registered immediately after creation so the workers are released on every
    # exit path, including an error raised in the middle of an iteration.
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
  }

  # Iterate clustering algorithm 1 until K1 medoids are found
  while (length(indices) > K1)
  {
    # Balance tasks by splitting the indices set - as evenly as possible
    indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
    if (verbose)
      cat(paste0("*** [iterated] Clustering task 1 on ",length(indices)," series\n"))
    indices <-
      if (parll)
      {
        unlist( parallel::parLapply(cl, indices_workers, function(inds) {
          # Best-effort attach on the worker: require() reports failure through
          # its return value instead of stopping, which is kept on purpose.
          require("epclust", quietly=TRUE)
          # algoClust1() yields positions within the chunk; map back to indices
          inds[ algoClust1(getContribs(inds), K1) ]
        }) )
      }
      else
      {
        unlist( lapply(indices_workers, function(inds)
          inds[ algoClust1(getContribs(inds), K1) ]
        ) )
      }
  }

  indices #medoids
}
61
#' @rdname clustering
#' @export
clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
  smooth_lvl, nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
  # Stage-2 task: compute the WER distances between the series designated by
  # `indices`, then let algoClust2() select K2 of them from the distances matrix.
  if (verbose)
    cat(paste0("*** Clustering task 2 on ",length(indices)," medoids\n"))

  # No more than K2 candidates: nothing to cluster, keep them all
  if (length(indices) <= K2)
    return (indices)

  # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
  distances <- computeWerDists(indices, getSeries, nb_series_per_chunk,
    smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll)

  # B) Apply clustering algorithm 2 on the WER distances matrix
  if (verbose)
    cat(paste0("*** algoClust2() on ",nrow(distances)," items\n"))
  # The output of algoClust2() is used as positions within `indices`
  indices[ algoClust2(distances,K2) ]
}