# epclust/R/clustering.R
#' @name clustering
#' @rdname clustering
#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
#'
#' @title Two-stage clustering, within one task (see \code{claws()})
#'
#' @description \code{clusteringTask1()} runs one full stage-1 task: it iterates the
#' first clustering algorithm on the energy contributions of nb_curves / ntasks series,
#' computed from their discrete wavelet coefficients, until only K1 medoids remain.
#' \code{clusteringTask2()} runs a full stage-2 task: it computes the WER distances
#' between the synchrones of the K1 medoids output by stage 1, then applies the second
#' clustering algorithm to the resulting distance matrix.
#'
#' @param indices Range of series indices to cluster
#' @param getContribs Function to retrieve contributions from initial series indices:
#' \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams claws
#' @inheritParams computeSynchrones
#'
#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
#' Indices are irrelevant for stage-2 clustering, so \code{clusteringTask2()} instead
#' outputs a big.matrix of medoids (of size L x K2, where K2 is the final number of clusters)
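#'
#' @examples
#' \dontrun{
#' # A hedged sketch, not the package defaults: PAM-based callbacks with the shapes
#' # expected by both tasks, assuming series (and contributions) are stored one per column.
#' algoClust1 <- function(contribs, K) cluster::pam(t(contribs), K, diss=FALSE)$id.med
#' algoClust2 <- function(dists, K) cluster::pam(dists, K, diss=TRUE)$id.med
#' # Toy stage-1 run on 100 in-memory series with 6 energy contributions each
#' contribs <- matrix(rnorm(600), nrow=6)
#' getContribs <- function(indices) contribs[, indices, drop=FALSE]
#' clusteringTask1(seq_len(100), getContribs, K1=4, algoClust1,
#'   nb_series_per_chunk=50, ncores_clust=1, verbose=TRUE, parll=FALSE)
#' }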
NULL

#' @rdname clustering
#' @export
clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
	ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (parll)
	{
		cl = parallel::makeCluster(ncores_clust, outfile = "")
		parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
	}
	# Iterate clustering algorithm 1 until K1 medoids are found
	while (length(indices) > K1)
	{
		# Balance tasks by splitting the indices set - as evenly as possible
		indices_workers = .splitIndices(indices, nb_series_per_chunk)
		if (verbose)
			cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
		indices <-
			if (parll)
			{
				unlist( parallel::parLapply(cl, indices_workers, function(inds) {
					require("epclust", quietly=TRUE)
					inds[ algoClust1(getContribs(inds), K1) ]
				}) )
			}
			else
			{
				unlist( lapply(indices_workers, function(inds)
					inds[ algoClust1(getContribs(inds), K1) ]
				) )
			}
	}
	if (parll)
		parallel::stopCluster(cl)

	indices # medoids
}
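
# Note on the balanced split above: .splitIndices() is a package-internal helper
# defined elsewhere; the loop only requires that it cuts `indices` into chunks of
# at most nb_series_per_chunk elements, as evenly as possible. A minimal stand-in
# with roughly that behaviour (consecutive chunks, the last one possibly smaller;
# for illustration only, not the package's implementation) would be:
#   split(indices, ceiling(seq_along(indices) / nb_series_per_chunk))
# Each chunk is clustered independently (in parallel when parll=TRUE) and only its
# K1 local medoid indices survive to the next iteration of the while loop.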

#' @rdname clustering
#' @export
clusteringTask2 = function(medoids, K2, algoClust2, getRefSeries, nb_ref_curves,
	nb_series_per_chunk, nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (verbose)
		cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))

	if (ncol(medoids) <= K2)
		return (medoids)

	# A) Obtain synchrones, i.e. the summed power consumptions of each of the
	# K1 initial groups
	synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
		nb_series_per_chunk, ncores_clust, verbose, parll)

	# B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
	distances = computeWerDists(
		synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)

	# C) Apply clustering algorithm 2 on the WER distances matrix
	if (verbose)
		cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
	medoids[ ,algoClust2(distances,K2) ]
}
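
# Hedged sketch of how the two tasks chain inside one claws() task (hypothetical
# glue code, for illustration only; the real orchestration lives in claws(), and the
# getSeries()/getRefSeries() accessors are assumed here, not defined in this file):
#
#   stage1_ids = clusteringTask1(indices, getContribs, K1, algoClust1,
#     nb_series_per_chunk, ncores_clust, verbose, parll)
#   medoids = bigmemory::as.big.matrix( getSeries(stage1_ids) )   # L x K1
#   final_medoids = clusteringTask2(medoids, K2, algoClust2, getRefSeries,
#     nb_ref_curves, nb_series_per_chunk, nvoice, nbytes, endian,
#     ncores_clust, verbose, parll)                               # L x K2 big.matrix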