| 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/clustering.R |
| 3 | \name{clustering} |
| 4 | \alias{clustering} |
| 5 | \alias{clusteringTask1} |
| 6 | \alias{computeClusters1} |
| 7 | \alias{computeClusters2} |
| 8 | \alias{clusteringTask1} |
| 9 | \alias{clusteringTask2} |
| 10 | \alias{computeClusters1} |
| 11 | \alias{computeClusters2} |
| 12 | \title{Two-stage clustering, within one task (see \code{claws()})} |
| 13 | \usage{ |
| 14 | clusteringTask1(indices, getContribs, K1, nb_series_per_chunk, |
| 15 | ncores_clust = 1, verbose = FALSE, parll = TRUE) |
| 16 | |
| 17 | clusteringTask2(medoids, K2, getRefSeries, nb_ref_curves, nb_series_per_chunk, |
| 18 | ncores_clust = 1, verbose = FALSE, parll = TRUE) |
| 19 | |
| 20 | computeClusters1(contribs, K1, verbose = FALSE) |
| 21 | |
| 22 | computeClusters2(distances, K2, verbose = FALSE) |
| 23 | } |
| 24 | \arguments{ |
| 25 | \item{indices}{Range of series indices to cluster in parallel (initial data)} |
| 26 | |
| 27 | \item{getContribs}{Function to retrieve contributions from initial series indices: |
| 28 | \code{getContribs(indices)} outputs a contributions matrix} |
| 29 | |
| 30 | \item{K1}{Number of super-consumers to be found after stage 1 (K1 << N)} |
| 31 | |
| 32 | \item{nb_series_per_chunk}{(~Maximum) number of series in each group, inside a task} |
| 33 | |
| 34 | \item{ncores_clust}{"OpenMP" number of parallel clusterings in one task} |
| 35 | |
| 36 | \item{verbose}{Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage)} |
| 37 | |
| 38 | \item{parll}{TRUE to fully parallelize; otherwise run sequentially (debug, comparison)} |
| 39 | |
| 40 | \item{medoids}{big.matrix of medoids (curves of same length as initial series)} |
| 41 | |
| 42 | \item{K2}{Number of clusters to be found after stage 2 (K2 << K1)} |
| 43 | |
| 44 | \item{getRefSeries}{Function to retrieve initial series (e.g. in stage 2 after series |
| 45 | have been replaced by stage-1 medoids)} |
| 46 | |
| 47 | \item{nb_ref_curves}{How many reference series? (This number is known at this stage)} |
| 48 | |
| 49 | \item{contribs}{matrix of contributions (e.g. output of \code{curvesToContribs()})} |
| 50 | |
| 51 | \item{distances}{matrix of K1 x K1 (WER) distances between synchrones} |
| 52 | } |
| 53 | \value{ |
| 54 | For \code{clusteringTask1()} and \code{computeClusters1()}, the indices of the |
| 55 | computed (K1) medoids. Indices are irrelevant for stage 2 clustering, thus |
| 56 | \code{computeClusters2()} outputs a big.matrix of medoids |
| 57 | (of size limited by nb_series_per_chunk) |
| 58 | } |
| 59 | \description{ |
| 60 | \code{clusteringTask1()} runs one full stage-1 task, which consists of |
| 61 | iterated stage-1 clustering (on nb_curves / ntasks energy contributions, computed |
| 62 | through discrete wavelet coefficients). |
| 63 | \code{clusteringTask2()} runs a full stage-2 task, which consists of computing |
| 64 | synchrones and then WER distances, before applying the clustering algorithm. |
| 65 | \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic |
| 66 | clustering procedures for stages 1 and 2, respectively. The former applies the |
| 67 | clustering algorithm (PAM) to a contributions matrix, while the latter clusters |
| 68 | a chunk of series inside one task (~max nb_series_per_chunk). |
| 69 | } |