X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=pkg%2Fman%2Fclustering.Rd;fp=pkg%2Fman%2Fclustering.Rd;h=1768cab8979dbf5eb36be9b2f15ffb173eac35dc;hp=0000000000000000000000000000000000000000;hb=e906736ea27105237e84c904dce6170353726292;hpb=57f337af19cd6251815bb1ff2d62f4c58e8b6078 diff --git a/pkg/man/clustering.Rd b/pkg/man/clustering.Rd new file mode 100644 index 0000000..1768cab --- /dev/null +++ b/pkg/man/clustering.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering.R +\name{clustering} +\alias{clustering} +\alias{clusteringTask1} +\alias{computeClusters1} +\alias{computeClusters2} +\alias{clusteringTask1} +\alias{clusteringTask2} +\alias{computeClusters1} +\alias{computeClusters2} +\title{Two-stage clustering, within one task (see \code{claws()})} +\usage{ +clusteringTask1(indices, getContribs, K1, nb_series_per_chunk, + ncores_clust = 1, verbose = FALSE, parll = TRUE) + +clusteringTask2(medoids, K2, getRefSeries, nb_ref_curves, nb_series_per_chunk, + ncores_clust = 1, verbose = FALSE, parll = TRUE) + +computeClusters1(contribs, K1, verbose = FALSE) + +computeClusters2(distances, K2, verbose = FALSE) +} +\arguments{ +\item{indices}{Range of series indices to cluster in parallel (initial data)} + +\item{getContribs}{Function to retrieve contributions from initial series indices: +\code{getContribs(indices)} outputs a contributions matrix} + +\item{K1}{Number of super-consumers to be found after stage 1 (K1 << N)} + +\item{nb_series_per_chunk}{(~Maximum) number of series in each group, inside a task} + +\item{ncores_clust}{"OpenMP" number of parallel clusterings in one task} + +\item{verbose}{Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage)} + +\item{parll}{TRUE to fully parallelize; otherwise run sequentially (debug, comparison)} + +\item{medoids}{big.matrix of medoids (curves of same length as initial series)} + +\item{K2}{Number of clusters to be found after 
stage 2 (K2 << K1)} + +\item{getRefSeries}{Function to retrieve initial series (e.g. in stage 2 after series +have been replaced by stage-1 medoids)} + +\item{nb_ref_curves}{How many reference series? (This number is known at this stage)} + +\item{contribs}{matrix of contributions (e.g. output of \code{curvesToContribs()})} + +\item{distances}{matrix of K1 x K1 (WER) distances between synchrones} +} +\value{ +For \code{clusteringTask1()} and \code{computeClusters1()}, the indices of the + computed (K1) medoids. Indices are irrelevant for stage 2 clustering, thus + \code{computeClusters2()} outputs a big.matrix of medoids + (of size limited by nb_series_per_chunk) +} +\description{ +\code{clusteringTask1()} runs one full stage-1 task, which consists in + iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed + through discrete wavelets coefficients). + \code{clusteringTask2()} runs a full stage-2 task, which consists in synchrones + and then WER distances computations, before applying the clustering algorithm. + \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic + clustering procedures respectively for stage 1 and 2. The former applies the + clustering algorithm (PAM) on a contributions matrix, while the latter clusters + a chunk of series inside one task (~max nb_series_per_chunk) +}