[epclust.git] / pkg / man / clustering.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustering.R
\name{clustering}
\alias{clustering}
\alias{clusteringTask1}
\alias{computeClusters1}
\alias{computeClusters2}
\alias{clusteringTask1}
\alias{clusteringTask2}
\alias{computeClusters1}
\alias{computeClusters2}
\title{Two-stage clustering, within one task (see \code{claws()})}
\usage{
clusteringTask1(indices, getContribs, K1, nb_series_per_chunk,
  ncores_clust = 1, verbose = FALSE, parll = TRUE)

clusteringTask2(medoids, K2, getRefSeries, nb_ref_curves, nb_series_per_chunk,
  ncores_clust = 1, verbose = FALSE, parll = TRUE)

computeClusters1(contribs, K1, verbose = FALSE)

computeClusters2(distances, K2, verbose = FALSE)
}
\arguments{
\item{indices}{Range of series indices to cluster in parallel (initial data)}

\item{getContribs}{Function to retrieve contributions from initial series indices:
\code{getContribs(indices)} outputs a contributions matrix}

\item{K1}{Number of super-consumers to be found after stage 1 (K1 << N)}

\item{nb_series_per_chunk}{(~Maximum) number of series in each group, inside a task}

\item{ncores_clust}{"OpenMP" number of parallel clusterings in one task}

\item{verbose}{Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage)}

\item{parll}{TRUE to fully parallelize; otherwise run sequentially (debug, comparison)}

\item{medoids}{big.matrix of medoids (curves of same length as initial series)}

\item{K2}{Number of clusters to be found after stage 2 (K2 << K1)}

\item{getRefSeries}{Function to retrieve initial series (e.g. in stage 2 after series
have been replaced by stage-1 medoids)}

\item{nb_ref_curves}{How many reference series? (This number is known at this stage)}

\item{contribs}{matrix of contributions (e.g. output of \code{curvesToContribs()})}

\item{distances}{matrix of K1 x K1 (WER) distances between synchrones}
}
\value{
For \code{clusteringTask1()} and \code{computeClusters1()}, the indices of the
  computed (K1) medoids. Indices are irrelevant for stage 2 clustering, thus
  \code{computeClusters2()} outputs a big.matrix of medoids
  (of size limited by nb_series_per_chunk)
}
\description{
\code{clusteringTask1()} runs one full stage-1 task, which consists in
  iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
  through discrete wavelet coefficients).
  \code{clusteringTask2()} runs a full stage-2 task, which consists in synchrones
  and then WER distances computations, before applying the clustering algorithm.
  \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
  clustering procedures respectively for stage 1 and 2. The former applies the
  clustering algorithm (PAM) on a contributions matrix, while the latter clusters
  a chunk of series inside one task (~max nb_series_per_chunk)
}
Commit	Line	Data
	1	% Generated by roxygen2: do not edit by hand
	2	% Please edit documentation in R/clustering.R
	3	\name{clustering}
	4	\alias{clustering}
	5	\alias{clusteringTask1}
	6	\alias{computeClusters1}
	7	\alias{computeClusters2}
	8	\alias{clusteringTask1}
	9	\alias{clusteringTask2}
	10	\alias{computeClusters1}
	11	\alias{computeClusters2}
	12	\title{Two-stage clustering, within one task (see \code{claws()})}
	13	\usage{
	14	clusteringTask1(indices, getContribs, K1, nb_series_per_chunk,
	15	ncores_clust = 1, verbose = FALSE, parll = TRUE)
	16
	17	clusteringTask2(medoids, K2, getRefSeries, nb_ref_curves, nb_series_per_chunk,
	18	ncores_clust = 1, verbose = FALSE, parll = TRUE)
	19
	20	computeClusters1(contribs, K1, verbose = FALSE)
	21
	22	computeClusters2(distances, K2, verbose = FALSE)
	23	}
	24	\arguments{
	25	\item{indices}{Range of series indices to cluster in parallel (initial data)}
	26
	27	\item{getContribs}{Function to retrieve contributions from initial series indices:
	28	\code{getContribs(indices)} outputs a contributions matrix}
	29
	30	\item{K1}{Number of super-consumers to be found after stage 1 (K1 << N)}
	31
	32	\item{nb_series_per_chunk}{(~Maximum) number of series in each group, inside a task}
	33
	34	\item{ncores_clust}{"OpenMP" number of parallel clusterings in one task}
	35
	36	\item{verbose}{Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage)}
	37
	38	\item{parll}{TRUE to fully parallelize; otherwise run sequentially (debug, comparison)}
	39
	40	\item{medoids}{big.matrix of medoids (curves of same length as initial series)}
	41
	42	\item{K2}{Number of clusters to be found after stage 2 (K2 << K1)}
	43
	44	\item{getRefSeries}{Function to retrieve initial series (e.g. in stage 2 after series
	45	have been replaced by stage-1 medoids)}
	46
	47	\item{nb_ref_curves}{How many reference series? (This number is known at this stage)}
	48
	49	\item{contribs}{matrix of contributions (e.g. output of \code{curvesToContribs()})}
	50
	51	\item{distances}{matrix of K1 x K1 (WER) distances between synchrones}
	52	}
	53	\value{
	54	For \code{clusteringTask1()} and \code{computeClusters1()}, the indices of the
	55	computed (K1) medoids. Indices are irrelevant for stage 2 clustering, thus
	56	\code{computeClusters2()} outputs a big.matrix of medoids
	57	(of size limited by nb_series_per_chunk)
	58	}
	59	\description{
	60	\code{clusteringTask1()} runs one full stage-1 task, which consists in
	61	iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
	62	through discrete wavelet coefficients).
	63	\code{clusteringTask2()} runs a full stage-2 task, which consists in synchrones
	64	and then WER distances computations, before applying the clustering algorithm.
	65	\code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
	66	clustering procedures respectively for stage 1 and 2. The former applies the
	67	clustering algorithm (PAM) on a contributions matrix, while the latter clusters
	68	a chunk of series inside one task (~max nb_series_per_chunk)
	69	}