X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=b91d512b23b6b1cd82eb4f8689e96ea44448b420;hp=bea073a660e3c4f201546caa87d539522d4671a9;hb=3c5a4b0880db63367a474a568e1322b3999932fe;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R index bea073a..b91d512 100644 --- a/epclust/R/clustering.R +++ b/epclust/R/clustering.R @@ -1,30 +1,28 @@ -#' @name clustering -#' @rdname clustering -#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2 +#' Two-stage clustering, within one task (see \code{claws()}) #' -#' @title Two-stage clustering, withing one task (see \code{claws()}) +#' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated +#' stage 1 clustering on nb_curves / ntasks energy contributions, computed through +#' discrete wavelets coefficients. +#' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances +#' computations between medoids (indices) output from stage 1, before applying +#' the second clustering algorithm on the distances matrix. #' -#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in -#' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed -#' through discrete wavelets coefficients. -#' \code{clusteringTask2()} runs a full stage-2 task, which consists in -#' WER distances computations between medoids indices output from stage 1, -#' before applying the second clustering algorithm, on the distances matrix. -#' -#' @param indices Range of series indices to cluster #' @param getContribs Function to retrieve contributions from initial series indices: #' \code{getContribs(indices)} outputs a contributions matrix #' @inheritParams claws #' @inheritParams computeSynchrones +#' @inheritParams computeWerDists +#' +#' @return The indices of the computed (resp. K1 and K2) medoids. #' -#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids. -#' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()} -#' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters) +#' @name clustering +#' @rdname clustering +#' @aliases clusteringTask1 clusteringTask2 NULL #' @rdname clustering #' @export -clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk, +clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust, ncores_clust=1, verbose=FALSE, parll=TRUE) { if (parll) @@ -36,7 +34,7 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_c while (length(indices) > K1) { # Balance tasks by splitting the indices set - as evenly as possible - indices_workers = .splitIndices(indices, nb_items_clust1) + indices_workers = .splitIndices(indices, nb_items_clust, min_size=K1+1) if (verbose) cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep="")) indices <- @@ -66,22 +64,17 @@ clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chu nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE) { if (verbose) - cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep="")) - - if (ncol(medoids) <= K2) - return (medoids) + cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep="")) - # A) Obtain synchrones, that is to say the cumulated power consumptions - # for each of the K1 initial groups - synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves, - nb_series_per_chunk, ncores_clust, verbose, parll) + if (length(indices) <= K2) + return (indices) - # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination) - distances = computeWerDists( - synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll) + # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination) + distances = computeWerDists(indices, getSeries, nb_series_per_chunk, + nvoice, nbytes, endian, ncores_clust, verbose, parll) - # C) Apply clustering algorithm 2 on the WER distances matrix + # B) Apply clustering algorithm 2 on the WER distances matrix if (verbose) cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep="")) - medoids[ ,algoClust2(distances,K2) ] + indices[ algoClust2(distances,K2) ] }