X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=3e7fd3866922d1c56b48741642ca88fd55d5082c;hb=dc86eb0c992e6e4ab119d48398d040c4cf3a75fd;hp=bea073a660e3c4f201546caa87d539522d4671a9;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c;p=epclust.git diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R index bea073a..3e7fd38 100644 --- a/epclust/R/clustering.R +++ b/epclust/R/clustering.R @@ -1,44 +1,51 @@ -#' @name clustering -#' @rdname clustering -#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2 +#' Two-stage clustering, within one task (see \code{claws()}) #' -#' @title Two-stage clustering, withing one task (see \code{claws()}) +#' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated +#' clustering on nb_curves / ntasks energy contributions, computed through +#' discrete wavelets coefficients. +#' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances +#' computations between medoids (indices) output from stage 1, before applying +#' the second clustering algorithm on the distances matrix. #' -#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in -#' iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed -#' through discrete wavelets coefficients. -#' \code{clusteringTask2()} runs a full stage-2 task, which consists in -#' WER distances computations between medoids indices output from stage 1, -#' before applying the second clustering algorithm, on the distances matrix. -#' -#' @param indices Range of series indices to cluster #' @param getContribs Function to retrieve contributions from initial series indices: -#' \code{getContribs(indices)} outputs a contributions matrix +#' \code{getContribs(indices)} outputs a contributions matrix, in columns #' @inheritParams claws #' @inheritParams computeSynchrones +#' @inheritParams computeWerDists +#' +#' @return The indices of the computed (resp. K1 and K2) medoids. #' -#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids. -#' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()} -#' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters) +#' @name clustering +#' @rdname clustering +#' @aliases clusteringTask1 clusteringTask2 NULL #' @rdname clustering #' @export -clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk, - ncores_clust=1, verbose=FALSE, parll=TRUE) +clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust, + ncores_clust=3, verbose=FALSE, parll=TRUE) { + if (verbose) + cat(paste("*** Clustering task 1 on ",length(indices)," series\n", sep="")) + + if (length(indices) <= K1) + return (indices) + if (parll) { - cl = parallel::makeCluster(ncores_clust, outfile = "") + # outfile=="" to see stderr/stdout on terminal + cl <- + if (verbose) + parallel::makeCluster(ncores_clust, outfile = "") + else + parallel::makeCluster(ncores_clust) parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment()) } # Iterate clustering algorithm 1 until K1 medoids are found while (length(indices) > K1) { # Balance tasks by splitting the indices set - as evenly as possible - indices_workers = .splitIndices(indices, nb_items_clust1) - if (verbose) - cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep="")) + indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1) indices <- if (parll) { @@ -53,6 +60,11 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_c inds[ algoClust1(getContribs(inds), K1) ] ) ) } + if (verbose) + { + cat(paste("*** [iterated] Clustering task 1: now ", + length(indices)," medoids\n", sep="")) + } } if (parll) parallel::stopCluster(cl) @@ -62,26 +74,21 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_c #' @rdname clustering #' @export -clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chunk, - nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE) +clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk, + smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE, parll=TRUE) { if (verbose) - cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep="")) - - if (ncol(medoids) <= K2) - return (medoids) + cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep="")) - # A) Obtain synchrones, that is to say the cumulated power consumptions - # for each of the K1 initial groups - synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves, - nb_series_per_chunk, ncores_clust, verbose, parll) + if (length(indices) <= K2) + return (indices) - # B) Compute the WER distances (Wavelets Extended coefficient of deteRmination) - distances = computeWerDists( - synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll) + # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination) + distances <- computeWerDists(indices, getSeries, nb_series_per_chunk, + smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll) - # C) Apply clustering algorithm 2 on the WER distances matrix + # B) Apply clustering algorithm 2 on the WER distances matrix if (verbose) cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep="")) - medoids[ ,algoClust2(distances,K2) ] + indices[ algoClust2(distances,K2) ] }