X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=3e7fd3866922d1c56b48741642ca88fd55d5082c;hb=dc86eb0c992e6e4ab119d48398d040c4cf3a75fd;hp=b91d512b23b6b1cd82eb4f8689e96ea44448b420;hpb=3c5a4b0880db63367a474a568e1322b3999932fe;p=epclust.git diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R index b91d512..3e7fd38 100644 --- a/epclust/R/clustering.R +++ b/epclust/R/clustering.R @@ -1,14 +1,14 @@ #' Two-stage clustering, within one task (see \code{claws()}) #' #' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated -#' stage 1 clustering on nb_curves / ntasks energy contributions, computed through +#' clustering on nb_curves / ntasks energy contributions, computed through #' discrete wavelets coefficients. #' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances #' computations between medoids (indices) output from stage 1, before applying #' the second clustering algorithm on the distances matrix. #' #' @param getContribs Function to retrieve contributions from initial series indices: -#' \code{getContribs(indices)} outputs a contributions matrix +#' \code{getContribs(indices)} outputs a contributions matrix, in columns #' @inheritParams claws #' @inheritParams computeSynchrones #' @inheritParams computeWerDists @@ -22,21 +22,30 @@ NULL #' @rdname clustering #' @export -clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust, - ncores_clust=1, verbose=FALSE, parll=TRUE) +clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust, + ncores_clust=3, verbose=FALSE, parll=TRUE) { + if (verbose) + cat(paste("*** Clustering task 1 on ",length(indices)," series\n", sep="")) + + if (length(indices) <= K1) + return (indices) + if (parll) { - cl = parallel::makeCluster(ncores_clust, outfile = "") + # outfile=="" to see stderr/stdout on terminal + cl <- + if (verbose) + parallel::makeCluster(ncores_clust, outfile = "") + else + parallel::makeCluster(ncores_clust) parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment()) } # Iterate clustering algorithm 1 until K1 medoids are found while (length(indices) > K1) { # Balance tasks by splitting the indices set - as evenly as possible - indices_workers = .splitIndices(indices, nb_items_clust, min_size=K1+1) - if (verbose) - cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep="")) + indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1) indices <- if (parll) { @@ -51,6 +60,11 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust, inds[ algoClust1(getContribs(inds), K1) ] ) ) } + if (verbose) + { + cat(paste("*** [iterated] Clustering task 1: now ", + length(indices)," medoids\n", sep="")) + } } if (parll) parallel::stopCluster(cl) @@ -60,8 +74,8 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust, #' @rdname clustering #' @export -clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chunk, - nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE) +clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk, + smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE, parll=TRUE) { if (verbose) cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep="")) @@ -70,8 +84,8 @@ clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chu return (indices) # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination) - distances = computeWerDists(indices, getSeries, nb_series_per_chunk, - nvoice, nbytes, endian, ncores_clust, verbose, parll) + distances <- computeWerDists(indices, getSeries, nb_series_per_chunk, + smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll) # B) Apply clustering algorithm 2 on the WER distances matrix if (verbose)