X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=pkg%2FR%2Fclustering.R;fp=pkg%2FR%2Fclustering.R;h=5b5f6684763321b1b854d0cdbc2de6bb2b8ded16;hp=0000000000000000000000000000000000000000;hb=e906736ea27105237e84c904dce6170353726292;hpb=57f337af19cd6251815bb1ff2d62f4c58e8b6078 diff --git a/pkg/R/clustering.R b/pkg/R/clustering.R new file mode 100644 index 0000000..5b5f668 --- /dev/null +++ b/pkg/R/clustering.R @@ -0,0 +1,94 @@ +#' Two-stage clustering, within one task (see \code{claws()}) +#' +#' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated +#' clustering on nb_curves / ntasks energy contributions, computed through +#' discrete wavelets coefficients. +#' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances +#' computations between medoids (indices) output from stage 1, before applying +#' the second clustering algorithm on the distances matrix. +#' +#' @param getContribs Function to retrieve contributions from initial series indices: +#' \code{getContribs(indices)} outputs a contributions matrix, in columns +#' @inheritParams claws +#' @inheritParams computeSynchrones +#' @inheritParams computeWerDists +#' +#' @return The indices of the computed (resp. K1 and K2) medoids. +#' +#' @name clustering +#' @rdname clustering +#' @aliases clusteringTask1 clusteringTask2 +NULL + +#' @rdname clustering +#' @export +clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust, + ncores_clust=3, verbose=FALSE) +{ + if (verbose) + cat(paste("*** Clustering task 1 on ",length(indices)," series [start]\n", sep="")) + + if (length(indices) <= K1) + return (indices) + + parll <- (ncores_clust > 1) + if (parll) + { + # outfile=="" to see stderr/stdout on terminal + cl <- + if (verbose) + parallel::makeCluster(ncores_clust, outfile = "") + else + parallel::makeCluster(ncores_clust) + parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment()) + } + # Iterate clustering algorithm 1 until K1 medoids are found + while (length(indices) > K1) + { + # Balance tasks by splitting the indices set - as evenly as possible + indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1) + indices <- + if (parll) + { + unlist( parallel::parLapply(cl, indices_workers, function(inds) { + require("epclust", quietly=TRUE) + inds[ algoClust1(getContribs(inds), K1) ] + }) ) + } + else + { + unlist( lapply(indices_workers, function(inds) + inds[ algoClust1(getContribs(inds), K1) ] + ) ) + } + if (verbose) + { + cat(paste("*** Clustering task 1 on ",length(indices)," medoids [iter]\n", sep="")) + } + } + if (parll) + parallel::stopCluster(cl) + + indices #medoids +} + +#' @rdname clustering +#' @export +clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk, + smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE) +{ + if (verbose) + cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep="")) + + if (length(indices) <= K2) + return (indices) + + # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination) + distances <- computeWerDists(indices, getSeries, nb_series_per_chunk, + smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose) + + # B) Apply clustering algorithm 2 on the WER distances matrix + if (verbose) + cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep="")) + indices[ algoClust2(distances,K2) ] +}