X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=f5e497fa0784078768e9ab7bf9d530817510a661;hb=bf5c08443087a23ea3d1a7ab993568e608a8b5dd;hp=3993e7685c97b194644d3600ceeea6b7bdac54ac;hpb=24ed5d835e2eebaaa4d5f8296f8d2e2132cc6398;p=epclust.git

diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R
index 3993e76..f5e497f 100644
--- a/epclust/R/clustering.R
+++ b/epclust/R/clustering.R
@@ -6,11 +6,13 @@
 #'
 #' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
 #' iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
-#' through discrete wavelets coefficients). \code{computeClusters1()} and
-#' \code{computeClusters2()} correspond to the atomic clustering procedures respectively
-#' for stage 1 and 2. The former applies the clustering algorithm (PAM) on a
-#' contributions matrix, while the latter clusters a chunk of series inside one task
-#' (~max nb_series_per_chunk)
+#' through discrete wavelets coefficients).
+#' \code{clusteringTask2()} runs a full stage-2 task, which consists in synchrones
+#' and then WER distances computations, before applying the clustering algorithm.
+#' \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
+#' clustering procedures respectively for stage 1 and 2. The former applies the
+#' clustering algorithm (PAM) on a contributions matrix, while the latter applies
+#' it on a matrix of pairwise dissimilarities (the WER distances).
 #'
 #' @param indices Range of series indices to cluster in parallel (initial data)
 #' @param getContribs Function to retrieve contributions from initial series indices:
@@ -62,21 +64,33 @@ clusteringTask1 = function(
 
 #' @rdname clustering
 #' @export
-computeClusters1 = function(contribs, K1)
-	cluster::pam(contribs, K1, diss=FALSE)$id.med
-
-#' @rdname clustering
-#' @export
-computeClusters2 = function(medoids, K2,
+clusteringTask2 = function(medoids, K2,
 	getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
 {
+	if (nrow(medoids) <= K2)
+		return (medoids)
 	synchrones = computeSynchrones(medoids,
 		getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
 	distances = computeWerDists(synchrones, ncores_clust, verbose, parll)
-	#TODO: if PAM cannot take big.matrix in input, cast it before... (more than OK in RAM)
-	medoids[ cluster::pam(distances, K2, diss=TRUE)$medoids , ]
+	# PAM in package 'cluster' cannot take big.matrix in input: need to cast it.
+	# Dimensions are read from 'distances' itself, since this function receives no K1.
+	nb_rows = nrow(distances)
+	mat_dists = matrix(nrow=nb_rows, ncol=ncol(distances))
+	for (i in seq_len(nb_rows))
+		mat_dists[i,] = distances[i,]
+	medoids[ computeClusters2(mat_dists,K2), ]
 }
 
+#' @rdname clustering
+#' @export
+computeClusters1 = function(contribs, K1)
+	cluster::pam(contribs, K1, diss=FALSE)$id.med
+
+#' @rdname clustering
+#' @export
+computeClusters2 = function(distances, K2)
+	cluster::pam(distances, K2, diss=TRUE)$id.med
+
 #' computeSynchrones
 #'
 #' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,