# First-stage clustering: run PAM (k-medoids) on the matrix of contributions
# (one series per row) and return $id.med, i.e. the row indices of the K1
# medoids within 'contribs'. diss=FALSE: input is a data matrix, not a
# dissimilarity object.
# NOTE(review): the shared roxygen header (@rdname clustering) presumably sits
# just above this chunk; plain comments used here to avoid duplicating it.
computeClusters1 <- function(contribs, K1)
	cluster::pam(contribs, K1, diss=FALSE)$id.med
-
#' @rdname clustering
#' @export
computeClusters2 = function(medoids, K2,
	getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
{
	# Second clustering stage: reduce the stage-1 medoids down to K2 final medoids.
	# 1) Build the synchrones (one aggregated curve per stage-1 medoid, from the
	#    reference series assigned to it), possibly in parallel.
	synchrones = computeSynchrones(medoids,
		getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
	# 2) Pairwise WER dissimilarities between the synchrones.
	distances = computeWerDists(synchrones, ncores_clust, verbose, parll)
	# 3) PAM on the precomputed dissimilarity matrix (diss=TRUE); the chosen
	#    synchrone identifiers select the corresponding rows of 'medoids'.
	#    NOTE(review): with diss=TRUE, pam()$medoids returns observation labels —
	#    this assumes unnamed rows so labels coincide with numeric indices; verify.
	medoids[ cluster::pam(distances, K2, diss=TRUE)$medoids , ]
}
-
#' computeSynchrones
#'
#' Compute the synchrones curves from a matrix of medoids: one curve per cluster,
#' the pointwise mean of all reference series assigned (by smallest L2 distance)
#' to that cluster's medoid. Rows for empty clusters are dropped from the result.
#'
#' @param medoids Matrix of medoids (curves of same length as initial series)
#' @param getRefSeries Function to retrieve initial series (e.g. in stage 2 after series
#'   have been replaced by stage-1 medoids)
#' @param nb_ref_curves How many reference series? (This number is known at this stage)
#' @inheritParams claws
#'
#' @return A matrix of synchrones, one per non-empty cluster, in rows
#'
#' @export
computeSynchrones = function(medoids, getRefSeries,
	nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
{
	# Worker: assign each series of the chunk to its nearest medoid and
	# accumulate it (sum + count) into the shared matrices. Runs either in the
	# current process or in a forked child (mcparallel), hence the mutex.
	computeSynchronesChunk = function(indices)
	{
		ref_series = getRefSeries(indices)
		#get medoids indices for this chunk of series
		for (i in seq_len(nrow(ref_series)))
		{
			# Nearest medoid under squared L2 distance (row-wise squared diffs)
			j = which.min( rowSums( sweep(medoids, 2, ref_series[i,], '-')^2 ) )
			# Critical section: several forked workers may update row j of the
			# shared big.matrix objects concurrently
			if (parll)
				synchronicity::lock(m)
			synchrones[j,] = synchrones[j,] + ref_series[i,]
			counts[j,1] = counts[j,1] + 1
			if (parll)
				synchronicity::unlock(m)
		}
	}

	K = nrow(medoids)
	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	# Shared memory lets forked children write into the same accumulators.
	synchrones = bigmemory::big.matrix(nrow=K,ncol=ncol(medoids),type="double",init=0.)
	counts = bigmemory::big.matrix(nrow=K,ncol=1,type="double",init=0)
	# Fork (// run) only on Linux & MacOS; on Windows: run sequentially.
	# Also downgrade to sequential if 'synchronicity' is unavailable, since the
	# mutex below would then be impossible to create.
	parll = (requireNamespace("synchronicity",quietly=TRUE)
		&& parll && Sys.info()['sysname'] != "Windows")
	if (parll)
		m <- synchronicity::boost.mutex()

	# Partition 1..nb_ref_curves into chunks of at most nb_series_per_chunk
	indices_workers = .spreadIndices(seq_len(nb_ref_curves), nb_series_per_chunk)
	for (inds in indices_workers)
	{
		if (verbose)
		{
			cat(paste("--- Compute synchrones for indices range ",
				min(inds)," -> ",max(inds),"\n", sep=""))
		}
		if (parll)
			ignored <- parallel::mcparallel(computeSynchronesChunk(inds))
		else
			computeSynchronesChunk(inds)
	}
	# Wait for all forked children before reading the shared accumulators
	if (parll)
		parallel::mccollect()

	# Copy the shared big.matrix contents into ordinary R objects
	mat_syncs = matrix(nrow=K, ncol=ncol(medoids))
	vec_count = rep(NA, K)
	#TODO: can we avoid this loop?
	for (i in seq_len(K))
	{
		mat_syncs[i,] = synchrones[i,]
		vec_count[i] = counts[i,1]
	}
	#NOTE: odds for some clusters to be empty? (when series already come from stage 2)
	# ...maybe; but let's hope resulting K1' be still quite bigger than K2
	# Turn per-cluster sums into means; empty clusters yield 0/0 = NaN rows,
	# which are filtered out just below.
	mat_syncs = sweep(mat_syncs, 1, vec_count, '/')
	mat_syncs[ sapply(seq_len(K), function(i) all(!is.nan(mat_syncs[i,]))) , ]
}
-
-#' computeWerDists
-#'
-#' Compute the WER distances between the synchrones curves (in rows), which are
-#' returned (e.g.) by \code{computeSynchrones()}
-#'
-#' @param synchrones A matrix of synchrones, in rows. The series have same length as the
-#' series in the initial dataset
-#' @inheritParams claws
-#'
-#' @export
-computeWerDists = function(synchrones, ncores_clust=1,verbose=FALSE,parll=TRUE)