- synchrones = computeSynchrones(medoids,
- getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
- distances = computeWerDists(synchrones, ncores_clust, verbose, parll)
- # PAM in package 'cluster' cannot take big.matrix in input: need to cast it
- medoids[ computeClusters2(distances[,],K2,verbose), ]
-}
-
-#' @rdname clustering
-#' @export
-computeClusters1 <- function(contribs, K1, verbose=FALSE)
-{
-  # Run PAM on the rows of 'contribs' (raw observations, diss=FALSE) and
-  # return the row indices of the K1 medoids it selects.
-  if (verbose)
-  {
-    msg <- paste0(" computeClusters1() on ", nrow(contribs), " lines\n")
-    cat(msg)
-  }
-  pam_fit <- cluster::pam(contribs, K1, diss=FALSE)
-  pam_fit$id.med
-}
-
-#' @rdname clustering
-#' @export
-computeClusters2 <- function(distances, K2, verbose=FALSE)
-{
-  # Run PAM on a precomputed dissimilarity matrix (diss=TRUE) and return the
-  # indices of the K2 medoids it selects.
-  if (verbose)
-  {
-    msg <- paste0(" computeClusters2() on ", nrow(distances), " lines\n")
-    cat(msg)
-  }
-  pam_fit <- cluster::pam(distances, K2, diss=TRUE)
-  pam_fit$id.med
-}
-
-#' computeSynchrones
-#'
-#' Compute the synchrones curves (for each medoid, the sum of the series assigned to it)
-#' from a matrix of medoids; each series is assigned to its closest medoid in L2 distance.
-#'
-#' @param medoids big.matrix of medoids (curves of same length as initial series)
-#' @param getRefSeries Function to retrieve initial series (e.g. in stage 2 after series
-#' have been replaced by stage-1 medoids)
-#' @param nb_ref_curves How many reference series? (This number is known at this stage)
-#' @inheritParams claws
-#'
-#' @return A big.matrix of size K1 x L where L = data_length
-#'
-#' @export
-computeSynchrones = function(medoids, getRefSeries,
- nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
-{
- if (verbose)
- cat(paste("--- Compute synchrones\n", sep=""))
-
- # Accumulate one chunk of reference series into 'synchrones' and 'counts'.
- #
- # 'indices' is passed as-is to getRefSeries() to fetch the chunk. Each fetched
- # series is assigned to the medoid with the smallest squared L2 distance, then
- # added to that medoid's row of 'synchrones', and the matching entry of 'counts'
- # is incremented. 'synchrones', 'counts', 'medoids', 'parll' and the mutex 'm'
- # are taken from the enclosing function. Called for its side effects: returns NULL.
- computeSynchronesChunk = function(indices)
- {
-   ref_series = getRefSeries(indices)
-   # An empty chunk contributes nothing. Exit explicitly: the previous
-   # '1:nb_series' loop iterated over c(1,0) here and failed on ref_series[1,].
-   if (is.null(ref_series) || nrow(ref_series) == 0)
-     return (invisible(NULL))
-   nb_series = nrow(ref_series)
-   #get medoids indices for this chunk of series
-
-   #TODO: debug this (address is OK but values are garbage: why?)
-# mi = .Call("computeMedoidsIndices", medoids@address, ref_series, PACKAGE="epclust")
-
-   #R-equivalent, requiring a matrix (thus potentially breaking "fit-in-memory" hope)
-   mat_meds = medoids[,]
-   # which.min() yields integer indices, so preallocate an integer vector
-   mi = integer(nb_series)
-   for (i in seq_len(nb_series))
-     mi[i] <- which.min( rowSums( sweep(mat_meds, 2, ref_series[i,], '-')^2 ) )
-   # Drop the in-memory copy of the medoids as soon as it is no longer needed
-   rm(mat_meds); gc()
-
-   for (i in seq_len(nb_series))
-   {
-     # In parallel mode, guard the read-modify-write of each row with the mutex
-     if (parll)
-       synchronicity::lock(m)
-     synchrones[mi[i],] = synchrones[mi[i],] + ref_series[i,]
-     counts[mi[i],1] = counts[mi[i],1] + 1
-     if (parll)
-       synchronicity::unlock(m)
-   }
- }
-
- K = nrow(medoids) ; L = ncol(medoids)
- # Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
- # TODO: if size > RAM (not our case), use file-backed big.matrix
- synchrones = bigmemory::big.matrix(nrow=K, ncol=L, type="double", init=0.)
- counts = bigmemory::big.matrix(nrow=K, ncol=1, type="double", init=0)
- # synchronicity is only for Linux & MacOS; on Windows: run sequentially
- parll = (requireNamespace("synchronicity",quietly=TRUE)
- && parll && Sys.info()['sysname'] != "Windows")
- if (parll)
- m <- synchronicity::boost.mutex()
-
- if (parll)
- {
- cl = parallel::makeCluster(ncores_clust)
- parallel::clusterExport(cl,
- varlist=c("synchrones","counts","verbose","medoids","getRefSeries"),
- envir=environment())
- }