#' computeSynchrones #' #' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids, #' using euclidian distance. #' #' @param medoids matrix of medoids in columns (curves of same length as the series) #' @param getSeries Function to retrieve series (argument: 'indices', integer vector) #' @param nb_curves How many series? (this is known, at this stage) #' @inheritParams claws #' #' @return A matrix of K synchrones in columns (same length as the series) #' #' @export computeSynchrones <- function(medoids, getSeries, nb_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE) { # Synchrones computation is embarassingly parallel: compute it by chunks of series computeSynchronesChunk <- function(indices) { if (parll) { require("bigmemory", quietly=TRUE) requireNamespace("synchronicity", quietly=TRUE) require("epclust", quietly=TRUE) # The big.matrix objects need to be attached to be usable on the workers synchrones <- bigmemory::attach.big.matrix(synchrones_desc) medoids <- bigmemory::attach.big.matrix(medoids_desc) m <- synchronicity::attach.mutex(m_desc) } # Obtain a chunk of reference series series_chunk <- getSeries(indices) nb_series_chunk <- ncol(series_chunk) # Get medoids indices for this chunk of series mi <- assignMedoids(series_chunk, medoids[,]) # Update synchrones using mi above, grouping it by values of mi (in 1...K) # to avoid too many lock/unlock for (i in seq_len(K)) { # lock / unlock required because several writes at the same time if (parll) synchronicity::lock(m) synchrones[,i] <- synchrones[,i] + rowSums(as.matrix(series_chunk[,mi==i])) if (parll) synchronicity::unlock(m) } NULL } K <- ncol(medoids) L <- nrow(medoids) # Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in // synchrones <- bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.) # NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially parll <- (parll && requireNamespace("synchronicity",quietly=TRUE) && Sys.info()['sysname'] != "Windows") if (parll) { m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk # mutex and big.matrix objects cannot be passed directly: # they will be accessed from their description m_desc <- synchronicity::describe(m) synchrones_desc <- bigmemory::describe(synchrones) medoids <- bigmemory::as.big.matrix(medoids) medoids_desc <- bigmemory::describe(medoids) # outfile=="" to see stderr/stdout on terminal cl <- parallel::makeCluster(ncores_clust, outfile="") parallel::clusterExport(cl, envir=environment(), varlist=c("synchrones_desc","m_desc","medoids_desc","getSeries")) } if (verbose) cat(paste("--- Compute ",K," synchrones with ",nb_curves," series\n", sep="")) # Balance tasks by splitting 1:nb_curves into groups of size <= nb_series_per_chunk indices_workers <- .splitIndices(seq_len(nb_curves), nb_series_per_chunk) ignored <- if (parll) parallel::parLapply(cl, indices_workers, computeSynchronesChunk) else lapply(indices_workers, computeSynchronesChunk) if (parll) parallel::stopCluster(cl) return (synchrones[,]) }