epclust/R/computeSynchrones.R

   1 #' computeSynchrones
   2 #'
   3 #' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,
   4 #' using euclidian distance.
   5 #'
   6 #' @param medoids matrix of medoids in columns (curves of same length as the series)
   7 #' @param getSeries Function to retrieve series (argument: 'indices', integer vector)
   8 #' @param nb_curves How many series? (this is known, at this stage)
   9 #' @inheritParams claws
  10 #'
  11 #' @return A matrix of K synchrones in columns (same length as the series)
  12 #'
  13 #' @export
  14 computeSynchrones = function(medoids, getSeries, nb_curves,
  15     nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
  16 {
  17     # Synchrones computation is embarassingly parallel: compute it by chunks of series
  18     computeSynchronesChunk = function(indices)
  19     {
  20         if (parll)
  21         {
  22             require("bigmemory", quietly=TRUE)
  23             requireNamespace("synchronicity", quietly=TRUE)
  24             require("epclust", quietly=TRUE)
  25             # The big.matrix objects need to be attached to be usable on the workers
  26             synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
  27             medoids <- bigmemory::attach.big.matrix(medoids_desc)
  28             m <- synchronicity::attach.mutex(m_desc)
  29         }
  30
  31         # Obtain a chunk of reference series
  32         series_chunk = getSeries(indices)
  33         nb_series_chunk = ncol(series_chunk)
  34
  35         # Get medoids indices for this chunk of series
  36         for (i in seq_len(nb_series_chunk))
  37             mi[i] <- which.min( colSums( sweep(medoids, 1, series_chunk[,i], '-')^2 ) )
  38
  39         # Update synchrones using mi above, grouping it by values of mi (in 1...K)
  40         # to avoid too many lock/unlock
  41         for (i in seq_len(K))
  42         {
  43             # lock / unlock required because several writes at the same time
  44             if (parll)
  45                 synchronicity::lock(m)
  46             synchrones[,i] = synchrones[,i] + rowSums(series_chunk[,mi==i])
  47             if (parll)
  48                 synchronicity::unlock(m)
  49         }
  50         NULL
  51     }
  52
  53     K = ncol(medoids)
  54     L = nrow(medoids)
  55     # Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
  56     synchrones = bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
  57     # NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially
  58     parll = (parll && requireNamespace("synchronicity",quietly=TRUE)
  59         && Sys.info()['sysname'] != "Windows")
  60     if (parll)
  61     {
  62         m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk
  63         # mutex and big.matrix objects cannot be passed directly:
  64         # they will be accessed from their description
  65         m_desc <- synchronicity::describe(m)
  66         synchrones_desc = bigmemory::describe(synchrones)
  67         medoids <- bigmemory::as.big.matrix(medoids)
  68         medoids_desc <- bigmemory::describe(medoids)
  69         cl = parallel::makeCluster(ncores_clust)
  70         parallel::clusterExport(cl, envir=environment(),
  71             varlist=c("synchrones_desc","m_desc","medoids_desc","getSeries"))
  72     }
  73
  74     if (verbose)
  75         cat(paste("--- Compute ",K," synchrones with ",nb_curves," series\n", sep=""))
  76
  77     # Balance tasks by splitting 1:nb_curves into groups of size <= nb_series_per_chunk
  78     indices_workers = .splitIndices(seq_len(nb_curves), nb_series_per_chunk)
  79     ignored <-
  80         if (parll)
  81             parallel::parLapply(cl, indices_workers, computeSynchronesChunk)
  82         else
  83             lapply(indices_workers, computeSynchronesChunk)
  84
  85     if (parll)
  86         parallel::stopCluster(cl)
  87
  88     return (synchrones[,])
  89 }