3c1959a62433282034a6176b8ec4b5c79f4b6af4
[epclust.git] / epclust / R / computeSynchrones.R
1 #' computeSynchrones
2 #'
3 #' Compute the synchrones curves (sums of clusters elements) from a matrix of medoids,
4 #' using euclidian distance.
5 #'
6 #' @param medoids matrix of K medoids curves in columns
7 #' @param nb_curves How many series? (this is known, at this stage)
8 #' @inheritParams claws
9 #' @inheritParams computeWerDists
10 #'
11 #' @return A matrix of K synchrones in columns (same length as the series)
12 #'
13 #' @export
14 computeSynchrones <- function(medoids, getSeries, nb_curves,
15 nb_series_per_chunk, ncores=3, verbose=FALSE, parll=TRUE)
16 {
17 # Synchrones computation is embarassingly parallel: compute it by chunks of series
18 computeSynchronesChunk <- function(indices)
19 {
20 # Obtain a chunk of reference series
21 series_chunk <- getSeries(indices)
22 nb_series_chunk <- ncol(series_chunk)
23
24 # Get medoids indices for this chunk of series
25 mi <- assignMedoids(series_chunk, medoids[,])
26
27 # Update synchrones using mi above, grouping it by values of mi (in 1...K)
28 # to avoid too many lock/unlock
29 for (i in seq_len(K))
30 {
31 # lock / unlock required because several writes at the same time
32 if (parll)
33 synchronicity::lock(m)
34 synchrones[,i] <- synchrones[,i] + rowSums(as.matrix(series_chunk[,mi==i]))
35 if (parll)
36 synchronicity::unlock(m)
37 }
38 NULL
39 }
40
41 K <- ncol(medoids)
42 L <- nrow(medoids)
43 # Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
44 synchrones <- bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
45 # NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially
46 parll <- (parll && requireNamespace("synchronicity",quietly=TRUE)
47 && Sys.info()['sysname'] != "Windows")
48
49 if (parll)
50 m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk
51
52 if (verbose)
53 cat(paste("--- Compute ",K," synchrones with ",nb_curves," series\n", sep=""))
54
55 # Balance tasks by splitting 1:nb_curves into groups of size <= nb_series_per_chunk
56 indices_workers <- .splitIndices(seq_len(nb_curves), nb_series_per_chunk)
57 ignored <-
58 if (parll)
59 {
60 parallel::mclapply(indices_workers,
61 function(inds) computeSynchronesChunk(inds), mc.cores=ncores)
62 }
63 else
64 lapply(indices_workers, computeSynchronesChunk)
65
66 return (synchrones[,])
67 }