- cat(paste("*** Clustering task 2 on ",nrow(medoids)," lines\n", sep=""))
-
- if (nrow(medoids) <= K2)
- return (medoids)
- synchrones = computeSynchrones(medoids,
- getRefSeries, nb_ref_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
- distances = computeWerDists(synchrones, ncores_clust, verbose, parll)
- medoids[ computeClusters2(distances,K2,verbose), ]
-}
-
-#' @rdname clustering
-#' @export
-computeClusters1 = function(contribs, K1, verbose=FALSE)
-{
-  # Partition the rows of 'contribs' around K1 medoids with PAM. diss=FALSE tells
-  # pam() that 'contribs' holds observations (one per row), not dissimilarities.
-  if (verbose)
-  {
-    nb_lines = nrow(contribs)
-    cat(paste0(" computeClusters1() on ", nb_lines, " lines\n"))
-  }
-
-  pam_result = cluster::pam(contribs, K1, diss=FALSE)
-
-  # id.med: row indices (in 'contribs') of the selected medoids
-  pam_result$id.med
-}
-
-#' @rdname clustering
-#' @export
-computeClusters2 = function(distances, K2, verbose=FALSE)
-{
-  # Partition around K2 medoids with PAM, using 'distances' directly as the
-  # dissimilarities between items (diss=TRUE), rather than as raw observations.
-  if (verbose)
-  {
-    nb_lines = nrow(distances)
-    cat(paste0(" computeClusters2() on ", nb_lines, " lines\n"))
-  }
-
-  pam_result = cluster::pam(distances, K2, diss=TRUE)
-
-  # id.med: indices (rows of 'distances') of the selected medoids
-  pam_result$id.med
-}
-
-#' computeSynchrones
-#'
-#' Compute the synchrones curves (for each medoid, the sum of the series assigned to
-#' its cluster) from a matrix of medoids, using L2 distances.
-#'
-#' @param medoids big.matrix of medoids (curves of same length as initial series)
-#' @param getRefSeries Function to retrieve initial series (e.g. in stage 2 after series
-#' have been replaced by stage-1 medoids)
-#' @param nb_ref_curves How many reference series? (This number is known at this stage)
-#' @inheritParams claws
-#'
-#' @return A big.matrix of size K1 x L where L = data_length
-#'
-#' @export
-computeSynchrones = function(medoids, getRefSeries,
- nb_ref_curves, nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
-{
- if (verbose)
- cat(paste("--- Compute synchrones\n", sep=""))
-
- computeSynchronesChunk = function(indices)
- {
- ref_series = getRefSeries(indices)
- nb_series = nrow(ref_series)
-
- if (parll)
- {
- require("bigmemory", quietly=TRUE)
- require("synchronicity", quietly=TRUE)
- require("epclust", quietly=TRUE)
- synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
- medoids <- bigmemory::attach.big.matrix(medoids_desc)
- m <- synchronicity::attach.mutex(m_desc)
- }
-
-
-
-#TODO: use dbs(),
- #https://www.r-bloggers.com/debugging-parallel-code-with-dbs/
- #http://gforge.se/2015/02/how-to-go-parallel-in-r-basics-tips/
-
-#OK ::
-#write(length(indices), file="TOTO")
-#write( computeMedoidsIndices(medoids@address, getRefSeries(indices[1:600])), file="TOTO")
-#stop()
-
-# write(indices, file="TOTO", ncolumns=10, append=TRUE)
-#write("medoids", file = "TOTO", ncolumns=1, append=TRUE)
-#write(medoids[1,1:3], file = "TOTO", ncolumns=1, append=TRUE)
-#write("synchrones", file = "TOTO", ncolumns=1, append=TRUE)
-#write(synchrones[1,1:3], file = "TOTO", ncolumns=1, append=TRUE)
-
-#NOT OK :: (should just be "ref_series") ...or yes ? race problems mutex then ? ?!
- #get medoids indices for this chunk of series
- mi = computeMedoidsIndices(medoids@address, getRefSeries(indices[1:600])) #ref_series)
-write("MI ::::", file = "TOTO", ncolumns=1, append=TRUE)
-write(mi[1:3], file = "TOTO", ncolumns=1, append=TRUE)
-
-# #R-equivalent, requiring a matrix (thus potentially breaking "fit-in-memory" hope)
-# mat_meds = medoids[,]
-# mi = rep(NA,nb_series)
-# for (i in 1:nb_series)
-# mi[i] <- which.min( rowSums( sweep(mat_meds, 2, ref_series[i,], '-')^2 ) )
-# rm(mat_meds); gc()
-
- for (i in seq_len(nb_series))
- {
- if (parll)
- synchronicity::lock(m)
- synchrones[mi[i],] = synchrones[mi[i],] + ref_series[i,]
- counts[mi[i],1] = counts[mi[i],1] + 1
- if (parll)
- synchronicity::unlock(m)
- }
- }
-
- K = nrow(medoids) ; L = ncol(medoids)
- # Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
- # TODO: if size > RAM (not our case), use file-backed big.matrix
- synchrones = bigmemory::big.matrix(nrow=K, ncol=L, type="double", init=0.)
- counts = bigmemory::big.matrix(nrow=K, ncol=1, type="double", init=0)
- # synchronicity is only for Linux & MacOS; on Windows: run sequentially
- parll = (requireNamespace("synchronicity",quietly=TRUE)
- && parll && Sys.info()['sysname'] != "Windows")
- if (parll)
- {
- m <- synchronicity::boost.mutex()
- m_desc <- synchronicity::describe(m)
- synchrones_desc = bigmemory::describe(synchrones)
- medoids_desc = bigmemory::describe(medoids)