[epclust.git] / epclust / R / computeSynchrones.R

#' computeSynchrones
#'
#' Compute the synchrones curves (sums of clusters elements) from a matrix of medoids,
#' using euclidian distance.
#'
#' @param medoids matrix of K medoids curves in columns
#' @param nb_curves How many series? (this is known, at this stage)
#' @inheritParams claws
#' @inheritParams computeWerDists
#'
#' @return A matrix of K synchrones in columns (same length as the series)
#'
#' @export
computeSynchrones <- function(medoids, getSeries, nb_curves,
	nb_series_per_chunk, ncores=3, verbose=FALSE)
{
	# Synchrones computation is embarassingly parallel: compute it by chunks of series
	computeSynchronesChunk <- function(indices)
	{
		# Obtain a chunk of reference series
		series_chunk <- getSeries(indices)
		nb_series_chunk <- ncol(series_chunk)

		# Get medoids indices for this chunk of series
		mi <- assignMedoids(series_chunk, medoids[,])

		# Update synchrones using mi above, grouping it by values of mi (in 1...K)
		# to avoid too many lock/unlock
		for (i in seq_len(K))
		{
			# lock / unlock required because several writes at the same time
			if (parll)
				synchronicity::lock(m)
			synchrones[,i] <- synchrones[,i] + rowSums(as.matrix(series_chunk[,mi==i]))
			if (parll)
				synchronicity::unlock(m)
		}
		NULL
	}

	K <- ncol(medoids)
	L <- nrow(medoids)
	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	synchrones <- bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
	# NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially
	parll <- (ncores > 1 && requireNamespace("synchronicity",quietly=TRUE)
		&& Sys.info()['sysname'] != "Windows")

	if (parll)
		m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk

	if (verbose)
		cat(paste("--- Compute ",K," synchrones with ",nb_curves," series\n", sep=""))

	# Balance tasks by splitting 1:nb_curves into groups of size <= nb_series_per_chunk
	indices_workers <- .splitIndices(seq_len(nb_curves), nb_series_per_chunk)
	ignored <-
		if (parll)
		{
			parallel::mclapply(indices_workers,
				function(inds) computeSynchronesChunk(inds), mc.cores=ncores)
		}
		else
			lapply(indices_workers, computeSynchronesChunk)

	return (synchrones[,])
}
Commit	Line	Data
	1	#' computeSynchrones
	2	#'
	3	#' Compute the synchrones curves (sums of clusters elements) from a matrix of medoids,
	4	#' using euclidian distance.
	5	#'
	6	#' @param medoids matrix of K medoids curves in columns
	7	#' @param nb_curves How many series? (this is known, at this stage)
	8	#' @inheritParams claws
	9	#' @inheritParams computeWerDists
	10	#'
	11	#' @return A matrix of K synchrones in columns (same length as the series)
	12	#'
	13	#' @export
	14	computeSynchrones <- function(medoids, getSeries, nb_curves,
	15	nb_series_per_chunk, ncores=3, verbose=FALSE)
	16	{
	17	# Synchrones computation is embarassingly parallel: compute it by chunks of series
	18	computeSynchronesChunk <- function(indices)
	19	{
	20	# Obtain a chunk of reference series
	21	series_chunk <- getSeries(indices)
	22	nb_series_chunk <- ncol(series_chunk)
	23
	24	# Get medoids indices for this chunk of series
	25	mi <- assignMedoids(series_chunk, medoids[,])
	26
	27	# Update synchrones using mi above, grouping it by values of mi (in 1...K)
	28	# to avoid too many lock/unlock
	29	for (i in seq_len(K))
	30	{
	31	# lock / unlock required because several writes at the same time
	32	if (parll)
	33	synchronicity::lock(m)
	34	synchrones[,i] <- synchrones[,i] + rowSums(as.matrix(series_chunk[,mi==i]))
	35	if (parll)
	36	synchronicity::unlock(m)
	37	}
	38	NULL
	39	}
	40
	41	K <- ncol(medoids)
	42	L <- nrow(medoids)
	43	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	44	synchrones <- bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
	45	# NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially
	46	parll <- (ncores > 1 && requireNamespace("synchronicity",quietly=TRUE)
	47	&& Sys.info()['sysname'] != "Windows")
	48
	49	if (parll)
	50	m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk
	51
	52	if (verbose)
	53	cat(paste("--- Compute ",K," synchrones with ",nb_curves," series\n", sep=""))
	54
	55	# Balance tasks by splitting 1:nb_curves into groups of size <= nb_series_per_chunk
	56	indices_workers <- .splitIndices(seq_len(nb_curves), nb_series_per_chunk)
	57	ignored <-
	58	if (parll)
	59	{
	60	parallel::mclapply(indices_workers,
	61	function(inds) computeSynchronesChunk(inds), mc.cores=ncores)
	62	}
	63	else
	64	lapply(indices_workers, computeSynchronesChunk)
	65
	66	return (synchrones[,])
	67	}