[epclust.git] / epclust / R / computeSynchrones.R

#' computeSynchrones
#'
#' Compute the synchrones curves (sums of clusters elements) from a matrix of medoids,
#' using Euclidean distance: every series is assigned to its closest medoid, then
#' added to the synchrone (running sum) of that medoid.
#'
#' @param medoids matrix of medoids in columns (curves of same length as the series)
#' @param getSeries Function to retrieve series (argument: 'indices', integer vector);
#'   its result is used as a matrix with one series per column
#' @param nb_curves How many series? (this is known, at this stage)
#' @inheritParams claws
#'
#' @return A matrix of K synchrones in columns (same length as the series)
#'
#' @export
computeSynchrones <- function(medoids, getSeries, nb_curves,
	nb_series_per_chunk, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	K <- ncol(medoids)
	L <- nrow(medoids)

	# Synchrones computation is embarrassingly parallel: compute it by chunks of series.
	# Each call adds the contribution of one chunk to the shared 'synchrones' matrix.
	computeSynchronesChunk <- function(indices)
	{
		if (parll)
		{
			require("bigmemory", quietly=TRUE)
			requireNamespace("synchronicity", quietly=TRUE)
			require("epclust", quietly=TRUE)
			# The big.matrix objects need to be attached to be usable on the workers
			synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
			medoids <- bigmemory::attach.big.matrix(medoids_desc)
			m <- synchronicity::attach.mutex(m_desc)
		}

		# Obtain a chunk of reference series
		series_chunk <- getSeries(indices)
		if (is.null(series_chunk))
			return (NULL)
		# A single series may come back as a plain vector: restore the column layout
		if (is.null(dim(series_chunk)))
			series_chunk <- matrix(series_chunk, nrow=L)
		nb_series_chunk <- ncol(series_chunk)

		# Work on an ordinary R matrix: 'medoids' is a big.matrix in the parallel case,
		# and arithmetic must not depend on that class. Extracted once per chunk.
		ref_medoids <- medoids[, , drop=FALSE]

		# Get medoids indices for this chunk of series: index of the closest medoid
		# (squared Euclidean distance). The series is recycled along each column.
		mi <- vapply(seq_len(nb_series_chunk), function(i) {
			which.min( colSums( (ref_medoids - series_chunk[,i])^2 ) )
		}, integer(1))

		# Update synchrones using mi above, grouping it by values of mi (in 1...K)
		# to avoid too many lock/unlock
		for (k in seq_len(K))
		{
			members <- which(mi == k)
			if (length(members) == 0)
				next
			# drop=FALSE: a single matching series must remain a one-column matrix,
			# otherwise rowSums() fails. Computed outside the lock to keep it short.
			partial_sum <- rowSums(series_chunk[, members, drop=FALSE])

			# lock / unlock required because several writes at the same time
			if (parll)
				synchronicity::lock(m)
			synchrones[,k] <- synchrones[,k] + partial_sum
			if (parll)
				synchronicity::unlock(m)
		}
		NULL
	}

	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	synchrones <- bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
	# NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially
	parll <- (parll && requireNamespace("synchronicity", quietly=TRUE)
		&& Sys.info()['sysname'] != "Windows")
	if (parll)
	{
		m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk
		# mutex and big.matrix objects cannot be passed directly:
		# they will be accessed from their description
		m_desc <- synchronicity::describe(m)
		synchrones_desc <- bigmemory::describe(synchrones)
		medoids <- bigmemory::as.big.matrix(medoids)
		medoids_desc <- bigmemory::describe(medoids)
		cl <- parallel::makeCluster(ncores_clust)
		# Release the workers even if a chunk fails
		on.exit(parallel::stopCluster(cl), add=TRUE)
		parallel::clusterExport(cl, envir=environment(),
			varlist=c("synchrones_desc","m_desc","medoids_desc","getSeries"))
	}

	if (verbose)
		cat(paste0("--- Compute ",K," synchrones with ",nb_curves," series\n"))

	# Balance tasks by splitting 1:nb_curves into groups of size <= nb_series_per_chunk
	indices_workers <- .splitIndices(seq_len(nb_curves), nb_series_per_chunk)
	# Results are discarded: chunks only act through their writes into 'synchrones'
	ignored <-
		if (parll)
			parallel::parLapply(cl, indices_workers, computeSynchronesChunk)
		else
			lapply(indices_workers, computeSynchronesChunk)

	# Copy the shared big.matrix content into a regular R object
	return (synchrones[,])
}
Commit	Line	Data
	1	#' computeSynchrones
	2	#'
	3	#' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,
	4	#' using euclidian distance.
	5	#'
	6	#' @param medoids matrix of medoids in columns (curves of same length as the series)
	7	#' @param getSeries Function to retrieve series (argument: 'indices', integer vector)
	8	#' @param nb_curves How many series? (this is known, at this stage)
	9	#' @inheritParams claws
	10	#'
	11	#' @return A matrix of K synchrones in columns (same length as the series)
	12	#'
	13	#' @export
	14	computeSynchrones = function(medoids, getSeries, nb_curves,
	15	nb_series_per_chunk, ncores_clust=1,verbose=FALSE,parll=TRUE)
	16	{
	17	# Synchrones computation is embarassingly parallel: compute it by chunks of series
	18	computeSynchronesChunk = function(indices)
	19	{
	20	if (parll)
	21	{
	22	require("bigmemory", quietly=TRUE)
	23	requireNamespace("synchronicity", quietly=TRUE)
	24	require("epclust", quietly=TRUE)
	25	# The big.matrix objects need to be attached to be usable on the workers
	26	synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
	27	medoids <- bigmemory::attach.big.matrix(medoids_desc)
	28	m <- synchronicity::attach.mutex(m_desc)
	29	}
	30
	31	# Obtain a chunk of reference series
	32	series_chunk = getSeries(indices)
	33	nb_series_chunk = ncol(series_chunk)
	34
	35	# Get medoids indices for this chunk of series
	36	for (i in seq_len(nb_series_chunk))
	37	mi[i] <- which.min( colSums( sweep(medoids, 1, series_chunk[,i], '-')^2 ) )
	38
	39	# Update synchrones using mi above, grouping it by values of mi (in 1...K)
	40	# to avoid too many lock/unlock
	41	for (i in seq_len(K))
	42	{
	43	# lock / unlock required because several writes at the same time
	44	if (parll)
	45	synchronicity::lock(m)
	46	synchrones[,i] = synchrones[,i] + rowSums(series_chunk[,mi==i])
	47	if (parll)
	48	synchronicity::unlock(m)
	49	}
	50	NULL
	51	}
	52
	53	K = ncol(medoids)
	54	L = nrow(medoids)
	55	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	56	synchrones = bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
	57	# NOTE: synchronicity is only for Linux & MacOS; on Windows: run sequentially
	58	parll = (parll && requireNamespace("synchronicity",quietly=TRUE)
	59	&& Sys.info()['sysname'] != "Windows")
	60	if (parll)
	61	{
	62	m <- synchronicity::boost.mutex() #for lock/unlock, see computeSynchronesChunk
	63	# mutex and big.matrix objects cannot be passed directly:
	64	# they will be accessed from their description
	65	m_desc <- synchronicity::describe(m)
	66	synchrones_desc = bigmemory::describe(synchrones)
	67	medoids <- bigmemory::as.big.matrix(medoids)
	68	medoids_desc <- bigmemory::describe(medoids)
	69	cl = parallel::makeCluster(ncores_clust)
	70	parallel::clusterExport(cl, envir=environment(),
	71	varlist=c("synchrones_desc","m_desc","medoids_desc","getRefSeries"))
	72	}
	73
	74	if (verbose)
	75	cat(paste("--- Compute ",K," synchrones with ",nb_curves," series\n", sep=""))
	76
	77	# Balance tasks by splitting 1:nb_ref_curves into groups of size <= nb_series_per_chunk
	78	indices_workers = .splitIndices(seq_len(nb_ref_curves), nb_series_per_chunk)
	79	ignored <-
	80	if (parll)
	81	parallel::parLapply(cl, indices_workers, computeSynchronesChunk)
	82	else
	83	lapply(indices_workers, computeSynchronesChunk)
	84
	85	if (parll)
	86	parallel::stopCluster(cl)
	87
	88	return (synchrones[,])
	89	}