[epclust.git] / epclust / R / clustering.R

#' Two-stage clustering, within one task (see \code{claws()})
#'
#' \code{clusteringTask1()} runs one full stage-1 task, which consists of iterating
#' the stage-1 clustering on nb_curves / ntasks energy contributions, computed from
#' discrete wavelet coefficients.
#' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing the
#' WER distances between the medoids (indices) output by stage 1, then applying
#' the second clustering algorithm to the distance matrix.
#'
#' @param getContribs Function to retrieve contributions from initial series indices:
#'   \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams claws
#' @inheritParams computeSynchrones
#' @inheritParams computeWerDists
#'
#' @return The indices of the computed (resp. K1 and K2) medoids.
#'
#' @name clustering
#' @rdname clustering
#' @aliases clusteringTask1 clusteringTask2
NULL

#' @rdname clustering
#' @export
clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
	ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	# Stage-1 task: repeatedly split 'indices' into chunks, cluster each chunk, and
	# keep only the medoids, until no more than K1 indices remain.
	#
	# indices        series indices to reduce
	# getContribs    getContribs(inds) returns the contributions matrix for 'inds'
	# K1             loop stops once length(indices) <= K1; also passed to algoClust1
	# algoClust1     algoClust1(contribs, K1) returns positions (within the chunk)
	#                of the selected medoids
	# nb_items_clust forwarded to .splitIndices() to size the chunks
	# ncores_clust   number of workers started when parll is TRUE
	# verbose        if TRUE, print one progress line per iteration
	# parll          if TRUE, chunks are processed on a 'parallel' cluster
	#
	# Returns the indices of the medoids (a subset of the input 'indices').

	if (parll)
	{
		cl <- parallel::makeCluster(ncores_clust, outfile = "")
		# Registered immediately so the workers are shut down on every exit path,
		# including an error raised by getContribs(), algoClust1() or the export below.
		on.exit(parallel::stopCluster(cl), add = TRUE)
		parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
	}

	# Cluster one chunk and map the medoid positions back to series indices
	medoidsOfChunk <- function(inds)
		inds[ algoClust1(getContribs(inds), K1) ]

	# Iterate clustering algorithm 1 until K1 medoids are found
	while (length(indices) > K1)
	{
		# Balance tasks by splitting the indices set - as evenly as possible
		indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
		if (verbose)
			cat(paste0("*** [iterated] Clustering task 1 on ",length(indices)," series\n"))
		indices <-
			if (parll)
			{
				unlist( parallel::parLapply(cl, indices_workers, function(inds) {
					# Best-effort load of the package on the worker (kept as require()
					# so that a missing package does not abort by itself)
					require("epclust", quietly=TRUE)
					medoidsOfChunk(inds)
				}) )
			}
			else
				unlist( lapply(indices_workers, medoidsOfChunk) )
	}

	indices #medoids
}

#' @rdname clustering
#' @export
clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
	nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	# Stage-2 task: compute the WER distances between the given medoids, then let
	# algoClust2() select K2 of them. When there are already no more than K2
	# indices, they are returned unchanged and no distance is computed.
	#
	# algoClust2(distances, K2) must return positions within 'indices'.
	# The remaining arguments are forwarded as-is to computeWerDists().

	# Progress logger: message parts are only evaluated and printed if verbose
	logMsg <- function(...)
	{
		if (verbose)
			cat(paste0(...))
	}

	logMsg("*** Clustering task 2 on ", length(indices), " medoids\n")

	if (length(indices) > K2)
	{
		# A) WER distances (Wavelets Extended coefficient of deteRmination)
		werDists <- computeWerDists(indices, getSeries, nb_series_per_chunk,
			nvoice, nbytes, endian, ncores_clust, verbose, parll)

		# B) Second clustering algorithm, applied on the distances
		logMsg("*** algoClust2() on ", nrow(werDists), " items\n")
		indices[ algoClust2(werDists, K2) ]
	}
	else
		indices
}
Commit	Line	Data
	1	#' Two-stage clustering, within one task (see \code{claws()})
	2	#'
	3	#' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated
	4	#' stage 1 clustering on nb_curves / ntasks energy contributions, computed through
	5	#' discrete wavelets coefficients.
	6	#' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances
	7	#' computations between medoids (indices) output from stage 1, before applying
	8	#' the second clustering algorithm on the distances matrix.
	9	#'
	10	#' @param getContribs Function to retrieve contributions from initial series indices:
	11	#' \code{getContribs(indices)} outputs a contributions matrix
	12	#' @inheritParams claws
	13	#' @inheritParams computeSynchrones
	14	#' @inheritParams computeWerDists
	15	#'
	16	#' @return The indices of the computed (resp. K1 and K2) medoids.
	17	#'
	18	#' @name clustering
	19	#' @rdname clustering
	20	#' @aliases clusteringTask1 clusteringTask2
	21	NULL
	22
	23	#' @rdname clustering
	24	#' @export
	25	clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust,
	26	ncores_clust=1, verbose=FALSE, parll=TRUE)
	27	{
	28	if (parll)
	29	{
	30	cl = parallel::makeCluster(ncores_clust, outfile = "")
	31	parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
	32	}
	33	# Iterate clustering algorithm 1 until K1 medoids are found
	34	while (length(indices) > K1)
	35	{
	36	# Balance tasks by splitting the indices set - as evenly as possible
	37	indices_workers = .splitIndices(indices, nb_items_clust, min_size=K1+1)
	38	if (verbose)
	39	cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
	40	indices <-
	41	if (parll)
	42	{
	43	unlist( parallel::parLapply(cl, indices_workers, function(inds) {
	44	require("epclust", quietly=TRUE)
	45	inds[ algoClust1(getContribs(inds), K1) ]
	46	}) )
	47	}
	48	else
	49	{
	50	unlist( lapply(indices_workers, function(inds)
	51	inds[ algoClust1(getContribs(inds), K1) ]
	52	) )
	53	}
	54	}
	55	if (parll)
	56	parallel::stopCluster(cl)
	57
	58	indices #medoids
	59	}
	60
	61	#' @rdname clustering
	62	#' @export
	63	clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
	64	nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
	65	{
	66	if (verbose)
	67	cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep=""))
	68
	69	if (length(indices) <= K2)
	70	return (indices)
	71
	72	# A) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
	73	distances = computeWerDists(indices, getSeries, nb_series_per_chunk,
	74	nvoice, nbytes, endian, ncores_clust, verbose, parll)
	75
	76	# B) Apply clustering algorithm 2 on the WER distances matrix
	77	if (verbose)
	78	cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
	79	indices[ algoClust2(distances,K2) ]
	80	}