X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fclustering.R;h=3e7fd3866922d1c56b48741642ca88fd55d5082c;hb=dc86eb0c992e6e4ab119d48398d040c4cf3a75fd;hp=bea073a660e3c4f201546caa87d539522d4671a9;hpb=40f12a2f66d06fd77183ea02b996f5c66f90761c;p=epclust.git

diff --git a/epclust/R/clustering.R b/epclust/R/clustering.R
index bea073a..3e7fd38 100644
--- a/epclust/R/clustering.R
+++ b/epclust/R/clustering.R
@@ -1,44 +1,51 @@
-#' @name clustering
-#' @rdname clustering
-#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
+#' Two-stage clustering, within one task (see \code{claws()})
 #'
-#' @title Two-stage clustering, withing one task (see \code{claws()})
+#' \code{clusteringTask1()} runs one full stage-1 task, which consists of iterated
+#' clustering on nb_curves / ntasks energy contributions, computed from
+#' discrete wavelet coefficients.
+#' \code{clusteringTask2()} runs a full stage-2 task, which consists of computing WER
+#' distances between the medoids (indices) output by stage 1, then applying
+#' the second clustering algorithm to the distance matrix.
 #'
-#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
-#'   iterated stage 1 clustering on nb_curves / ntasks energy contributions, computed
-#'   through discrete wavelets coefficients.
-#'   \code{clusteringTask2()} runs a full stage-2 task, which consists in
-#'   WER distances computations between medoids indices output from stage 1,
-#'   before applying the second clustering algorithm, on the distances matrix.
-#'
-#' @param indices Range of series indices to cluster
 #' @param getContribs Function to retrieve contributions from initial series indices:
-#'   \code{getContribs(indices)} outputs a contributions matrix
+#'   \code{getContribs(indices)} outputs a contributions matrix, in columns
 #' @inheritParams claws
 #' @inheritParams computeSynchrones
+#' @inheritParams computeWerDists
+#'
+#' @return The indices of the computed medoids (K1 for stage 1, K2 for stage 2).
 #'
-#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
-#'   Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()}
-#'   outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters)
+#' @name clustering
+#' @rdname clustering
+#' @aliases clusteringTask1 clusteringTask2
 NULL
 
 #' @rdname clustering
 #' @export
-clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_chunk,
-	ncores_clust=1, verbose=FALSE, parll=TRUE)
+clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
+	ncores_clust=3, verbose=FALSE, parll=TRUE)
 {
+	if (verbose)
+		cat(paste("*** Clustering task 1 on ",length(indices)," series\n", sep=""))
+
+	if (length(indices) <= K1)
+		return (indices)
+
 	if (parll)
 	{
-		cl = parallel::makeCluster(ncores_clust, outfile = "")
+		# outfile = "" (verbose mode only) lets worker stderr/stdout show on the terminal
+		cl <-
+			if (verbose)
+				parallel::makeCluster(ncores_clust, outfile = "")
+			else
+				parallel::makeCluster(ncores_clust)
 		parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
 	}
 	# Iterate clustering algorithm 1 until K1 medoids are found
 	while (length(indices) > K1)
 	{
 		# Balance tasks by splitting the indices set - as evenly as possible
-		indices_workers = .splitIndices(indices, nb_items_clust1)
-		if (verbose)
-			cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
+		indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
 		indices <-
 			if (parll)
 			{
@@ -53,6 +60,11 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_c
 					inds[ algoClust1(getContribs(inds), K1) ]
 				) )
 			}
+		if (verbose)
+		{
+			cat(paste("*** [iterated] Clustering task 1: now ",
+				length(indices)," medoids\n", sep=""))
+		}
 	}
 	if (parll)
 		parallel::stopCluster(cl)
@@ -62,26 +74,21 @@ clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_series_per_c
 
 #' @rdname clustering
 #' @export
-clusteringTask2 = function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
-	nvoice, nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
+clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
+	smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE, parll=TRUE)
 {
 	if (verbose)
-		cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))
-
-	if (ncol(medoids) <= K2)
-		return (medoids)
+		cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep=""))
 
-	# A) Obtain synchrones, that is to say the cumulated power consumptions
-	#    for each of the K1 initial groups
-	synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
-		nb_series_per_chunk, ncores_clust, verbose, parll)
+	if (length(indices) <= K2)
+		return (indices)
 
-	# B) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
-	distances = computeWerDists(
-		synchrones, nvoice, nbytes, endian, ncores_clust, verbose, parll)
+	# A) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
+	distances <- computeWerDists(indices, getSeries, nb_series_per_chunk,
+		smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose, parll)
 
-	# C) Apply clustering algorithm 2 on the WER distances matrix
+	# B) Apply clustering algorithm 2 on the WER distances matrix
 	if (verbose)
 		cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
-	medoids[ ,algoClust2(distances,K2) ]
+	indices[ algoClust2(distances,K2) ]
 }