X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=977e61b235da2ea1ac4e0fc3c4cd40bbddcc049f;hb=e161499b97c782aadfc287c22b55f85724f86fae;hp=892c64c631ffafc67058e5d491ba6e86923ca4ba;hpb=95b5c2e621af8949c5a5eed287d451817c16c24e;p=epclust.git

diff --git a/epclust/R/main.R b/epclust/R/main.R
index 892c64c..977e61b 100644
--- a/epclust/R/main.R
+++ b/epclust/R/main.R
@@ -7,8 +7,9 @@
 #' @param getSeries Access to the (time-)series, which can be of one of the three
 #'   following types:
 #'   \itemize{
-#'     \item matrix: each line contains all the values for one time-serie, ordered by time
-#'     \item connection: any R connection object (e.g. a file) providing lines as described above
+#'     \item [big.]matrix: each line contains all the values for one time series, ordered by time
+#'     \item connection: any R connection object providing lines as described above
+#'     \item character: name of a CSV file containing series in rows (no header)
 #'     \item function: a custom way to retrieve the curves; it has only one argument:
 #'       the indices of the series to be retrieved. See examples
 #'   }
@@ -32,7 +33,7 @@
 #' @param verbose Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage)
 #' @param parll TRUE to fully parallelize; otherwise run sequentially (debug, comparison)
 #'
-#' @return A matrix of the final medoids curves (K2) in rows
+#' @return A big.matrix of the final medoids curves (K2) in rows
 #'
 #' @examples
 #' \dontrun{
@@ -163,22 +164,14 @@ claws = function(getSeries, K1, K2,
 
 	runTwoStepClustering = function(inds)
 	{
-		if (parll)
+		if (parll && ntasks>1)
 			require("epclust", quietly=TRUE)
 		indices_medoids = clusteringTask1(
 			inds, getContribs, K1, nb_series_per_chunk, ncores_clust, verbose, parll)
 		if (WER=="mix")
 		{
-
-
-
-
-#TODO: getSeries(indices_medoids) BAD ; il faudrait une big.matrix de medoids en entree
-			#OK en RAM il y en aura 1000 (donc 1000*K1*17519... OK)
-			#...mais du coup chaque process ne re-dupliquera pas medoids
-
-
-			medoids2 = computeClusters2(getSeries(indices_medoids),
+			medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) )
+			medoids2 = clusteringTask2(medoids1,
 				K2, getSeries, nb_curves, nb_series_per_chunk, ncores_clust, verbose, parll)
 			binarize(medoids2, synchrones_file, nb_series_per_chunk, sep, nbytes, endian)
 			return (vector("integer",0))
@@ -193,25 +186,30 @@ claws = function(getSeries, K1, K2,
 		indices_all[((i-1)*nb_series_per_task+1):upper_bound]
 	})
 	if (verbose)
-		cat(paste("...Run ",ntasks," x stage 1 in parallel\n",sep=""))
+	{
+		message = paste("...Run ",ntasks," x stage 1", sep="")
+		if (WER=="mix")
+			message = paste(message," + stage 2", sep="")
+		cat(paste(message,"\n", sep=""))
+	}
 	if (WER=="mix")
 		{synchrones_file = paste(bin_dir,"synchrones",sep="") ; unlink(synchrones_file)}
-	if (parll)
+	if (parll && ntasks>1)
 	{
 		cl = parallel::makeCluster(ncores_tasks)
 		varlist = c("getSeries","getContribs","K1","K2","verbose","parll",
-			"nb_series_per_chunk","ncores_clust","sep","nbytes","endian")
+			"nb_series_per_chunk","ntasks","ncores_clust","sep","nbytes","endian")
 		if (WER=="mix")
 			varlist = c(varlist, "synchrones_file")
 		parallel::clusterExport(cl, varlist=varlist, envir = environment())
 	}
 
 	# 1000*K1 indices [if WER=="end"], or empty vector [if WER=="mix"] --> series on file
-	if (parll)
+	if (parll && ntasks>1)
 		indices = unlist( parallel::parLapply(cl, indices_tasks, runTwoStepClustering) )
 	else
 		indices = unlist( lapply(indices_tasks, runTwoStepClustering) )
-	if (parll)
+	if (parll && ntasks>1)
 		parallel::stopCluster(cl)
 
 	getRefSeries = getSeries
@@ -230,22 +228,19 @@ claws = function(getSeries, K1, K2,
 			contribs_file, nb_series_per_chunk, nbytes, endian)
 	}
 
-
-
-#TODO: if ntasks==1, c'est deja terminé
-
 	# Run step2 on resulting indices or series (from file)
 	if (verbose)
 		cat("...Run final // stage 1 + stage 2\n")
 	indices_medoids = clusteringTask1(
 		indices, getContribs, K1, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll)
-	medoids = computeClusters2(getSeries(indices_medoids), K2,
+	medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) )
+	medoids2 = clusteringTask2(medoids1, K2,
 		getRefSeries, nb_curves, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll)
 
 	# Cleanup
 	unlink(bin_dir, recursive=TRUE)
 
-	medoids
+	medoids2
 }
 
 #' curvesToContribs