X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2FcomputeWerDists.R;h=0ad5404e07fffa9ac0524b27ee7f5ad6961dc2c4;hb=14c10f2d252f45349e0b4fbf87e17dfbfae39f92;hp=061c3609a54bc10928c5cd36702ad326f34b0eb6;hpb=282342bafdc9ff65c5df98c6e2304d63b33b9fb2;p=epclust.git

diff --git a/epclust/R/computeWerDists.R b/epclust/R/computeWerDists.R
index 061c360..0ad5404 100644
--- a/epclust/R/computeWerDists.R
+++ b/epclust/R/computeWerDists.R
@@ -1,17 +1,19 @@
 #' computeWerDists
 #'
-#' Compute the WER distances between the synchrones curves (in columns), which are
-#' returned (e.g.) by \code{computeSynchrones()}
+#' Compute the WER distances between the series at specified indices, which are
+#' obtained by \code{getSeries(indices)}
 #'
-#' @param indices Range of series indices to cluster
+#' @param indices Indices of the series to consider
+#' @param getSeries Function to retrieve series (argument: 'inds', integer vector),
+#'   as columns of a matrix
+#' @param ncores Number of cores for parallel runs
 #' @inheritParams claws
-#' @inheritParams computeSynchrones
 #'
 #' @return A distances matrix of size K x K where K == length(indices)
 #'
 #' @export
-computeWerDists <- function(indices, getSeries, nb_series_per_chunk, smooth_lvl, nvoice,
-	nbytes, endian, ncores_clust=1, verbose=FALSE, parll=TRUE)
+computeWerDists <- function(indices, getSeries, nb_series_per_chunk, smooth_lvl=3, nvoice=4,
+	nbytes=4, endian=.Platform$endian, ncores=3, verbose=FALSE)
 {
 	n <- length(indices)
 	L <- length(getSeries(1)) #TODO: not very neat way to get L
@@ -25,17 +27,13 @@ computeWerDists <- function(indices, getSeries, nb_series_per_chunk, smooth_lvl,
 
 	cwt_file <- tempfile(pattern="epclust_cwt.bin_")
 	# Compute the getSeries(indices) CWT, and store the results in the binary file
-	computeSaveCWT <- function(indices)
+	computeSaveCWT <- function(inds)
 	{
-		if (parll)
-		{
-			require("bigmemory", quietly=TRUE)
-			require("Rwave", quietly=TRUE)
-			require("epclust", quietly=TRUE)
-		}
+		if (verbose)
+			cat("   Compute save CWT on ",length(inds)," indices\n", sep="")
 
 		# Obtain CWT as big vectors of real part + imaginary part (concatenate)
-		ts_cwt <- sapply(indices, function(i) {
+		ts_cwt <- sapply(inds, function(i) {
 			ts <- scale(ts(getSeries(i)), center=TRUE, scale=FALSE)
 			ts_cwt <- Rwave::cwt(ts, noctave, nvoice, w0=2*pi, twoD=TRUE, plot=FALSE)
 			c( as.double(Re(ts_cwt)),as.double(Im(ts_cwt)) )
@@ -55,18 +53,17 @@ computeWerDists <- function(indices, getSeries, nb_series_per_chunk, smooth_lvl,
 		re_part + 1i * im_part
 	}
 
-	# Compute distance between columns i and j for j>i
+	# Compute distances between columns i and j for j>i
 	computeDistances <- function(i)
 	{
 		if (parll)
 		{
 			# parallel workers start with an empty environment
-			require("bigmemory", quietly=TRUE)
 			require("epclust", quietly=TRUE)
 			Xwer_dist <- bigmemory::attach.big.matrix(Xwer_dist_desc)
 		}
 
-		if (verbose && !parll)
+		if (verbose)
 			cat(paste("   Distances from ",i," to ",i+1,"...",n,"\n", sep=""))
 
 		# Get CWT of column i, and run computations for columns j>i
@@ -78,7 +75,7 @@ computeWerDists <- function(indices, getSeries, nb_series_per_chunk, smooth_lvl,
 			cwt_j <- getCWT(j, L)
 
 			# Compute the ratio of integrals formula 5.6 for WER^2
-			# in https://arxiv.org/abs/1101.4744v2 Â§5.3
+			# in https://arxiv.org/abs/1101.4744v2 paragraph 5.3
 			num <- filterMA(Mod(cwt_i * Conj(cwt_j)), smooth_lvl)
 			WY <- filterMA(Mod(cwt_j * Conj(cwt_j)), smooth_lvl)
 			wer2 <- sum(colSums(num)^2) / sum(colSums(WX) * colSums(WY))
@@ -89,26 +86,31 @@ computeWerDists <- function(indices, getSeries, nb_series_per_chunk, smooth_lvl,
 		Xwer_dist[i,i] <- 0.
 	}
 
+	if (verbose)
+		cat(paste("--- Precompute and serialize synchrones CWT\n", sep=""))
+
+	# Split indices by packets of length at most nb_cwt_per_chunk
+	indices_cwt <- .splitIndices(indices, nb_cwt_per_chunk)
+	# NOTE: the next loop could potentially be run in parallel. Indices would then be
+	# permuted (by serialization order), and synchronization would be required because
+	# of concurrent writes. Probably not worth the effort - but possible.
+	for (inds in indices_cwt)
+		computeSaveCWT(inds)
+
+	parll <- (ncores > 1)
 	if (parll)
 	{
 		# outfile=="" to see stderr/stdout on terminal
-		cl <- parallel::makeCluster(ncores_clust, outfile="")
+		cl <-
+			if (verbose)
+				parallel::makeCluster(ncores, outfile="")
+			else
+				parallel::makeCluster(ncores)
 		Xwer_dist_desc <- bigmemory::describe(Xwer_dist)
-		parallel::clusterExport(cl, varlist=c("parll","nb_cwt_per_chunk","n","L",
-			"Xwer_dist_desc","noctave","nvoice","getCWT"), envir=environment())
+		parallel::clusterExport(cl, envir=environment(),
+			varlist=c("parll","n","L","Xwer_dist_desc","getCWT","verbose"))
 	}
 
-	if (verbose)
-		cat(paste("--- Precompute and serialize synchrones CWT\n", sep=""))
-
-	# Split indices by packets of length at most nb_cwt_per_chunk
-	indices_cwt <- .splitIndices(seq_len(n), nb_cwt_per_chunk)
-	ignored <-
-		if (parll)
-			parallel::parLapply(cl, indices_cwt, computeSaveCWT)
-		else
-			lapply(indices_cwt, computeSaveCWT)
-
 	if (verbose)
 		cat(paste("--- Compute WER distances\n", sep=""))