X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=977e61b235da2ea1ac4e0fc3c4cd40bbddcc049f;hb=e161499b97c782aadfc287c22b55f85724f86fae;hp=bba0618899d4c26a97746f0ae702a51560fa0abe;hpb=c45fd66342e40c8b5387fc6f0059c4d3a9718340;p=epclust.git diff --git a/epclust/R/main.R b/epclust/R/main.R index bba0618..977e61b 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -7,8 +7,9 @@ #' @param getSeries Access to the (time-)series, which can be of one of the three #' following types: #' \itemize{ -#' \item matrix: each line contains all the values for one time-serie, ordered by time -#' \item connection: any R connection object (e.g. a file) providing lines as described above +#' \item [big.]matrix: each line contains all the values for one time-serie, ordered by time +#' \item connection: any R connection object providing lines as described above +#' \item character: name of a CSV file containing series in rows (no header) #' \item function: a custom way to retrieve the curves; it has only one argument: #' the indices of the series to be retrieved. See examples #' } @@ -32,7 +33,7 @@ #' @param verbose Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage) #' @param parll TRUE to fully parallelize; otherwise run sequentially (debug, comparison) #' -#' @return A matrix of the final medoids curves (K2) in rows +#' @return A big.matrix of the final medoids curves (K2) in rows #' #' @examples #' \dontrun{ @@ -144,7 +145,7 @@ claws = function(getSeries, K1, K2, getSeries = function(inds) getDataInFile(inds, series_file, nbytes, endian) } - # Serialize all computed wavelets contributions onto a file + # Serialize all computed wavelets contributions into a file contribs_file = paste(bin_dir,"contribs",sep="") ; unlink(contribs_file) index = 1 nb_curves = 0 @@ -163,13 +164,14 @@ claws = function(getSeries, K1, K2, runTwoStepClustering = function(inds) { - if (parll) + if (parll && ntasks>1) require("epclust", quietly=TRUE) indices_medoids = clusteringTask1( inds, getContribs, K1, nb_series_per_chunk, ncores_clust, verbose, parll) if (WER=="mix") { - medoids2 = computeClusters2(getSeries(indices_medoids), + medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) ) + medoids2 = clusteringTask2(medoids1, K2, getSeries, nb_curves, nb_series_per_chunk, ncores_clust, verbose, parll) binarize(medoids2, synchrones_file, nb_series_per_chunk, sep, nbytes, endian) return (vector("integer",0)) @@ -184,25 +186,30 @@ claws = function(getSeries, K1, K2, indices_all[((i-1)*nb_series_per_task+1):upper_bound] }) if (verbose) - cat(paste("...Run ",ntasks," x stage 1 in parallel\n",sep="")) + { + message = paste("...Run ",ntasks," x stage 1", sep="") + if (WER=="mix") + message = paste(message," + stage 2", sep="") + cat(paste(message,"\n", sep="")) + } if (WER=="mix") {synchrones_file = paste(bin_dir,"synchrones",sep="") ; unlink(synchrones_file)} - if (parll) + if (parll && ntasks>1) { cl = parallel::makeCluster(ncores_tasks) varlist = c("getSeries","getContribs","K1","K2","verbose","parll", - "nb_series_per_chunk","ncores_clust","sep","nbytes","endian") + "nb_series_per_chunk","ntasks","ncores_clust","sep","nbytes","endian") if (WER=="mix") varlist = c(varlist, "synchrones_file") parallel::clusterExport(cl, varlist=varlist, envir = environment()) } # 1000*K1 indices [if WER=="end"], or empty vector [if WER=="mix"] --> series on file - if (parll) + if (parll && ntasks>1) indices = unlist( parallel::parLapply(cl, indices_tasks, runTwoStepClustering) ) else indices = unlist( lapply(indices_tasks, runTwoStepClustering) ) - if (parll) + if (parll && ntasks>1) parallel::stopCluster(cl) getRefSeries = getSeries @@ -226,13 +233,14 @@ claws = function(getSeries, K1, K2, cat("...Run final // stage 1 + stage 2\n") indices_medoids = clusteringTask1( indices, getContribs, K1, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll) - medoids = computeClusters2(getSeries(indices_medoids), K2, + medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) ) + medoids2 = clusteringTask2(medoids1, K2, getRefSeries, nb_curves, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll) # Cleanup unlink(bin_dir, recursive=TRUE) - medoids + medoids2 } #' curvesToContribs