X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=977e61b235da2ea1ac4e0fc3c4cd40bbddcc049f;hb=e161499b97c782aadfc287c22b55f85724f86fae;hp=892c64c631ffafc67058e5d491ba6e86923ca4ba;hpb=95b5c2e621af8949c5a5eed287d451817c16c24e;p=epclust.git diff --git a/epclust/R/main.R b/epclust/R/main.R index 892c64c..977e61b 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -7,8 +7,9 @@ #' @param getSeries Access to the (time-)series, which can be of one of the three #' following types: #' \itemize{ -#' \item matrix: each line contains all the values for one time-serie, ordered by time -#' \item connection: any R connection object (e.g. a file) providing lines as described above +#' \item [big.]matrix: each line contains all the values for one time-serie, ordered by time +#' \item connection: any R connection object providing lines as described above +#' \item character: name of a CSV file containing series in rows (no header) #' \item function: a custom way to retrieve the curves; it has only one argument: #' the indices of the series to be retrieved. See examples #' } @@ -32,7 +33,7 @@ #' @param verbose Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage) #' @param parll TRUE to fully parallelize; otherwise run sequentially (debug, comparison) #' -#' @return A matrix of the final medoids curves (K2) in rows +#' @return A big.matrix of the final medoids curves (K2) in rows #' #' @examples #' \dontrun{ @@ -163,22 +164,14 @@ claws = function(getSeries, K1, K2, runTwoStepClustering = function(inds) { - if (parll) + if (parll && ntasks>1) require("epclust", quietly=TRUE) indices_medoids = clusteringTask1( inds, getContribs, K1, nb_series_per_chunk, ncores_clust, verbose, parll) if (WER=="mix") { - - - - -#TODO: getSeries(indices_medoids) BAD ; il faudrait une big.matrix de medoids en entree - #OK en RAM il y en aura 1000 (donc 1000*K1*17519... OK) - #...mais du coup chaque process ne re-dupliquera pas medoids - - - medoids2 = computeClusters2(getSeries(indices_medoids), + medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) ) + medoids2 = clusteringTask2(medoids1, K2, getSeries, nb_curves, nb_series_per_chunk, ncores_clust, verbose, parll) binarize(medoids2, synchrones_file, nb_series_per_chunk, sep, nbytes, endian) return (vector("integer",0)) @@ -193,25 +186,30 @@ claws = function(getSeries, K1, K2, indices_all[((i-1)*nb_series_per_task+1):upper_bound] }) if (verbose) - cat(paste("...Run ",ntasks," x stage 1 in parallel\n",sep="")) + { + message = paste("...Run ",ntasks," x stage 1", sep="") + if (WER=="mix") + message = paste(message," + stage 2", sep="") + cat(paste(message,"\n", sep="")) + } if (WER=="mix") {synchrones_file = paste(bin_dir,"synchrones",sep="") ; unlink(synchrones_file)} - if (parll) + if (parll && ntasks>1) { cl = parallel::makeCluster(ncores_tasks) varlist = c("getSeries","getContribs","K1","K2","verbose","parll", - "nb_series_per_chunk","ncores_clust","sep","nbytes","endian") + "nb_series_per_chunk","ntasks","ncores_clust","sep","nbytes","endian") if (WER=="mix") varlist = c(varlist, "synchrones_file") parallel::clusterExport(cl, varlist=varlist, envir = environment()) } # 1000*K1 indices [if WER=="end"], or empty vector [if WER=="mix"] --> series on file - if (parll) + if (parll && ntasks>1) indices = unlist( parallel::parLapply(cl, indices_tasks, runTwoStepClustering) ) else indices = unlist( lapply(indices_tasks, runTwoStepClustering) ) - if (parll) + if (parll && ntasks>1) parallel::stopCluster(cl) getRefSeries = getSeries @@ -230,22 +228,19 @@ claws = function(getSeries, K1, K2, contribs_file, nb_series_per_chunk, nbytes, endian) } - - -#TODO: if ntasks==1, c'est deja terminé - # Run step2 on resulting indices or series (from file) if (verbose) cat("...Run final // stage 1 + stage 2\n") indices_medoids = clusteringTask1( indices, getContribs, K1, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll) - medoids = computeClusters2(getSeries(indices_medoids), K2, + medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) ) + medoids2 = clusteringTask2(medoids1, K2, getRefSeries, nb_curves, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll) # Cleanup unlink(bin_dir, recursive=TRUE) - medoids + medoids2 } #' curvesToContribs