X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=9ba23ae64fcd67a793b7a1a92eb4f8a5ad8fe97a;hb=363ae13430cdee6ba76b42b7316aa4b292b04d93;hp=9064dfaec6ee0bed2fa0a5544a093bab91141e5e;hpb=492cd9e74a79cbcc0ecde55fa3071a44b7e463dc;p=epclust.git diff --git a/epclust/R/main.R b/epclust/R/main.R index 9064dfa..9ba23ae 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -7,8 +7,9 @@ #' @param getSeries Access to the (time-)series, which can be of one of the three #' following types: #' \itemize{ -#' \item matrix: each line contains all the values for one time-serie, ordered by time -#' \item connection: any R connection object (e.g. a file) providing lines as described above +#' \item [big.]matrix: each line contains all the values for one time-serie, ordered by time +#' \item connection: any R connection object providing lines as described above +#' \item character: name of a CSV file containing series in rows (no header) #' \item function: a custom way to retrieve the curves; it has only one argument: #' the indices of the series to be retrieved. See examples #' } @@ -32,7 +33,7 @@ #' @param verbose Level of verbosity (0/FALSE for nothing or 1/TRUE for all; devel stage) #' @param parll TRUE to fully parallelize; otherwise run sequentially (debug, comparison) #' -#' @return A matrix of the final medoids curves (K2) in rows +#' @return A big.matrix of the final medoids curves (K2) in rows #' #' @examples #' \dontrun{ @@ -144,7 +145,7 @@ claws = function(getSeries, K1, K2, getSeries = function(inds) getDataInFile(inds, series_file, nbytes, endian) } - # Serialize all computed wavelets contributions onto a file + # Serialize all computed wavelets contributions into a file contribs_file = paste(bin_dir,"contribs",sep="") ; unlink(contribs_file) index = 1 nb_curves = 0 @@ -161,31 +162,17 @@ claws = function(getSeries, K1, K2, if (nb_series_per_task < min_series_per_chunk) stop("Too many tasks: less series in one task than min_series_per_chunk!") - # Cluster contributions in parallel (by nb_series_per_chunk) - indices_all = if (random) sample(nb_curves) else seq_len(nb_curves) - indices_tasks = lapply(seq_len(ntasks), function(i) { - upper_bound = ifelse( i1) require("epclust", quietly=TRUE) indices_medoids = clusteringTask1( inds, getContribs, K1, nb_series_per_chunk, ncores_clust, verbose, parll) if (WER=="mix") { - medoids2 = computeClusters2(getSeries(indices_medoids), + require("bigmemory", quietly=TRUE) + medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) ) + medoids2 = clusteringTask2(medoids1, K2, getSeries, nb_curves, nb_series_per_chunk, ncores_clust, verbose, parll) binarize(medoids2, synchrones_file, nb_series_per_chunk, sep, nbytes, endian) return (vector("integer",0)) @@ -193,16 +180,40 @@ claws = function(getSeries, K1, K2, indices_medoids } + # Cluster contributions in parallel (by nb_series_per_chunk) + indices_all = if (random) sample(nb_curves) else seq_len(nb_curves) + indices_tasks = lapply(seq_len(ntasks), function(i) { + upper_bound = ifelse( i1) + { + cl = parallel::makeCluster(ncores_tasks) + varlist = c("getSeries","getContribs","K1","K2","verbose","parll", + "nb_series_per_chunk","ntasks","ncores_clust","sep","nbytes","endian") + if (WER=="mix") + varlist = c(varlist, "synchrones_file") + parallel::clusterExport(cl, varlist=varlist, envir = environment()) + } + # 1000*K1 indices [if WER=="end"], or empty vector [if WER=="mix"] --> series on file - if (parll) + if (parll && ntasks>1) indices = unlist( parallel::parLapply(cl, indices_tasks, runTwoStepClustering) ) else indices = unlist( lapply(indices_tasks, runTwoStepClustering) ) - if (parll) + if (parll && ntasks>1) parallel::stopCluster(cl) getRefSeries = getSeries - synchrones_file = paste(bin_dir,"synchrones",sep="") ; unlink(synchrones_file) if (WER=="mix") { indices = seq_len(ntasks*K2) @@ -222,14 +233,15 @@ claws = function(getSeries, K1, K2, if (verbose) cat("...Run final // stage 1 + stage 2\n") indices_medoids = clusteringTask1( - indices, getContribs, K1, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose) - medoids = computeClusters2(getSeries(indices_medoids), - K2, getRefSeries, nb_curves, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose) + indices, getContribs, K1, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll) + medoids1 = bigmemory::as.big.matrix( getSeries(indices_medoids) ) + medoids2 = clusteringTask2(medoids1, K2, + getRefSeries, nb_curves, nb_series_per_chunk, ncores_tasks*ncores_clust, verbose, parll) # Cleanup unlink(bin_dir, recursive=TRUE) - medoids + medoids2 } #' curvesToContribs