# Generate a random permutation of 1:N (if random==TRUE); otherwise just use
# arrival (storage) order.
indices_all = if (random) sample(nb_curves) else seq_len(nb_curves)

# Split (all) indices into ntasks groups of ~same size.
# Task i covers positions ((i-1)*nb_series_per_task+1) .. i*nb_series_per_task,
# capped at nb_curves; the last task absorbs any remainder.
# NOTE(review): if nb_series_per_task*(ntasks-1) could exceed nb_curves, the
# lower bound would pass the upper bound and `:` would count downwards —
# presumably the caller sizes nb_series_per_task so this cannot happen; confirm.
indices_tasks = lapply(seq_len(ntasks), function(i) {
	# Scalar condition: use if/else, not ifelse() (which is meant for vectors
	# and is slower / strips attributes on scalars).
	upper_bound =
		if (i < ntasks) min(nb_series_per_task*i, nb_curves) else nb_curves
	indices_all[((i-1)*nb_series_per_task+1):upper_bound]
})
if (parll && ntasks>1)
{
	# Initialize parallel runs: outfile="" lets verbose worker traces be printed
	# to the console (works under Linux). All variables the workers need are
	# exported to them explicitly below.
	cl = parallel::makeCluster(ncores_tasks, outfile="")
	varlist = c("getSeries","getContribs","K1","K2","algo_clust1","algo_clust2",
		"nb_per_chunk","nb_items_clust","ncores_clust","sep","nbytes","endian",
		"verbose","parll")
	# medoids_file is only read by workers on the WER=="mix" code path.
	if (WER=="mix")
		varlist = c(varlist, "medoids_file")
	parallel::clusterExport(cl, varlist, envir = environment())
	# NOTE(review): cl should be shut down with parallel::stopCluster() after the
	# tasks complete — presumably done later in the enclosing function; confirm.
}
# This function performs one complete clustering task, divided into stage 1 + stage 2.
#   stage 1: n indices  --> clusteringTask1(...) --> K1 medoids
#   stage 2: K1 medoids --> clusteringTask2(...) --> K2 medoids,
# where n = N / ntasks, N being the total number of curves.