+ # Generate a random permutation of 1:N (if random==TRUE);
+ # otherwise just use arrival (storage) order.
+ indices_all = if (random) sample(nb_curves) else seq_len(nb_curves)
+ # Split (all) indices into ntasks groups of ~same size:
+ # task i receives the slice [(i-1)*nb_series_per_task+1, upper_bound],
+ # and the last task absorbs any remainder (its slice runs to nb_curves).
+ # NOTE(review): assumes nb_series_per_task*(ntasks-1) < nb_curves, so each
+ # slice is non-empty and ascending — TODO confirm at the call site.
+ indices_tasks = lapply(seq_len(ntasks), function(i) {
+ # NOTE(review): scalar condition — a plain if/else would be more idiomatic
+ # than ifelse() here, though behavior is identical for a scalar i.
+ upper_bound = ifelse( i<ntasks, min(nb_series_per_task*i,nb_curves), nb_curves )
+ indices_all[((i-1)*nb_series_per_task+1):upper_bound]
+ })
+
+ if (parll && ntasks>1)
+ {
+ # Initialize parallel runs: outfile="" allows verbose traces to be printed
+ # to the console (under Linux). Workers do not inherit the current
+ # environment, so every variable they need is exported explicitly below.
+ cl = parallel::makeCluster(ncores_tasks, outfile="")
+ varlist = c("ncores_clust","verbose","parll", #task 1 & 2
+ "K1","getContribs","algoClust1","nb_items_clust") #task 1
+ if (WER=="mix")
+ {
+ # Add the variables required by stage 2 (WER-distance clustering),
+ # which only runs when WER=="mix".
+ varlist = c(varlist, "K2","getSeries","algoClust2","nb_series_per_chunk",
+ "nvoice","nbytes","endian")
+ }
+ # Copy the listed variables from this environment into each worker
+ parallel::clusterExport(cl, varlist, envir = environment())
+ }
+
+ # This function performs one complete clustering task, divided into stage 1 + stage 2.
+ # stage 1: n indices --> clusteringTask1(...) --> K1 medoids (indices)
+ # stage 2: K1 indices --> K1xK1 WER distances --> clusteringTask2(...) --> K2 medoids,
+ # where n = N / ntasks, N being the total number of curves.