throw away old code, prepare tests
[epclust.git] / epclust / tests / testthat / test.clusteringTask.R
diff --git a/epclust/tests/testthat/test.clusteringTask.R b/epclust/tests/testthat/test.clusteringTask.R
new file mode 100644 (file)
index 0000000..f45276c
--- /dev/null
@@ -0,0 +1,38 @@
+# NOTE(review): the two lines below are bare symbol references, presumably
+# placeholders naming the functions this test file is meant to cover (TODO
+# confirm). Evaluating them fails if no such objects are defined.
+computeClusters
+computeSynchrones
+
+# Cluster one full task (nb_curves / ntasks series).
+#
+# Stage 1: split `indices` into chunks of roughly nb_series_per_chunk elements,
+# run computeClusters1() on every chunk in parallel, and replace `indices` by
+# the concatenated results; repeat until exactly K1 indices remain.
+# Stage 2 (skipped when K2 == 0): pass the remaining indices to
+# computeClusters2().
+#
+# Arguments:
+#   indices              vector of series indices to cluster
+#   getSeries, getSeriesForSynchrones, to_file, ftype
+#                        forwarded unchanged to computeClusters2()
+#   synchrones_file      not used by this function
+#   getCoefs             forwarded to computeClusters1()
+#   K1                   number of indices that ends stage 1; also forwarded
+#                        to computeClusters1()
+#   K2                   0 to return right after stage 1, otherwise forwarded
+#                        to computeClusters2()
+#   nb_series_per_chunk  target chunk size for stage 1
+#   ncores               number of workers given to parallel::makeCluster()
+#
+# Returns `indices` as left by stage 1 when K2 == 0; otherwise an empty integer
+# vector (the value of computeClusters2() is discarded).
+#
+# NOTE(review): the worker closure calls computeClusters1 by name; this assumes
+# the function is reachable on the workers - TODO confirm.
+clusteringTask = function(indices,getSeries,getSeriesForSynchrones,synchrones_file,
+       getCoefs,K1,K2,nb_series_per_chunk,ncores,to_file,ftype)
+{
+       cl = parallel::makeCluster(ncores)
+       # 'finally' releases the workers even if a chunk fails, and in the normal
+       # case stops them before stage 2 starts (as the original code did).
+       tryCatch(
+       {
+               repeat
+               {
+                       nb_workers = max( 1, round( length(indices) / nb_series_per_chunk ) )
+                       indices_workers = lapply(seq_len(nb_workers), function(i) {
+                               # The last chunk absorbs whatever the rounding above left over.
+                               upper_bound = if (i < nb_workers) min(nb_series_per_chunk*i,length(indices)) else length(indices)
+                               indices[(nb_series_per_chunk*(i-1)+1):upper_bound]
+                       })
+                       indices = unlist( parallel::parLapply(cl, indices_workers, function(inds)
+                               computeClusters1(inds, getCoefs, K1)) )
+                       # Test the vector that was just updated: the original checked an
+                       # undefined 'indices_clust', which fails on the first pass.
+                       if (length(indices) == K1)
+                               break
+               }
+       }, finally = parallel::stopCluster(cl))
+       if (K2 == 0)
+               return (indices)
+       computeClusters2(indices, K2, getSeries, getSeriesForSynchrones, to_file,
+                                                                        nb_series_per_chunk,ftype)
+       vector("integer",0)
+}
+
+# Reduce a set of series to K1 representatives: fetch their coefficients with
+# getCoefs(), run PAM on them (cluster::pam with diss=FALSE, i.e. the input is
+# treated as observations rather than dissimilarities) and return the entries
+# of `indices` found at the medoid positions.
+# Assumes getCoefs(indices) yields one row per element of `indices`, in the
+# same order - TODO confirm against the callers.
+computeClusters1 = function(indices, getCoefs, K1)
+{
+       pam_fit = cluster::pam(getCoefs(indices), K1, diss=FALSE)
+       medoid_positions = pam_fit$id.med
+       indices[medoid_positions]
+}
+
+# Cluster a chunk of series inside one task (~max nb_series_per_chunk)
+computeClusters2 = function(indices, K2, getSeries, getSeriesForSynchrones, to_file,