- library(parallel, quietly=TRUE)
- cl_tasks = parallel::makeCluster(ncores_tasks)
- parallel::clusterExport(cl_tasks,
- varlist=c("K1","K2","WER","nb_series_per_chunk","ncores_clust"),#TODO: pass also
- #nb_coeffs...and filename (in a list... ?)
- envir=environment())
- indices = parallel::parLapply(cl_tasks, indices_tasks, clusteringTask)
- parallel::stopCluster(cl_tasks)
+ cl = parallel::makeCluster(ncores_tasks)
+ # 1000*K1 indices [if WER=="end"], or empty vector [if WER=="mix"] --> series on file
+ indices = unlist( parallel::parLapply(cl, indices_tasks, function(inds) {
+ indices_medoids = clusteringTask(inds,getCoefs,K1,nb_series_per_chunk,ncores_clust)
+ if (WER=="mix")
+ {
+ medoids2 = computeClusters2(
+ getSeries(indices_medoids), K2, getSeries, nb_series_per_chunk)
+ serialize(medoids2, synchrones_file, nb_series_per_chunk, sep, nbytes, endian)
+ return (vector("integer",0))
+ }
+ indices_medoids
+ }) )
+ parallel::stopCluster(cl)
+
+ getRefSeries = getSeries
+ synchrones_file = paste(bin_dir,"synchrones",sep="") ; unlink(synchrones_file)
+ if (WER=="mix")
+ {
+ indices = seq_len(ntasks*K2)
+ #Now series must be retrieved from synchrones_file
+ getSeries = function(inds) getDataInFile(inds, synchrones_file, nbytes, endian)
+ #Coefs must be re-computed
+ unlink(coefs_file)
+ index = 1
+ repeat
+ {
+ series = getSeries((index-1)+seq_len(nb_series_per_chunk))
+ if (is.null(series))
+ break
+ coefs_chunk = curvesToCoefs(series, wf)
+ serialize(coefs_chunk, coefs_file, nb_series_per_chunk, sep, nbytes, endian)
+ index = index + nb_series_per_chunk
+ }
+ }
+
+ # Run step2 on resulting indices or series (from file)
+ indices_medoids = clusteringTask(
+ indices, getCoefs, K1, nb_series_per_chunk, ncores_tasks*ncores_clust)
+ computeClusters2(getSeries(indices_medoids),K2,getRefSeries,nb_series_per_chunk)
+}
+
+# helper: map each row of `series` (one curve per row) to the vector of Euclidean norms of its DWT coefficient vectors
+curvesToCoefs = function(series, wf) # series: matrix-like, indexed [row, ]; wf: passed as `filter` to wavelets::dwt
+{
+ L = length(series[1,]) # number of points per curve (length of the first row)
+ D = ceiling( log2(L) ) # smallest D such that 2^D >= L
+ nb_sample_points = 2^D # curves are resampled to this power-of-two length before the transform
+ t( apply(series, 1, function(x) { # x is one row (curve) of `series`
+ interpolated_curve = spline(1:L, x, n=nb_sample_points)$y # spline-interpolate the curve onto nb_sample_points points
+ W = wavelets::dwt(interpolated_curve, filter=wf, D)@W # D is positional -- presumably n.levels; @W presumably one coefficient vector per level (TODO confirm)
+ rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) # Euclidean norm of each element of W, in reversed order
+ }) ) # apply() lays per-row results out as columns (when their length is > 1); t() puts one curve per row again
+}