add some prints in convert_to_CSV.c; almost working. TODO...
[epclust.git] / epclust / R / clustering.R
1 #' Two-stage clustering, within one task (see \code{claws()})
2 #'
3 #' \code{clusteringTask1()} runs one full stage-1 task, which consists in iterated
4 #' clustering on nb_curves / ntasks energy contributions, computed through
5 #' discrete wavelets coefficients.
6 #' \code{clusteringTask2()} runs a full stage-2 task, which consists in WER distances
7 #' computations between medoids (indices) output from stage 1, before applying
8 #' the second clustering algorithm on the distances matrix.
9 #'
10 #' @param getContribs Function to retrieve contributions from initial series indices:
11 #' \code{getContribs(indices)} outputs a contributions matrix, in columns
12 #' @inheritParams claws
13 #' @inheritParams computeSynchrones
14 #' @inheritParams computeWerDists
15 #'
16 #' @return The indices of the computed (resp. K1 and K2) medoids.
17 #'
18 #' @name clustering
19 #' @rdname clustering
20 #' @aliases clusteringTask1 clusteringTask2
21 NULL
22
23 #' @rdname clustering
24 #' @export
25 clusteringTask1 <- function(indices, getContribs, K1, algoClust1, nb_items_clust,
26 ncores_clust=3, verbose=FALSE)
27 {
28 if (verbose)
29 cat(paste("*** Clustering task 1 on ",length(indices)," series [start]\n", sep=""))
30
31 if (length(indices) <= K1)
32 return (indices)
33
34 parll <- (ncores_clust > 1)
35 if (parll)
36 {
37 # outfile=="" to see stderr/stdout on terminal
38 cl <-
39 if (verbose)
40 parallel::makeCluster(ncores_clust, outfile = "")
41 else
42 parallel::makeCluster(ncores_clust)
43 parallel::clusterExport(cl, c("getContribs","K1","verbose"), envir=environment())
44 }
45 # Iterate clustering algorithm 1 until K1 medoids are found
46 while (length(indices) > K1)
47 {
48 # Balance tasks by splitting the indices set - as evenly as possible
49 indices_workers <- .splitIndices(indices, nb_items_clust, min_size=K1+1)
50 indices <-
51 if (parll)
52 {
53 unlist( parallel::parLapply(cl, indices_workers, function(inds) {
54 require("epclust", quietly=TRUE)
55 inds[ algoClust1(getContribs(inds), K1) ]
56 }) )
57 }
58 else
59 {
60 unlist( lapply(indices_workers, function(inds)
61 inds[ algoClust1(getContribs(inds), K1) ]
62 ) )
63 }
64 if (verbose)
65 {
66 cat(paste("*** Clustering task 1 on ",length(indices)," medoids [iter]\n", sep=""))
67 }
68 }
69 if (parll)
70 parallel::stopCluster(cl)
71
72 indices #medoids
73 }
74
75 #' @rdname clustering
76 #' @export
77 clusteringTask2 <- function(indices, getSeries, K2, algoClust2, nb_series_per_chunk,
78 smooth_lvl, nvoice, nbytes, endian, ncores_clust=3, verbose=FALSE)
79 {
80 if (verbose)
81 cat(paste("*** Clustering task 2 on ",length(indices)," medoids\n", sep=""))
82
83 if (length(indices) <= K2)
84 return (indices)
85
86 # A) Compute the WER distances (Wavelets Extended coefficient of deteRmination)
87 distances <- computeWerDists(indices, getSeries, nb_series_per_chunk,
88 smooth_lvl, nvoice, nbytes, endian, ncores_clust, verbose)
89
90 # B) Apply clustering algorithm 2 on the WER distances matrix
91 if (verbose)
92 cat(paste("*** algoClust2() on ",nrow(distances)," items\n", sep=""))
93 indices[ algoClust2(distances,K2) ]
94 }