Commit | Line | Data |
---|---|---|
40f12a2f BA |
1 | #' computeWerDists |
2 | #' | |
3 | #' Compute the WER distances between the synchrones curves (in columns), which are | |
4 | #' returned (e.g.) by \code{computeSynchrones()} | |
5 | #' | |
6 | #' @param synchrones A big.matrix of synchrones, in columns. The series have same | |
7 | #' length as the series in the initial dataset | |
8 | #' @inheritParams claws | |
9 | #' | |
10 | #' @return A distances matrix of size K1 x K1 | |
11 | #' | |
12 | #' @export | |
13 | computeWerDists = function(synchrones, nvoice, nbytes,endian,ncores_clust=1, | |
14 | verbose=FALSE,parll=TRUE) | |
15 | { | |
16 | n <- ncol(synchrones) | |
17 | L <- nrow(synchrones) | |
18 | noctave = ceiling(log2(L)) #min power of 2 to cover serie range | |
19 | ||
20 | # Initialize result as a square big.matrix of size 'number of synchrones' | |
21 | Xwer_dist <- bigmemory::big.matrix(nrow=n, ncol=n, type="double") | |
22 | ||
23 | # Generate n(n-1)/2 pairs for WER distances computations | |
24 | pairs = list() | |
25 | V = seq_len(n) | |
26 | for (i in 1:n) | |
27 | { | |
28 | V = V[-1] | |
29 | pairs = c(pairs, lapply(V, function(v) c(i,v))) | |
30 | } | |
31 | ||
32 | cwt_file = ".cwt.bin" | |
33 | # Compute the synchrones[,indices] CWT, and store the results in the binary file | |
34 | computeSaveCWT = function(indices) | |
35 | { | |
36 | if (parll) | |
37 | { | |
38 | require("bigmemory", quietly=TRUE) | |
39 | require("Rwave", quietly=TRUE) | |
40 | require("epclust", quietly=TRUE) | |
41 | synchrones <- bigmemory::attach.big.matrix(synchrones_desc) | |
42 | } | |
43 | ||
44 | # Obtain CWT as big vectors of real part + imaginary part (concatenate) | |
45 | ts_cwt <- sapply(indices, function(i) { | |
46 | ts <- scale(ts(synchrones[,i]), center=TRUE, scale=FALSE) | |
47 | ts_cwt <- Rwave::cwt(ts, noctave, nvoice, w0=2*pi, twoD=TRUE, plot=FALSE) | |
48 | c( as.double(Re(ts_cwt)),as.double(Im(ts_cwt)) ) | |
49 | }) | |
50 | ||
51 | # Serialization | |
52 | binarize(ts_cwt, cwt_file, 1, ",", nbytes, endian) | |
53 | } | |
54 | ||
55 | if (parll) | |
56 | { | |
57 | cl = parallel::makeCluster(ncores_clust) | |
58 | synchrones_desc <- bigmemory::describe(synchrones) | |
59 | Xwer_dist_desc <- bigmemory::describe(Xwer_dist) | |
60 | parallel::clusterExport(cl, varlist=c("parll","synchrones_desc","Xwer_dist_desc", | |
61 | "noctave","nvoice","verbose","getCWT"), envir=environment()) | |
62 | } | |
63 | ||
64 | if (verbose) | |
65 | cat(paste("--- Precompute and serialize synchrones CWT\n", sep="")) | |
66 | ||
67 | ignored <- | |
68 | if (parll) | |
69 | parallel::parLapply(cl, 1:n, computeSaveCWT) | |
70 | else | |
71 | lapply(1:n, computeSaveCWT) | |
72 | ||
73 | # Function to retrieve a synchrone CWT from (binary) file | |
74 | getSynchroneCWT = function(index, L) | |
75 | { | |
76 | flat_cwt <- getDataInFile(index, cwt_file, nbytes, endian) | |
77 | cwt_length = length(flat_cwt) / 2 | |
78 | re_part = as.matrix(flat_cwt[1:cwt_length], nrow=L) | |
79 | im_part = as.matrix(flat_cwt[(cwt_length+1):(2*cwt_length)], nrow=L) | |
80 | re_part + 1i * im_part | |
81 | } | |
82 | ||
83 | ||
84 | ||
85 | ||
86 | #TODO: better repartition here, | |
87 | #better code in .splitIndices :: never exceed nb_per_chunk; arg: min_per_chunk (default: 1) | |
88 | ###TODO: reintroduire nb_items_clust ======> l'autre est typiquement + grand !!! (pas de relation !) | |
89 | ||
90 | ||
91 | ||
92 | # Compute distance between columns i and j in synchrones | |
93 | computeDistanceIJ = function(pair) | |
94 | { | |
95 | if (parll) | |
96 | { | |
97 | # parallel workers start with an empty environment | |
98 | require("bigmemory", quietly=TRUE) | |
99 | require("epclust", quietly=TRUE) | |
100 | synchrones <- bigmemory::attach.big.matrix(synchrones_desc) | |
101 | Xwer_dist <- bigmemory::attach.big.matrix(Xwer_dist_desc) | |
102 | } | |
103 | ||
104 | i = pair[1] ; j = pair[2] | |
105 | if (verbose && j==i+1 && !parll) | |
106 | cat(paste(" Distances (",i,",",j,"), (",i,",",j+1,") ...\n", sep="")) | |
107 | ||
108 | # Compute CWT of columns i and j in synchrones | |
109 | L = nrow(synchrones) | |
110 | cwt_i <- getSynchroneCWT(i, L) | |
111 | cwt_j <- getSynchroneCWT(j, L) | |
112 | ||
113 | # Compute the ratio of integrals formula 5.6 for WER^2 | |
114 | # in https://arxiv.org/abs/1101.4744v2 ยง5.3 | |
115 | num <- filterMA(Mod(cwt_i * Conj(cwt_j))) | |
116 | WX <- filterMA(Mod(cwt_i * Conj(cwt_i))) | |
117 | WY <- filterMA(Mod(cwt_j * Conj(cwt_j))) | |
118 | wer2 <- sum(colSums(num)^2) / sum(colSums(WX) * colSums(WY)) | |
119 | ||
120 | Xwer_dist[i,j] <- sqrt(L * ncol(cwt_i) * (1 - wer2)) | |
121 | Xwer_dist[j,i] <- Xwer_dist[i,j] | |
122 | Xwer_dist[i,i] <- 0. | |
123 | } | |
124 | ||
125 | if (verbose) | |
126 | cat(paste("--- Compute WER distances\n", sep="")) | |
127 | ||
128 | ignored <- | |
129 | if (parll) | |
130 | parallel::parLapply(cl, pairs, computeDistanceIJ) | |
131 | else | |
132 | lapply(pairs, computeDistanceIJ) | |
133 | ||
134 | if (parll) | |
135 | parallel::stopCluster(cl) | |
136 | ||
137 | unlink(cwt_file) | |
138 | ||
139 | Xwer_dist[n,n] = 0. | |
140 | Xwer_dist[,] #~small matrix K1 x K1 | |
141 | } |