Commit | Line | Data |
---|---|---|
56857861 BA |
1 | context("clustering") |
2 | ||
8702eb86 BA |
3 | #shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3, ... (is base==3) |
4 | I = function(i, base) | |
5 | (i-1) %% base + 1 | |
56857861 | 6 | |
e161499b | 7 | test_that("computeClusters1&2 behave as expected", |
56857861 | 8 | { |
8702eb86 | 9 | require("MASS", quietly=TRUE) |
4bcfdbee BA |
10 | if (!require("clue", quietly=TRUE)) |
11 | skip("'clue' package not available") | |
56857861 | 12 | |
8702eb86 BA |
13 | # 3 gaussian clusters, 300 items; and then 7 gaussian clusters, 490 items |
14 | n = 300 | |
15 | d = 5 | |
16 | K = 3 | |
17 | for (ndK in list( c(300,5,3), c(490,10,7) )) | |
18 | { | |
19 | n = ndK[1] ; d = ndK[2] ; K = ndK[3] | |
20 | cs = n/K #cluster size | |
21 | Id = diag(d) | |
37c82bba | 22 | coefs = sapply(1:K, function(i) MASS::mvrnorm(cs, c(rep(0,(i-1)),5,rep(0,d-i)), Id)) |
e161499b BA |
23 | indices_medoids1 = computeClusters1(coefs, K, verbose=TRUE) |
24 | indices_medoids2 = computeClusters2(dist(coefs), K, verbose=TRUE) | |
8702eb86 | 25 | # Get coefs assignments (to medoids) |
e161499b | 26 | assignment1 = sapply(seq_len(n), function(i) |
37c82bba | 27 | which.min( colSums( sweep(coefs[,indices_medoids1],1,coefs[,i],'-')^2 ) ) ) |
e161499b | 28 | assignment2 = sapply(seq_len(n), function(i) |
37c82bba | 29 | which.min( colSums( sweep(coefs[,indices_medoids2],1,coefs[,i],'-')^2 ) ) ) |
8702eb86 | 30 | for (i in 1:K) |
e161499b BA |
31 | { |
32 | expect_equal(sum(assignment1==i), cs, tolerance=5) | |
33 | expect_equal(sum(assignment2==i), cs, tolerance=5) | |
34 | } | |
8702eb86 | 35 | |
e161499b BA |
36 | costs_matrix1 = matrix(nrow=K,ncol=K) |
37 | costs_matrix2 = matrix(nrow=K,ncol=K) | |
8702eb86 BA |
38 | for (i in 1:K) |
39 | { | |
40 | for (j in 1:K) | |
41 | { | |
42 | # assign i (in result) to j (order 1,2,3) | |
e161499b BA |
43 | costs_matrix1[i,j] = abs( mean(assignment1[((i-1)*cs+1):(i*cs)]) - j ) |
44 | costs_matrix2[i,j] = abs( mean(assignment2[((i-1)*cs+1):(i*cs)]) - j ) | |
8702eb86 BA |
45 | } |
46 | } | |
e161499b BA |
47 | permutation1 = as.integer( clue::solve_LSAP(costs_matrix1) ) |
48 | permutation2 = as.integer( clue::solve_LSAP(costs_matrix2) ) | |
8702eb86 BA |
49 | for (i in 1:K) |
50 | { | |
51 | expect_equal( | |
e161499b BA |
52 | mean(assignment1[((i-1)*cs+1):(i*cs)]), permutation1[i], tolerance=0.05) |
53 | expect_equal( | |
54 | mean(assignment2[((i-1)*cs+1):(i*cs)]), permutation2[i], tolerance=0.05) | |
8702eb86 BA |
55 | } |
56 | } | |
56857861 BA |
57 | }) |
58 | ||
59 | test_that("computeSynchrones behave as expected", | |
60 | { | |
8702eb86 BA |
61 | n = 300 |
62 | x = seq(0,9.5,0.1) | |
63 | L = length(x) #96 1/4h | |
64 | K = 3 | |
65 | s1 = cos(x) | |
66 | s2 = sin(x) | |
67 | s3 = c( s1[1:(L%/%2)] , s2[(L%/%2+1):L] ) | |
68 | #sum((s1-s2)^2) == 96 | |
69 | #sum((s1-s3)^2) == 58 | |
70 | #sum((s2-s3)^2) == 38 | |
71 | s = list(s1, s2, s3) | |
37c82bba | 72 | series = matrix(nrow=L, ncol=n) |
8702eb86 | 73 | for (i in seq_len(n)) |
37c82bba | 74 | series[,i] = s[[I(i,K)]] + rnorm(L,sd=0.01) |
8702eb86 | 75 | getRefSeries = function(indices) { |
492cd9e7 | 76 | indices = indices[indices <= n] |
37c82bba | 77 | if (length(indices)>0) series[,indices] else NULL |
8702eb86 | 78 | } |
37c82bba | 79 | synchrones = computeSynchrones(bigmemory::as.big.matrix(cbind(s1,s2,s3)), getRefSeries, |
e161499b | 80 | n, 100, verbose=TRUE, parll=FALSE) |
56857861 | 81 | |
37c82bba | 82 | expect_equal(dim(synchrones), c(L,K)) |
8702eb86 | 83 | for (i in 1:K) |
37c82bba | 84 | expect_equal(synchrones[,i], s[[i]], tolerance=0.01) |
56857861 BA |
85 | }) |
86 | ||
e161499b | 87 | # NOTE: medoids can be a big.matrix |
8702eb86 BA |
88 | computeDistortion = function(series, medoids) |
89 | { | |
90 | n = nrow(series) ; L = ncol(series) | |
91 | distortion = 0. | |
e161499b BA |
92 | if (bigmemory::is.big.matrix(medoids)) |
93 | medoids = medoids[,] | |
8702eb86 | 94 | for (i in seq_len(n)) |
37c82bba | 95 | distortion = distortion + min( colSums( sweep(medoids,1,series[,i],'-')^2 ) / L ) |
8702eb86 BA |
96 | distortion / n |
97 | } | |
98 | ||
e161499b | 99 | test_that("clusteringTask1 behave as expected", |
56857861 | 100 | { |
8702eb86 BA |
101 | n = 900 |
102 | x = seq(0,9.5,0.1) | |
103 | L = length(x) #96 1/4h | |
104 | K1 = 60 | |
8702eb86 BA |
105 | s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) ) |
106 | series = matrix(nrow=n, ncol=L) | |
107 | for (i in seq_len(n)) | |
37c82bba | 108 | series[,i] = s[[I(i,K1)]] + rnorm(L,sd=0.01) |
e161499b | 109 | getSeries = function(indices) { |
492cd9e7 | 110 | indices = indices[indices <= n] |
37c82bba | 111 | if (length(indices)>0) series[,indices] else NULL |
8702eb86 | 112 | } |
e161499b BA |
113 | wf = "haar" |
114 | ctype = "absolute" | |
37c82bba | 115 | getContribs = function(indices) curvesToContribs(series[,indices],wf,ctype) |
e161499b BA |
116 | indices1 = clusteringTask1(1:n, getContribs, K1, 75, verbose=TRUE, parll=FALSE) |
117 | medoids_K1 = getSeries(indices1) | |
56857861 | 118 | |
37c82bba | 119 | expect_equal(dim(medoids_K1), c(L,K1)) |
8702eb86 | 120 | # Not easy to evaluate result: at least we expect it to be better than random selection of |
e161499b BA |
121 | # medoids within initial series |
122 | distorGood = computeDistortion(series, medoids_K1) | |
8702eb86 | 123 | for (i in 1:3) |
37c82bba | 124 | expect_lte( distorGood, computeDistortion(series,series[,sample(1:n, K1)]) ) |
56857861 BA |
125 | }) |
126 | ||
e161499b | 127 | test_that("clusteringTask2 behave as expected", |
56857861 | 128 | { |
8702eb86 BA |
129 | n = 900 |
130 | x = seq(0,9.5,0.1) | |
131 | L = length(x) #96 1/4h | |
132 | K1 = 60 | |
133 | K2 = 3 | |
e161499b | 134 | #for (i in 1:60) {plot(x^(1+i/30)*cos(x+i),type="l",col=i,ylim=c(-50,50)); par(new=TRUE)} |
8702eb86 BA |
135 | s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) ) |
136 | series = matrix(nrow=n, ncol=L) | |
137 | for (i in seq_len(n)) | |
138 | series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01) | |
e161499b | 139 | getRefSeries = function(indices) { |
8702eb86 | 140 | indices = indices[indices <= n] |
37c82bba | 141 | if (length(indices)>0) series[,indices] else NULL |
8702eb86 | 142 | } |
e161499b | 143 | # Artificially simulate 60 medoids - perfect situation, all equal to one of the refs |
37c82bba | 144 | medoids_K1 = bigmemory::as.big.matrix( sapply( 1:K1, function(i) s[[I(i,K1)]] ) ) |
e161499b | 145 | medoids_K2 = clusteringTask2(medoids_K1, K2, getRefSeries, n, 75, verbose=TRUE, parll=FALSE) |
56857861 | 146 | |
37c82bba | 147 | expect_equal(dim(medoids_K2), c(L,K2)) |
8702eb86 BA |
148 | # Not easy to evaluate result: at least we expect it to be better than random selection of |
149 | # medoids within 1...K1 (among references) | |
150 | distorGood = computeDistortion(series, medoids_K2) | |
151 | for (i in 1:3) | |
37c82bba | 152 | expect_lte( distorGood, computeDistortion(series,medoids_K1[,sample(1:K1, K2)]) ) |
56857861 | 153 | }) |
e161499b BA |
154 | |
155 | #NOTE: rather redundant test | |
156 | #test_that("clusteringTask1 + clusteringTask2 behave as expected", | |
157 | #{ | |
158 | # n = 900 | |
159 | # x = seq(0,9.5,0.1) | |
160 | # L = length(x) #96 1/4h | |
161 | # K1 = 60 | |
162 | # K2 = 3 | |
163 | # s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) ) | |
164 | # series = matrix(nrow=n, ncol=L) | |
165 | # for (i in seq_len(n)) | |
166 | # series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01) | |
167 | # getSeries = function(indices) { | |
168 | # indices = indices[indices <= n] | |
169 | # if (length(indices)>0) series[indices,] else NULL | |
170 | # } | |
171 | # wf = "haar" | |
172 | # ctype = "absolute" | |
173 | # getContribs = function(indices) curvesToContribs(series[indices,],wf,ctype) | |
174 | # require("bigmemory", quietly=TRUE) | |
175 | # indices1 = clusteringTask1(1:n, getContribs, K1, 75, verbose=TRUE, parll=FALSE) | |
176 | # medoids_K1 = bigmemory::as.big.matrix( getSeries(indices1) ) | |
177 | # medoids_K2 = clusteringTask2(medoids_K1, K2, getSeries, n, 120, verbose=TRUE, parll=FALSE) | |
178 | # | |
179 | # expect_equal(dim(medoids_K1), c(K1,L)) | |
180 | # expect_equal(dim(medoids_K2), c(K2,L)) | |
181 | # # Not easy to evaluate result: at least we expect it to be better than random selection of | |
182 | # # medoids within 1...K1 (among references) | |
183 | # distorGood = computeDistortion(series, medoids_K2) | |
184 | # for (i in 1:3) | |
185 | # expect_lte( distorGood, computeDistortion(series,medoids_K1[sample(1:K1, K2),]) ) | |
186 | #}) |