Commit | Line | Data |
---|---|---|
56857861 BA |
1 | context("clustering") |
2 | ||
8702eb86 BA |
3 | #shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3, ... (is base==3) |
4 | I = function(i, base) | |
5 | (i-1) %% base + 1 | |
56857861 | 6 | |
e161499b | 7 | test_that("computeClusters1&2 behave as expected", |
56857861 | 8 | { |
8702eb86 | 9 | require("MASS", quietly=TRUE) |
4bcfdbee BA |
10 | if (!require("clue", quietly=TRUE)) |
11 | skip("'clue' package not available") | |
56857861 | 12 | |
8702eb86 BA |
13 | # 3 gaussian clusters, 300 items; and then 7 gaussian clusters, 490 items |
14 | n = 300 | |
15 | d = 5 | |
16 | K = 3 | |
17 | for (ndK in list( c(300,5,3), c(490,10,7) )) | |
18 | { | |
19 | n = ndK[1] ; d = ndK[2] ; K = ndK[3] | |
20 | cs = n/K #cluster size | |
21 | Id = diag(d) | |
22 | coefs = do.call(rbind, | |
23 | lapply(1:K, function(i) MASS::mvrnorm(cs, c(rep(0,(i-1)),5,rep(0,d-i)), Id))) | |
e161499b BA |
24 | indices_medoids1 = computeClusters1(coefs, K, verbose=TRUE) |
25 | indices_medoids2 = computeClusters2(dist(coefs), K, verbose=TRUE) | |
8702eb86 | 26 | # Get coefs assignments (to medoids) |
e161499b BA |
27 | assignment1 = sapply(seq_len(n), function(i) |
28 | which.min( rowSums( sweep(coefs[indices_medoids1,],2,coefs[i,],'-')^2 ) ) ) | |
29 | assignment2 = sapply(seq_len(n), function(i) | |
30 | which.min( rowSums( sweep(coefs[indices_medoids2,],2,coefs[i,],'-')^2 ) ) ) | |
8702eb86 | 31 | for (i in 1:K) |
e161499b BA |
32 | { |
33 | expect_equal(sum(assignment1==i), cs, tolerance=5) | |
34 | expect_equal(sum(assignment2==i), cs, tolerance=5) | |
35 | } | |
8702eb86 | 36 | |
e161499b BA |
37 | costs_matrix1 = matrix(nrow=K,ncol=K) |
38 | costs_matrix2 = matrix(nrow=K,ncol=K) | |
8702eb86 BA |
39 | for (i in 1:K) |
40 | { | |
41 | for (j in 1:K) | |
42 | { | |
43 | # assign i (in result) to j (order 1,2,3) | |
e161499b BA |
44 | costs_matrix1[i,j] = abs( mean(assignment1[((i-1)*cs+1):(i*cs)]) - j ) |
45 | costs_matrix2[i,j] = abs( mean(assignment2[((i-1)*cs+1):(i*cs)]) - j ) | |
8702eb86 BA |
46 | } |
47 | } | |
e161499b BA |
48 | permutation1 = as.integer( clue::solve_LSAP(costs_matrix1) ) |
49 | permutation2 = as.integer( clue::solve_LSAP(costs_matrix2) ) | |
8702eb86 BA |
50 | for (i in 1:K) |
51 | { | |
52 | expect_equal( | |
e161499b BA |
53 | mean(assignment1[((i-1)*cs+1):(i*cs)]), permutation1[i], tolerance=0.05) |
54 | expect_equal( | |
55 | mean(assignment2[((i-1)*cs+1):(i*cs)]), permutation2[i], tolerance=0.05) | |
8702eb86 BA |
56 | } |
57 | } | |
56857861 BA |
58 | }) |
59 | ||
60 | test_that("computeSynchrones behave as expected", | |
61 | { | |
8702eb86 BA |
62 | n = 300 |
63 | x = seq(0,9.5,0.1) | |
64 | L = length(x) #96 1/4h | |
65 | K = 3 | |
66 | s1 = cos(x) | |
67 | s2 = sin(x) | |
68 | s3 = c( s1[1:(L%/%2)] , s2[(L%/%2+1):L] ) | |
69 | #sum((s1-s2)^2) == 96 | |
70 | #sum((s1-s3)^2) == 58 | |
71 | #sum((s2-s3)^2) == 38 | |
72 | s = list(s1, s2, s3) | |
73 | series = matrix(nrow=n, ncol=L) | |
74 | for (i in seq_len(n)) | |
75 | series[i,] = s[[I(i,K)]] + rnorm(L,sd=0.01) | |
76 | getRefSeries = function(indices) { | |
492cd9e7 | 77 | indices = indices[indices <= n] |
8702eb86 BA |
78 | if (length(indices)>0) series[indices,] else NULL |
79 | } | |
e161499b BA |
80 | synchrones = computeSynchrones(bigmemory::as.big.matrix(rbind(s1,s2,s3)), getRefSeries, |
81 | n, 100, verbose=TRUE, parll=FALSE) | |
56857861 | 82 | |
8702eb86 BA |
83 | expect_equal(dim(synchrones), c(K,L)) |
84 | for (i in 1:K) | |
85 | expect_equal(synchrones[i,], s[[i]], tolerance=0.01) | |
56857861 BA |
86 | }) |
87 | ||
e161499b | 88 | # NOTE: medoids can be a big.matrix |
8702eb86 BA |
89 | computeDistortion = function(series, medoids) |
90 | { | |
91 | n = nrow(series) ; L = ncol(series) | |
92 | distortion = 0. | |
e161499b BA |
93 | if (bigmemory::is.big.matrix(medoids)) |
94 | medoids = medoids[,] | |
8702eb86 BA |
95 | for (i in seq_len(n)) |
96 | distortion = distortion + min( rowSums( sweep(medoids,2,series[i,],'-')^2 ) / L ) | |
97 | distortion / n | |
98 | } | |
99 | ||
e161499b | 100 | test_that("clusteringTask1 behave as expected", |
56857861 | 101 | { |
8702eb86 BA |
102 | n = 900 |
103 | x = seq(0,9.5,0.1) | |
104 | L = length(x) #96 1/4h | |
105 | K1 = 60 | |
8702eb86 BA |
106 | s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) ) |
107 | series = matrix(nrow=n, ncol=L) | |
108 | for (i in seq_len(n)) | |
109 | series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01) | |
e161499b | 110 | getSeries = function(indices) { |
492cd9e7 | 111 | indices = indices[indices <= n] |
8702eb86 BA |
112 | if (length(indices)>0) series[indices,] else NULL |
113 | } | |
e161499b BA |
114 | wf = "haar" |
115 | ctype = "absolute" | |
116 | getContribs = function(indices) curvesToContribs(series[indices,],wf,ctype) | |
117 | indices1 = clusteringTask1(1:n, getContribs, K1, 75, verbose=TRUE, parll=FALSE) | |
118 | medoids_K1 = getSeries(indices1) | |
56857861 | 119 | |
e161499b | 120 | expect_equal(dim(medoids_K1), c(K1,L)) |
8702eb86 | 121 | # Not easy to evaluate result: at least we expect it to be better than random selection of |
e161499b BA |
122 | # medoids within initial series |
123 | distorGood = computeDistortion(series, medoids_K1) | |
8702eb86 | 124 | for (i in 1:3) |
e161499b | 125 | expect_lte( distorGood, computeDistortion(series,series[sample(1:n, K1),]) ) |
56857861 BA |
126 | }) |
127 | ||
e161499b | 128 | test_that("clusteringTask2 behave as expected", |
56857861 | 129 | { |
8702eb86 BA |
130 | n = 900 |
131 | x = seq(0,9.5,0.1) | |
132 | L = length(x) #96 1/4h | |
133 | K1 = 60 | |
134 | K2 = 3 | |
e161499b | 135 | #for (i in 1:60) {plot(x^(1+i/30)*cos(x+i),type="l",col=i,ylim=c(-50,50)); par(new=TRUE)} |
8702eb86 BA |
136 | s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) ) |
137 | series = matrix(nrow=n, ncol=L) | |
138 | for (i in seq_len(n)) | |
139 | series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01) | |
e161499b | 140 | getRefSeries = function(indices) { |
8702eb86 BA |
141 | indices = indices[indices <= n] |
142 | if (length(indices)>0) series[indices,] else NULL | |
143 | } | |
e161499b BA |
144 | # Artificially simulate 60 medoids - perfect situation, all equal to one of the refs |
145 | medoids_K1 = bigmemory::as.big.matrix( | |
146 | do.call(rbind, lapply( 1:K1, function(i) s[[I(i,K1)]] ) ) ) | |
147 | medoids_K2 = clusteringTask2(medoids_K1, K2, getRefSeries, n, 75, verbose=TRUE, parll=FALSE) | |
56857861 | 148 | |
8702eb86 BA |
149 | expect_equal(dim(medoids_K2), c(K2,L)) |
150 | # Not easy to evaluate result: at least we expect it to be better than random selection of | |
151 | # medoids within 1...K1 (among references) | |
152 | distorGood = computeDistortion(series, medoids_K2) | |
153 | for (i in 1:3) | |
154 | expect_lte( distorGood, computeDistortion(series,medoids_K1[sample(1:K1, K2),]) ) | |
56857861 | 155 | }) |
e161499b BA |
156 | |
157 | #NOTE: rather redundant test | |
158 | #test_that("clusteringTask1 + clusteringTask2 behave as expected", | |
159 | #{ | |
160 | # n = 900 | |
161 | # x = seq(0,9.5,0.1) | |
162 | # L = length(x) #96 1/4h | |
163 | # K1 = 60 | |
164 | # K2 = 3 | |
165 | # s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) ) | |
166 | # series = matrix(nrow=n, ncol=L) | |
167 | # for (i in seq_len(n)) | |
168 | # series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01) | |
169 | # getSeries = function(indices) { | |
170 | # indices = indices[indices <= n] | |
171 | # if (length(indices)>0) series[indices,] else NULL | |
172 | # } | |
173 | # wf = "haar" | |
174 | # ctype = "absolute" | |
175 | # getContribs = function(indices) curvesToContribs(series[indices,],wf,ctype) | |
176 | # require("bigmemory", quietly=TRUE) | |
177 | # indices1 = clusteringTask1(1:n, getContribs, K1, 75, verbose=TRUE, parll=FALSE) | |
178 | # medoids_K1 = bigmemory::as.big.matrix( getSeries(indices1) ) | |
179 | # medoids_K2 = clusteringTask2(medoids_K1, K2, getSeries, n, 120, verbose=TRUE, parll=FALSE) | |
180 | # | |
181 | # expect_equal(dim(medoids_K1), c(K1,L)) | |
182 | # expect_equal(dim(medoids_K2), c(K2,L)) | |
183 | # # Not easy to evaluate result: at least we expect it to be better than random selection of | |
184 | # # medoids within 1...K1 (among references) | |
185 | # distorGood = computeDistortion(series, medoids_K2) | |
186 | # for (i in 1:3) | |
187 | # expect_lte( distorGood, computeDistortion(series,medoids_K1[sample(1:K1, K2),]) ) | |
188 | #}) |