Commit | Line | Data |
---|---|---|
15d1825d BA |
# Run several clustering methods on the iris dataset: the 'setosa' species
# (rows 1 to 50) is expected to come out as a single, isolated cluster.
test.clustering1 <- function() {
  data(iris)
  measurements <- as.matrix(iris[, 1:4])

  # Build neighborhoods from the data [25 is high, but it should not be
  # lower if the graph is to keep a single connected component].
  NI <- .Call("getNeighbors", measurements, 25, 0.0, 1, TRUE)

  # TODO: also iterate over "ectd" once the associated bug is fixed.
  for (dtype in "spath") {
    # Derive distances from the neighborhoods; this should work for every
    # distance type except "simple" (a special case handled with built-in
    # R functions).
    distances <- synclust:::getDistances(dtype, NI)

    for (cmeth in c("KM", "HC")) {
      # Finally, compute the clusters.
      clusters <- synclust:::getClusters(distances, cmeth, K = 3)

      # 'setosa' must be pure (one label only over rows 1 to 50) ...
      setosa_labels <- unique(clusters[1:50])
      checkTrue(length(setosa_labels) == 1)
      # ... and separated (that label is never used for the other rows).
      checkTrue(!(setosa_labels[1] %in% clusters[51:150]))
    }
  }
}
26 | ||
# Test several parameter settings on a custom 2D dataset made of three
# non-isotropic Gaussian groups of clustSize points each.
test.clustering2 <- function() {
  clustSize <- 33

  # library() stops with an explicit error when mvtnorm is not installed,
  # whereas require() only returns FALSE and lets rmvnorm() fail later.
  library(mvtnorm)
  set.seed(32)
  gaussian1 <- rmvnorm(clustSize, mean = c(-4.0, -6.0),
                       sigma = matrix(c(1.0, 0.7, 0.7, 1.0), nrow = 2))
  gaussian2 <- rmvnorm(clustSize, mean = c(0.0, 0.0),
                       sigma = matrix(c(1.0, 0.0, 0.0, 1.0), nrow = 2))
  gaussian3 <- rmvnorm(clustSize, mean = c(4.0, -6.0),
                       sigma = matrix(c(1.0, -0.7, -0.7, 1.0), nrow = 2))
  M <- rbind(gaussian1, gaussian2, gaussian3)

  # True group of every row of M (the groups are stacked one after another).
  truth <- rep(1:3, each = clustSize)

  # Build neighborhoods from the data [25 is high, but it should not be much
  # lower if the graph is to keep a single connected component].
  NI <- .Call("getNeighbors", M, 25, 0.0, 1, TRUE)

  # TODO: also iterate over "ectd".
  for (dtype in "spath") {
    # Derive distances from the neighborhoods; this should work for every
    # distance type except "simple" (a special case handled with built-in
    # R functions).
    distances <- synclust:::getDistances(dtype, NI)

    for (cmeth in c("KM", "HC")) {
      # Finally, compute the clusters.
      clusters <- synclust:::getClusters(distances, cmeth, K = 3)

      # Soft check, because it has to succeed at each run. The labels are
      # compared with the true groups instead of being sorted: sorting would
      # only test the cluster sizes, not which points were grouped together.
      # Rows of the table are true groups, columns are predicted labels.
      confusion <- table(truth, clusters)
      purity <- apply(confusion, 1, max) / clustSize
      majority <- apply(confusion, 1, which.max)
      # At least 80% of each true group shares one label ...
      checkTrue(all(purity >= 0.8))
      # ... and the three groups are mapped to three different labels.
      checkTrue(length(unique(majority)) == 3)
    }
  }
}
61 | ||
# Test on a custom 2D "two moons, one circle" dataset: three groups of
# clustSize points each, stacked in the order circle, moon 1, moon 2.
test.clustering3 <- function() {
  clustSize <- 150

  set.seed(32)
  M <- matrix(nrow = 3 * clustSize, ncol = 2)
  # Big circle: radius = 10.
  rdata <- runif(clustSize, min = 0, max = 2 * pi)
  M[1:clustSize, ] <- 10 * cbind(cos(rdata), sin(rdata))
  # Moon 1: half circle of radius 5 centered at (-2, -0.5).
  rdata <- runif(clustSize, min = 0, max = pi)
  M[(clustSize + 1):(2 * clustSize), ] <-
    cbind(5 * cos(rdata) - 2, 5 * sin(rdata) - 0.5)
  # Moon 2: half circle of radius 5 centered at (2, 0.5).
  rdata <- runif(clustSize, min = pi, max = 2 * pi)
  M[(2 * clustSize + 1):(3 * clustSize), ] <-
    cbind(5 * cos(rdata) + 2, 5 * sin(rdata) + 0.5)

  # Add small global noise: one independent draw per matrix entry. Drawing
  # fewer values than length(M) would silently recycle the same noise over
  # several points.
  M <- M + rnorm(length(M), sd = 0.1)

  # True group of every row of M.
  truth <- rep(1:3, each = clustSize)

  # Build neighborhoods from the data [25 is high, but it should not be much
  # lower if the graph is to keep a single connected component].
  NI <- .Call("getNeighbors", M, 25, 0.0, 1, TRUE)

  # Only the ECTD distance can be used, because forcing connectivity implies
  # creating shortcuts in the graph, which strongly affect spath distances.
  distances <- synclust:::getDistances("ectd", NI)

  # Only hierarchical clustering can succeed here.
  clusters <- synclust:::getClusters(distances, "HC", K = 3)

  # The labels are compared with the true groups instead of being sorted:
  # sorting would only test the cluster sizes, not which points were grouped
  # together. Rows of the table are true groups, columns are predicted labels.
  confusion <- table(truth, clusters)
  purity <- apply(confusion, 1, max) / clustSize
  majority <- apply(confusion, 1, which.max)
  # At least 90% of each true group shares one label ...
  checkTrue(all(purity >= 0.90))
  # ... and the three groups are mapped to three different labels.
  checkTrue(length(unique(majority)) == 3)
}
97 | ||
# Check the renumbering of clusters whose labels are too high: after
# reordering, the set of labels in use is expected to be 1, 2, ..., k.
test.reordering <- function() {
  # Sorted set of distinct labels left after reordering.
  labels_after <- function(labels) {
    renumbered <- synclust:::reordering(labels)
    sort(unique(renumbered))
  }

  # Three distinct labels: 1, 6 and 8.
  clusters <- c(1, 6, 8, 8, 8, 1, 1, 1, 6, 6, 6, 8, 8, 1, 1, 6, 8)
  checkEquals(labels_after(clusters), c(1, 2, 3))

  # Five distinct labels: 1, 3, 12, 23 and 77.
  clusters <- c(23, 3, 23, 77, 77, 77, 1, 12, 12, 12, 77,
                12, 23, 23, 12, 23, 77, 12, 23, 77, 1)
  checkEquals(labels_after(clusters), c(1, 2, 3, 4, 5))
}