# Run several clustering methods on the iris dataset: the 'setosa' rows
# (1 to 50) are expected to end up alone in one single cluster.
test.clustering1 <- function() {
  data(iris)
  features <- as.matrix(iris[, 1:4])

  # Neighborhoods from the data.
  # [25 is high, but shouldn't be lower to have 1 connected component]
  NI <- .Call("getNeighbors", features, 25, 0.0, 1, TRUE)

  distTypes <- c("spath")  # "ectd" is left out for now (bug: TODO)
  clustMethods <- c("KM", "HC")

  for (distType in distTypes) {
    # Distances from neighborhoods; should be OK for all distances except
    # "simple" (which is treated as a special case with built-in R funcs).
    dists <- synclust:::getDistances(distType, NI)

    for (method in clustMethods) {
      # Finally, get the cluster label of every row.
      assigned <- synclust:::getClusters(dists, method, K = 3)

      # 'setosa' must be pure (one label over rows 1..50) ...
      setosaLabels <- unique(assigned[1:50])
      checkTrue(length(setosaLabels) == 1)
      # ... and separated (that label never appears in rows 51..150).
      checkTrue(!(setosaLabels[1] %in% assigned[51:150]))
    }
  }
}
| 26 | |
# Test several parameter combinations on a custom non-isotropic gaussian
# dataset (2D): three groups of 'clustSize' points each, stacked in order.
test.clustering2 <- function() {
  clustSize <- 33

  # library() stops immediately with a clear message when mvtnorm is missing,
  # whereas require() would only return FALSE and let rmvnorm() fail later.
  library(mvtnorm)
  set.seed(32)
  gaussian1 <- rmvnorm(clustSize, mean = c(-4.0, -6.0),
                       sigma = matrix(c(1.0, 0.7, 0.7, 1.0), nrow = 2))
  gaussian2 <- rmvnorm(clustSize, mean = c(0.0, 0.0),
                       sigma = matrix(c(1.0, 0.0, 0.0, 1.0), nrow = 2))
  gaussian3 <- rmvnorm(clustSize, mean = c(4.0, -6.0),
                       sigma = matrix(c(1.0, -0.7, -0.7, 1.0), nrow = 2))
  M <- rbind(gaussian1, gaussian2, gaussian3)

  # Neighborhoods from the data.
  # [25 is high, but shouldn't be much lower to have 1 connected component]
  NI <- .Call("getNeighbors", M, 25, 0.0, 1, TRUE)

  for (dtype in c("spath")) {  # "ectd" still disabled: TODO
    # Distances from neighborhoods; should be OK for all distances except
    # "simple" (which is treated as a special case with built-in R funcs).
    distances <- synclust:::getDistances(dtype, NI)

    for (cmeth in c("KM", "HC")) {
      # Finally, get clusters.
      clusters <- synclust:::getClusters(distances, cmeth, K = 3)

      # Soft check, because it has to succeed at each run. sort() discards
      # which point got which label, so this only constrains how many points
      # carry each label: at least 80% of each sorted slice of 'clustSize'
      # labels must hold the expected value.
      # NOTE(review): presumes labels are exactly 1, 2, 3 -- confirm against
      # getClusters().
      srt <- sort(clusters)
      first <- 1:clustSize
      second <- (clustSize + 1):(2 * clustSize)
      third <- (2 * clustSize + 1):(3 * clustSize)
      checkTrue(sum(srt[first] == 1) / clustSize >= 0.8)
      checkTrue(sum(srt[second] == 2) / clustSize >= 0.8)
      checkTrue(sum(srt[third] == 3) / clustSize >= 0.8)
    }
  }
}
| 61 | |
# Test clustering on a custom "two moons one circle" dataset (2D): one big
# circle and two half circles, 'clustSize' points each, stacked in that order.
test.clustering3 <- function() {
  clustSize <- 150

  set.seed(32)
  M <- matrix(nrow = 3 * clustSize, ncol = 2)
  # Big circle: radius = 10, centered at the origin.
  rdata <- runif(clustSize, min = 0, max = 2 * pi)
  M[1:clustSize, ] <- 10 * cbind(cos(rdata), sin(rdata))
  # Moon 1: half circle of radius 5 centered at (-2, -0.5).
  rdata <- runif(clustSize, min = 0, max = pi)
  M[(clustSize + 1):(2 * clustSize), ] <-
    cbind(5 * cos(rdata) - 2, 5 * sin(rdata) - 0.5)
  # Moon 2: half circle of radius 5 centered at (2, 0.5).
  rdata <- runif(clustSize, min = pi, max = 2 * pi)
  M[(2 * clustSize + 1):(3 * clustSize), ] <-
    cbind(5 * cos(rdata) + 2, 5 * sin(rdata) + 0.5)

  # Add small global noise: one independent draw per matrix entry. A shorter
  # noise vector would be silently recycled by R, repeating the same
  # perturbations across different entries.
  M <- M + rnorm(length(M), sd = 0.1)

  # Neighborhoods from the data.
  # [25 is high, but shouldn't be much lower to have 1 connected component]
  NI <- .Call("getNeighbors", M, 25, 0.0, 1, TRUE)

  # Only ECTD distance can be used, because forcing connectivity implies
  # creating shortcuts in the graph, which strongly affect spath distance.
  distances <- synclust:::getDistances("ectd", NI)

  # Only hierarchical clustering can succeed here.
  clusters <- synclust:::getClusters(distances, "HC", K = 3)

  # sort() discards which point got which label, so this only constrains how
  # many points carry each label: at least 90% of each sorted slice of
  # 'clustSize' labels must hold the expected value.
  # NOTE(review): presumes labels are exactly 1, 2, 3 -- confirm against
  # getClusters().
  srt <- sort(clusters)
  first <- 1:clustSize
  second <- (clustSize + 1):(2 * clustSize)
  third <- (2 * clustSize + 1):(3 * clustSize)
  checkTrue(sum(srt[first] == 1) / clustSize >= 0.90)
  checkTrue(sum(srt[second] == 2) / clustSize >= 0.90)
  checkTrue(sum(srt[third] == 3) / clustSize >= 0.90)
}
| 97 | |
# Renumbering when clusters carry labels that are too high: after
# reordering(), the distinct labels must be exactly 1..K.
test.reordering <- function() {
  # Three distinct labels (1, 6, 8) -> expected label set {1, 2, 3}.
  threeLabels <- c(1,6,8,8,8,1,1,1,6,6,6,8,8,1,1,6,8)
  renumbered <- synclust:::reordering(threeLabels)
  checkEquals(sort(unique(renumbered)), c(1,2,3))

  # Five distinct labels (1, 3, 12, 23, 77) -> expected set {1, 2, 3, 4, 5}.
  fiveLabels <- c(23,3,23,77,77,77,1,12,12,12,77,12,23,23,12,23,77,12,23,77,1)
  renumbered <- synclust:::reordering(fiveLabels)
  checkEquals(sort(unique(renumbered)), c(1,2,3,4,5))
}