epclust/tests/testthat/test-clustering.R

   1 context("clustering")
   2
   test_that("clusteringTask1 behave as expected",
   {
       # cluster::pam() is called below: skip cleanly when the package is missing,
       # rather than loading it with require(), which only returns FALSE on failure.
       skip_if_not_installed("cluster")

       # Seed the RNG so that the simulated noise and the random baselines drawn at
       # the end of the test are reproducible from one run to the next.
       # NOTE: this sets the global RNG state for the rest of the session.
       set.seed(32)

       # Generate 60 reference sinusoidal series (medoids to be found),
       # and sample 900 series around them (add a small noise)
       n = 900
       x = seq(0,9.5,0.1)
       L = length(x) # 96 points (original note: "96 1/4h")
       K1 = 60
       s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) )
       series = matrix(nrow=L, ncol=n)
       # I() is not defined in this file; presumably a test helper mapping the
       # series index i onto one of the K1 references -- TODO confirm
       for (i in seq_len(n))
           series[,i] = s[[I(i,K1)]] + rnorm(L,sd=0.01)

       # Return the requested series as columns of a matrix; indices above n are
       # discarded, and NULL is returned when none remain.
       getSeries = function(indices) {
           indices = indices[indices <= n]
           if (length(indices)>0) series[, indices, drop=FALSE] else NULL
       }

       wf = "haar"
       ctype = "absolute"
       # Contributions of the requested series, computed by curvesToContribs()
       # (defined outside this file) from a matrix with one series per column.
       getContribs = function(indices)
           curvesToContribs(series[, indices, drop=FALSE], wf, ctype)

       # Stage-1 clustering algorithm: PAM on the transposed contributions,
       # returning the indices of the selected medoids.
       algoClust1 = function(contribs,K) cluster::pam(t(contribs),K,diss=FALSE)$id.med
       # NOTE(review): 75 is presumably the number of series per chunk -- confirm
       # against the clusteringTask1() signature.
       indices1 = clusteringTask1(seq_len(n), getContribs, K1, algoClust1, 75,
           verbose=TRUE, parll=FALSE)
       medoids_K1 = getSeries(indices1)

       expect_equal(dim(medoids_K1), c(L,K1))
       # Not easy to evaluate result: at least we expect it to be better than random
       # selection of medoids within initial series
       distor_good = computeDistortion(series, medoids_K1)
       for (i in 1:3)
       {
           random_medoids = series[, sample(seq_len(n), K1), drop=FALSE]
           expect_lte( distor_good, computeDistortion(series, random_medoids) )
       }
   })
  37
  test_that("clusteringTask2 behave as expected",
  {
      skip("Unexplained failure")

      # Both packages are called through `::` below; skip cleanly if one is missing.
      skip_if_not_installed("cluster")
      skip_if_not_installed("bigmemory")

      # Seed the RNG so that the simulated noise and the random baselines drawn at
      # the end of the test are reproducible from one run to the next.
      # NOTE: this sets the global RNG state for the rest of the session.
      set.seed(32)

      # Same 60 reference sinusoidal series as in the clusteringTask1 test,
      # but this time we consider them as medoids - skipping stage 1
      # Here also we sample 900 series around the 60 "medoids"
      n = 900
      x = seq(0,9.5,0.1)
      L = length(x) # 96 points (original note: "96 1/4h")
      K1 = 60
      K2 = 3
      #for (i in 1:60) {plot(x^(1+i/30)*cos(x+i),type="l",col=i,ylim=c(-50,50)); par(new=TRUE)}
      s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) )
      series = matrix(nrow=L, ncol=n)
      # I() is not defined in this file; presumably a test helper mapping the
      # series index i onto one of the K1 references -- TODO confirm
      for (i in seq_len(n))
          series[,i] = s[[I(i,K1)]] + rnorm(L,sd=0.01)

      # Return the requested series as columns of a matrix; indices above n are
      # discarded, and NULL is returned when none remain.
      getRefSeries = function(indices) {
          indices = indices[indices <= n]
          if (length(indices)>0) series[, indices, drop=FALSE] else NULL
      }

      # Perfect situation: all medoids "after stage 1" are good.
      # The K1 noise-free reference curves are bound column-wise (one per column).
      medoids_K1 = bigmemory::as.big.matrix( do.call(cbind, s) )
      # Stage-2 clustering algorithm: PAM on a dissimilarity matrix, returning
      # the indices of the selected medoids.
      algoClust2 = function(dists,K) cluster::pam(dists,K,diss=TRUE)$id.med
      # NOTE(review): the positional arguments (n, 75, 4, 8, "little") are passed
      # as in the original test; their meaning depends on the clusteringTask2()
      # signature, which is not visible here -- confirm.
      medoids_K2 = clusteringTask2(medoids_K1, K2, algoClust2, getRefSeries,
          n, 75, 4, 8, "little", verbose=TRUE, parll=FALSE)

      expect_equal(dim(medoids_K2), c(L,K2))
      # Not easy to evaluate result: at least we expect it to be better than random
      # selection of K2 synchrones within 1...K1 (from where distances computations
      # + clustering was run)
      synchrones = computeSynchrones(medoids_K1,getRefSeries,n,75,verbose=FALSE,parll=FALSE)
      distor_good = computeDistortion(synchrones, medoids_K2)
      for (i in 1:3)
      {
          # Draw as many random synchrones as there are final medoids (K2)
          random_medoids = synchrones[, sample(seq_len(K1), K2)]
          expect_lte( distor_good, computeDistortion(synchrones, random_medoids) )
      }
  })
  75
  # Compute the distortion of a set of series with respect to a set of medoids:
  # the square root of the mean, over all series, of the squared Euclidean
  # distance to the closest medoid, that distance being divided by the series
  # length L.
  #
  # series:  matrix with one series per column (L rows, n columns); a plain
  #          vector is treated as a single series
  # medoids: matrix (or big.matrix) with one medoid per column and L rows; a
  #          plain vector is treated as a single medoid
  #
  # Returns a single numeric value (NaN when there are no series).
  computeDistortion = function(series, medoids)
  {
      # inherits() keeps the function usable on plain matrices when bigmemory is
      # not installed; drop=FALSE keeps the matrix shape for a single column/row.
      if (inherits(medoids, "big.matrix"))
          medoids = medoids[, , drop=FALSE] #extract standard matrix
      if (inherits(series, "big.matrix"))
          series = series[, , drop=FALSE]

      # A single series / medoid extracted with `[` arrives as a dimensionless
      # vector: restore the one-column matrix layout.
      medoids = as.matrix(medoids)
      series = as.matrix(series)

      n = ncol(series) ; L = nrow(series)
      # Series and medoids must have the same length, otherwise the subtraction
      # below would silently recycle values.
      stopifnot(nrow(medoids) == L, ncol(medoids) >= 1)

      # For each series: normalized squared distance to its closest medoid.
      # `medoids - series[,i]` recycles the series down every medoid column.
      closest = vapply( seq_len(n), function(i)
          min( colSums( (medoids - series[,i])^2 ) / L ), numeric(1) )

      sqrt( sum(closest) / n )
  }