first commit
[synclust.git] / src / tests / t.kmeansClustering.c
1 #include "tests/helpers.h"
2 #include "sources/kmeansClustering.h"
3 #include <math.h>
4
5 //easy data: already clustered, one cluster = 1 vertex in equilateral triangle
6 void test_kmeansClustering1()
7 {
8 int n=99;
9 double* distances = (double*)malloc(n*n*sizeof(double));
10 for (int i=0; i<n*n; i++)
11 distances[i] = 1.0;
12 int clustCount = 3, clustSize = n/clustCount; //33
13
14 for (int kounter=0; kounter<clustCount; kounter++)
15 {
16 //cluster k: kounter*33...(kounter+1)*33-1
17 for (int i=kounter*clustSize; i<(kounter+1)*clustSize; i++)
18 {
19 for (int j=kounter*clustSize; j<(kounter+1)*clustSize; j++)
20 distances[i+n*j] = 0.0; //high-density cluster...
21 }
22 }
23
24 //call to clustering algorithm
25 int* clusters = kmeansWithDistances_core(distances, n, clustCount, 10, 100);
26
27 ASSERT_TRUE(countDistinctValues(clusters, n) == clustCount);
28 ASSERT_TRUE(checkClustersProportions(clusters, n, clustCount, 1e-10));
29 free(distances);
30 free(clusters);
31 }
32
33 //three isotropic (well separated) gaussian clusters
34 void test_kmeansClustering2()
35 {
36 // generate 2D data
37 int n=99, d=2;
38 double* M = (double*)malloc(n*d*sizeof(double));
39 int clustCount = 3, clustSize = n/clustCount; //33
40
41 double ctrs[3][2] =
42 {
43 {-3.0,-3.0},
44 {0.0,0.0},
45 {3.0,3.0}
46 };
47
48 srand(time(NULL));
49 for (int kounter=0; kounter<clustCount; kounter++)
50 {
51 //cluster k: kounter*33...(kounter+1)*33-1
52 for (int i=kounter*clustSize; i<(kounter+1)*clustSize; i++)
53 {
54 double U = (double)rand()/RAND_MAX;
55 double V = (double)rand()/RAND_MAX;
56 double fact = sqrt(-2*log(U));
57 M[i+n*0] = ctrs[kounter][0] + fact * cos(2*M_PI*V);
58 M[i+n*1] = ctrs[kounter][1] + fact * sin(2*M_PI*V);
59 }
60 }
61
62 // compute distances matrix
63 double* distances = (double*)calloc(n*n,sizeof(double));
64 for (int i=0; i<n; i++)
65 {
66 for (int ii=0; ii<n; ii++)
67 {
68 double distance = 0.0;
69 for (int j=0; j<d; j++)
70 distance += (M[i+n*j] - M[ii+n*j])*(M[i+n*j] - M[ii+n*j]);
71 distances[i+n*ii] = sqrt(distance);
72 }
73 }
74 free(M); //no need for initial data anymore
75
76 //call to clustering algorithm
77 int* clusters = kmeansWithDistances_core(distances, n, clustCount, 10, 100);
78
79 ASSERT_TRUE(countDistinctValues(clusters, n) == clustCount);
80 //test that each cluster accounts for 1/3 of total data, +/- 10%
81 ASSERT_TRUE(checkClustersProportions(clusters, n, clustCount, 0.1));
82 free(distances);
83 free(clusters);
84 }