Commit | Line | Data |
---|---|---|
15d1825d BA |
1 | #include "tests/helpers.h" |
2 | #include "sources/kmeansClustering.h" | |
3 | #include <math.h> | |
4 | ||
5 | //easy data: already clustered, one cluster = 1 vertex in equilateral triangle | |
6 | void test_kmeansClustering1() | |
7 | { | |
8 | int n=99; | |
9 | double* distances = (double*)malloc(n*n*sizeof(double)); | |
10 | for (int i=0; i<n*n; i++) | |
11 | distances[i] = 1.0; | |
12 | int clustCount = 3, clustSize = n/clustCount; //33 | |
13 | ||
14 | for (int kounter=0; kounter<clustCount; kounter++) | |
15 | { | |
16 | //cluster k: kounter*33...(kounter+1)*33-1 | |
17 | for (int i=kounter*clustSize; i<(kounter+1)*clustSize; i++) | |
18 | { | |
19 | for (int j=kounter*clustSize; j<(kounter+1)*clustSize; j++) | |
20 | distances[i+n*j] = 0.0; //high-density cluster... | |
21 | } | |
22 | } | |
23 | ||
24 | //call to clustering algorithm | |
25 | int* clusters = kmeansWithDistances_core(distances, n, clustCount, 10, 100); | |
26 | ||
27 | ASSERT_TRUE(countDistinctValues(clusters, n) == clustCount); | |
28 | ASSERT_TRUE(checkClustersProportions(clusters, n, clustCount, 1e-10)); | |
29 | free(distances); | |
30 | free(clusters); | |
31 | } | |
32 | ||
33 | //three isotropic (well separated) gaussian clusters | |
34 | void test_kmeansClustering2() | |
35 | { | |
36 | // generate 2D data | |
37 | int n=99, d=2; | |
38 | double* M = (double*)malloc(n*d*sizeof(double)); | |
39 | int clustCount = 3, clustSize = n/clustCount; //33 | |
40 | ||
41 | double ctrs[3][2] = | |
42 | { | |
43 | {-3.0,-3.0}, | |
44 | {0.0,0.0}, | |
45 | {3.0,3.0} | |
46 | }; | |
47 | ||
48 | srand(time(NULL)); | |
49 | for (int kounter=0; kounter<clustCount; kounter++) | |
50 | { | |
51 | //cluster k: kounter*33...(kounter+1)*33-1 | |
52 | for (int i=kounter*clustSize; i<(kounter+1)*clustSize; i++) | |
53 | { | |
54 | double U = (double)rand()/RAND_MAX; | |
55 | double V = (double)rand()/RAND_MAX; | |
56 | double fact = sqrt(-2*log(U)); | |
57 | M[i+n*0] = ctrs[kounter][0] + fact * cos(2*M_PI*V); | |
58 | M[i+n*1] = ctrs[kounter][1] + fact * sin(2*M_PI*V); | |
59 | } | |
60 | } | |
61 | ||
62 | // compute distances matrix | |
63 | double* distances = (double*)calloc(n*n,sizeof(double)); | |
64 | for (int i=0; i<n; i++) | |
65 | { | |
66 | for (int ii=0; ii<n; ii++) | |
67 | { | |
68 | double distance = 0.0; | |
69 | for (int j=0; j<d; j++) | |
70 | distance += (M[i+n*j] - M[ii+n*j])*(M[i+n*j] - M[ii+n*j]); | |
71 | distances[i+n*ii] = sqrt(distance); | |
72 | } | |
73 | } | |
74 | free(M); //no need for initial data anymore | |
75 | ||
76 | //call to clustering algorithm | |
77 | int* clusters = kmeansWithDistances_core(distances, n, clustCount, 10, 100); | |
78 | ||
79 | ASSERT_TRUE(countDistinctValues(clusters, n) == clustCount); | |
80 | //test that each cluster accounts for 1/3 of total data, +/- 10% | |
81 | ASSERT_TRUE(checkClustersProportions(clusters, n, clustCount, 0.1)); | |
82 | free(distances); | |
83 | free(clusters); | |
84 | } |