e44e5e50456b1b21efafbfcc024ae69c64e5a5af
1 #include "tests/helpers.h"
2 #include "sources/kmeansClustering.h"
5 //easy data: already clustered, one cluster = 1 vertex in equilateral triangle
// Test: run the distance-based k-means on a hand-built n x n distance matrix
// in which every pair of points belonging to the same cluster is at distance 0,
// then check the number of distinct labels and the cluster proportions.
// NOTE(review): the declaration of n, the braces and any cleanup are not
// visible in this excerpt; the "//33" remark below suggests n/3 == 33 - confirm.
6 void test_kmeansClustering1()
// Flat buffer holding n*n doubles; it is addressed as distances[i + n*j] below.
9 double* distances
= (double*)malloc(n
*n
*sizeof(double));
// Visits all n*n entries of the buffer; the loop body is not visible here
// (presumably a default inter-cluster distance is written - TODO confirm).
10 for (int i
=0; i
<n
*n
; i
++)
// Three clusters; clustSize is the integer quotient n/clustCount.
12 int clustCount
= 3, clustSize
= n
/clustCount
; //33
// For each cluster, zero the square sub-block of pairwise distances between
// its own members (indices kounter*clustSize .. (kounter+1)*clustSize - 1).
14 for (int kounter
=0; kounter
<clustCount
; kounter
++)
16 //cluster k: kounter*33...(kounter+1)*33-1
17 for (int i
=kounter
*clustSize
; i
<(kounter
+1)*clustSize
; i
++)
19 for (int j
=kounter
*clustSize
; j
<(kounter
+1)*clustSize
; j
++)
20 distances
[i
+n
*j
] = 0.0; //high-density cluster...
// The two trailing literals (10, 100) are passed through unchanged; their
// meaning is defined by kmeansWithDistances_core, which is not shown here
// (presumably numbers of restarts / iterations - verify against its header).
24 //call to clustering algorithm
25 int* clusters
= kmeansWithDistances_core(distances
, n
, clustCount
, 10, 100);
// The labelling must use exactly clustCount distinct values.
27 ASSERT_TRUE(countDistinctValues(clusters
, n
) == clustCount
);
// Proportions check with a 1e-10 tolerance argument, i.e. essentially exact;
// the precise criterion lives in checkClustersProportions (not shown here).
28 ASSERT_TRUE(checkClustersProportions(clusters
, n
, clustCount
, 1e-10));
33 //three isotropic (well separated) gaussian clusters
// Test: draw three groups of random 2D points around the centers ctrs[k],
// build the full matrix of pairwise Euclidean distances, run the
// distance-based k-means on it and check the number of distinct labels and
// the cluster proportions (tolerance argument 0.1).
// NOTE(review): the declarations of n, d and ctrs, the braces and any final
// cleanup are not visible in this excerpt.
34 void test_kmeansClustering2()
// Data buffer of n*d doubles, addressed as M[i + n*j]: i is the point index,
// j the coordinate index.
38 double* M
= (double*)malloc(n
*d
*sizeof(double));
// Three clusters; clustSize is the integer quotient n/clustCount.
39 int clustCount
= 3, clustSize
= n
/clustCount
; //33
// Fill the points cluster by cluster.
49 for (int kounter
=0; kounter
<clustCount
; kounter
++)
51 //cluster k: kounter*33...(kounter+1)*33-1
52 for (int i
=kounter
*clustSize
; i
<(kounter
+1)*clustSize
; i
++)
// Box-Muller transform: two uniform draws U, V are turned into a pair of
// independent standard normal deviates, fact*cos(2*pi*V) and fact*sin(2*pi*V).
// NOTE(review): rand() may return 0, in which case U == 0 and log(U) is -inf;
// confirm whether this can matter for the test.
54 double U
= (double)rand()/RAND_MAX
;
55 double V
= (double)rand()/RAND_MAX
;
56 double fact
= sqrt(-2*log(U
));
// Coordinates 0 and 1 of point i: center of cluster kounter plus the noise.
57 M
[i
+n
*0] = ctrs
[kounter
][0] + fact
* cos(2*M_PI
*V
);
58 M
[i
+n
*1] = ctrs
[kounter
][1] + fact
* sin(2*M_PI
*V
);
// calloc gives a zero-initialised n*n buffer; every entry is then assigned in
// the double loop over (i, ii) below.
62 // compute distances matrix
63 double* distances
= (double*)calloc(n
*n
,sizeof(double));
64 for (int i
=0; i
<n
; i
++)
66 for (int ii
=0; ii
<n
; ii
++)
// Accumulate the squared coordinate differences between points i and ii.
68 double distance
= 0.0;
69 for (int j
=0; j
<d
; j
++)
70 distance
+= (M
[i
+n
*j
] - M
[ii
+n
*j
])*(M
[i
+n
*j
] - M
[ii
+n
*j
]);
// Store the Euclidean distance (square root of the accumulated sum).
71 distances
[i
+n
*ii
] = sqrt(distance
);
74 free(M
); //no need for initial data anymore
// The two trailing literals (10, 100) are passed through unchanged; their
// meaning is defined by kmeansWithDistances_core, which is not shown here
// (presumably numbers of restarts / iterations - verify against its header).
76 //call to clustering algorithm
77 int* clusters
= kmeansWithDistances_core(distances
, n
, clustCount
, 10, 100);
// The labelling must use exactly clustCount distinct values.
79 ASSERT_TRUE(countDistinctValues(clusters
, n
) == clustCount
);
80 //test that each cluster accounts for 1/3 of total data, +/- 10%
81 ASSERT_TRUE(checkClustersProportions(clusters
, n
, clustCount
, 0.1));