#include "tests/helpers.h"
#include "sources/kmeansClustering.h"
#include <stdlib.h> //malloc, calloc, free, rand, srand
#include <math.h>
#include <time.h> //time, to seed the random generator

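/*
 * Assumed interface, inferred from the calls below (the real declarations
 * come from the headers above):
 *   int* kmeansWithDistances_core(double* distances, int n, int clustCount,
 *                                 int nbStart, int maxIter);
 *     distances is an n*n matrix stored column-major (distances[i+n*j] = d(i,j));
 *     the returned array of n labels is owned by the caller; reading the last
 *     two arguments (10 and 100 below) as restarts and iteration cap is an
 *     assumption, as are the parameter names.
 *   int countDistinctValues(int* labels, int n);
 *   int checkClustersProportions(int* labels, int n, int clustCount, double tol);
 *     presumably nonzero when every cluster holds about n/clustCount points,
 *     to within the tolerance tol.
 */
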
//easy data: already clustered; each cluster sits on one vertex of an
//equilateral triangle (within-cluster distance 0, between-cluster distance 1)
void test_kmeansClustering1()
{
    int n = 99;
    double* distances = (double*)malloc(n*n*sizeof(double));
    for (int i=0; i<n*n; i++)
        distances[i] = 1.0;
    int clustCount = 3, clustSize = n/clustCount; //33

    for (int kounter=0; kounter<clustCount; kounter++)
    {
        //cluster kounter: indices kounter*clustSize ... (kounter+1)*clustSize-1
        for (int i=kounter*clustSize; i<(kounter+1)*clustSize; i++)
        {
            for (int j=kounter*clustSize; j<(kounter+1)*clustSize; j++)
                distances[i+n*j] = 0.0; //zero distance inside the cluster: high density
        }
    }

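    //the resulting distance matrix is block diagonal: three 33x33 zero blocks
    //on the diagonal and 1.0 everywhere else, so the clustering should
    //recover the three blocks exactly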
    //call to clustering algorithm
    int* clusters = kmeansWithDistances_core(distances, n, clustCount, 10, 100);

    ASSERT_TRUE(countDistinctValues(clusters, n) == clustCount);
    //clusters must be exactly balanced: 33 points each
    ASSERT_TRUE(checkClustersProportions(clusters, n, clustCount, 1e-10));
    free(distances);
    free(clusters);
}

//three isotropic, well-separated Gaussian clusters
void test_kmeansClustering2()
{
    //generate 2D data
    int n = 99, d = 2;
    double* M = (double*)malloc(n*d*sizeof(double));
    int clustCount = 3, clustSize = n/clustCount; //33

    double ctrs[3][2] =
    {
        {-3.0,-3.0},
        {0.0,0.0},
        {3.0,3.0}
    };
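    //adjacent centers are 3*sqrt(2) ~ 4.24 apart while each coordinate gets
    //unit-variance noise below, so the three clusters barely overlap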

    srand(time(NULL));
    for (int kounter=0; kounter<clustCount; kounter++)
    {
        //cluster kounter: indices kounter*clustSize ... (kounter+1)*clustSize-1
        for (int i=kounter*clustSize; i<(kounter+1)*clustSize; i++)
        {
            //Box-Muller transform: standard normal noise around the center
            double U = (rand()+1.0) / (RAND_MAX+1.0); //keep U > 0 so log(U) stays finite
            double V = (double)rand()/RAND_MAX;
            double fact = sqrt(-2*log(U));
            M[i+n*0] = ctrs[kounter][0] + fact * cos(2*M_PI*V);
            M[i+n*1] = ctrs[kounter][1] + fact * sin(2*M_PI*V);
        }
    }
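
    //M now holds the 99 points in column-major layout:
    //M[i+n*j] is coordinate j of point i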

    //compute the pairwise distance matrix
    double* distances = (double*)calloc(n*n, sizeof(double));
    for (int i=0; i<n; i++)
    {
        for (int ii=0; ii<n; ii++)
        {
            double distance = 0.0;
            for (int j=0; j<d; j++)
                distance += (M[i+n*j] - M[ii+n*j]) * (M[i+n*j] - M[ii+n*j]);
            distances[i+n*ii] = sqrt(distance);
        }
    }
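    //full Euclidean distance matrix: symmetric, zero on the diagonal,
    //stored column-major just like M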
    free(M); //no need for initial data anymore

    //call to clustering algorithm
    int* clusters = kmeansWithDistances_core(distances, n, clustCount, 10, 100);

    ASSERT_TRUE(countDistinctValues(clusters, n) == clustCount);
    //test that each cluster accounts for 1/3 of total data, +/- 10%
    ASSERT_TRUE(checkClustersProportions(clusters, n, clustCount, 0.1));
    free(distances);
    free(clusters);
}
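
/*
 * Optional standalone entry point: a sketch only, assuming the harness behind
 * tests/helpers.h does not already define main(). The guard macro
 * KMEANS_TESTS_STANDALONE is hypothetical; compile with
 * -DKMEANS_TESTS_STANDALONE to enable it.
 */
#ifdef KMEANS_TESTS_STANDALONE
int main()
{
    test_kmeansClustering1();
    test_kmeansClustering2();
    return 0;
}
#endif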