Commit | Line | Data |
---|---|---|
15d1825d BA |
1 | #include "tests/helpers.h" |
2 | ||
3 | //auxiliary to check vectors equality | |
4 | int checkEqualV(double* v1, double* v2, int n) | |
5 | { | |
6 | double epsilon = 1e-10; // arbitrary small value to make comparisons | |
7 | for (int i=0; i<n; i++) | |
8 | { | |
9 | if (fabs(v1[i] - v2[i]) >= epsilon) | |
10 | return S_FALSE; | |
11 | } | |
12 | return S_TRUE; | |
13 | } | |
14 | ||
15 | // auxiliary to count distinct values in an integer array | |
16 | int countDistinctValues(int* v, int n) | |
17 | { | |
18 | int maxVal = v[0]; | |
19 | for (int i=1; i<n; i++) | |
20 | { | |
21 | if (v[i] > maxVal) | |
22 | maxVal = v[i]; | |
23 | } | |
24 | int* kountArray = (int*)calloc(maxVal+1,sizeof(int)); | |
25 | int res = 0; | |
26 | for (int i=0; i<n; i++) | |
27 | { | |
28 | if (kountArray[v[i]] == 0) | |
29 | { | |
30 | res++; | |
31 | kountArray[v[i]] = 1; // mark this value as "seen" | |
32 | } | |
33 | } | |
34 | free(kountArray); | |
35 | return res; | |
36 | } | |
37 | ||
38 | // helper to check clustering result | |
39 | int labelIsProcessed(int label, int* processedLabels, int countProcessedLabels) | |
40 | { | |
41 | for (int j=0; j<countProcessedLabels; j++) | |
42 | { | |
43 | if (processedLabels[j] == label) | |
44 | return S_TRUE; | |
45 | } | |
46 | return S_FALSE; | |
47 | } | |
48 | ||
49 | // check both clusters purity and size (ideally they all contain n/clustCount points) | |
50 | int checkClustersProportions(int* clusters, int n, int clustCount, double tol) | |
51 | { | |
52 | // initialize array to keep track of clusters labels | |
53 | int* processedLabels = (int*)malloc(clustCount*sizeof(int)); | |
54 | for (int j=0; j<clustCount; j++) | |
55 | processedLabels[j] = -1; | |
56 | int countProcessedLabels = 0, clustSize = n/clustCount; | |
57 | ||
58 | int i=0; | |
59 | while (i<n) | |
60 | { | |
61 | // go to the next unprocessed label (if possible) | |
62 | while (i < n && labelIsProcessed(clusters[i], processedLabels, countProcessedLabels)) | |
63 | { | |
64 | i++; | |
65 | } | |
66 | if (i >= n) | |
67 | break; | |
68 | ||
69 | int label = clusters[i]; | |
70 | processedLabels[countProcessedLabels++] = label; | |
71 | ||
72 | // count elements in current cluster (represented by label) | |
73 | int countDataWithCurLabel = 0; | |
74 | for (int ii=0; ii<n; ii++) | |
75 | { | |
76 | if (clusters[ii] == label) | |
77 | countDataWithCurLabel++; | |
78 | } | |
79 | // if cluster is too small or too big, test fails | |
80 | if ( fabs(countDataWithCurLabel - clustSize) / n > tol ) | |
81 | { | |
82 | free(processedLabels); | |
83 | return S_FALSE; | |
84 | } | |
85 | ||
86 | // now check counts per cluster (repartition); | |
87 | // the labels should not be spread between different (true) groups | |
88 | int maxCountLabelInClust = 0; | |
89 | for (int kounter=0; kounter<clustCount; kounter++) | |
90 | { | |
91 | int countLabelInClust = 0; | |
92 | for (int ii=kounter*clustSize; ii<(kounter+1)*clustSize; ii++) | |
93 | { | |
94 | if (clusters[ii] == label) | |
95 | countLabelInClust++; | |
96 | } | |
97 | if (countLabelInClust > maxCountLabelInClust) | |
98 | maxCountLabelInClust = countLabelInClust; | |
99 | } | |
100 | // we must have max(repartition) / clustSize >= 1 - tol | |
101 | if ((double)maxCountLabelInClust / clustSize < 1.0 - tol) | |
102 | { | |
103 | free(processedLabels); | |
104 | return S_FALSE; | |
105 | } | |
106 | } | |
107 | ||
108 | free(processedLabels); | |
109 | return S_TRUE; | |
110 | } |