310c9ca0249e301bd13dd2ddbf1261863aa37cae
1 #include "tests/helpers.h"
3 // Auxiliary: element-wise comparison of two vectors of doubles.
// NOTE(review): this block is damaged. Every statement carries a stray leading
// line-number token, tokens are split across lines, and the braces and return
// statements are not visible. Restore the full body from version control;
// the comments below describe only what can be seen here.
//   v1, v2 : arrays to compare; elements 0..n-1 of both are read
//   n      : number of elements compared
//   returns: an int; the values returned on match/mismatch are not visible here
4 int checkEqualV(double* v1
, double* v2
, int n
)
6 double epsilon
= 1e-10; // arbitrary small absolute tolerance: a difference >= epsilon is a mismatch
7 for (int i
=0; i
<n
; i
++)
// mismatch test on the absolute difference; the action taken when it fires is not visible
9 if (fabs(v1
[i
] - v2
[i
]) >= epsilon
)
15 // Auxiliary: count distinct values in an integer array.
// NOTE(review): this block is damaged. Stray leading line-number tokens are
// present, tokens are split across lines, and several lines are absent: the
// declaration of maxVal, the body of the first loop, the result counter, any
// free of kountArray and the return statement. Restore from version control.
//   v : array of n ints; each v[i] is used directly as an index into kountArray,
//       so the visible code needs 0 <= v[i] <= maxVal (a negative value would
//       index out of bounds)
//   n : number of elements in v
16 int countDistinctValues(int* v
, int n
)
// scan starts at index 1; the loop body is not visible
// (presumably it updates maxVal from v[i]; confirm)
19 for (int i
=1; i
<n
; i
++)
// one zero-initialised flag per candidate value 0..maxVal;
// no NULL check on the calloc result is visible in this excerpt
24 int* kountArray
= (int*)calloc(maxVal
+1,sizeof(int));
26 for (int i
=0; i
<n
; i
++)
// a flag still at 0 means this is the first occurrence of v[i]
28 if (kountArray
[v
[i
]] == 0)
31 kountArray
[v
[i
]] = 1; // mark this value as "seen"
38 // Helper used when checking a clustering result: linear search for a label
// among the labels recorded so far.
// NOTE(review): this block is damaged. Stray leading line-number tokens are
// present, tokens are split across lines, and the braces and return statements
// are not visible. Restore from version control.
//   label                : value searched for
//   processedLabels      : array of labels; only the first countProcessedLabels
//                          entries are examined
//   countProcessedLabels : number of valid entries in processedLabels
//   returns: an int; the values returned for found / not found are not visible here
39 int labelIsProcessed(int label
, int* processedLabels
, int countProcessedLabels
)
41 for (int j
=0; j
<countProcessedLabels
; j
++)
// match test; the action taken on a match is not visible
43 if (processedLabels
[j
] == label
)
49 // check both clusters purity and size (ideally they all contain n/clustCount points)
// NOTE(review): this block is damaged. Stray leading line-number tokens are
// present, tokens are split across lines, and several lines are absent: the
// declaration of i, the enclosing loop over labels, the increments inside the
// inner loops, and every return statement. Restore from version control.
//   clusters   : array of n labels, one per data point
//   n          : number of data points
//   clustCount : expected number of clusters; used as a divisor, so it must be non-zero
//   tol        : tolerance used by both the size check and the purity check
//   returns: an int; the values returned on success/failure are not visible here
50 int checkClustersProportions(int* clusters
, int n
, int clustCount
, double tol
)
52 // initialize array to keep track of clusters labels
// (clustCount slots; no NULL check on the malloc result is visible in this excerpt)
53 int* processedLabels
= (int*)malloc(clustCount
*sizeof(int));
54 for (int j
=0; j
<clustCount
; j
++)
55 processedLabels
[j
] = -1;
// clustSize is an integer division: any remainder of n / clustCount is dropped
56 int countProcessedLabels
= 0, clustSize
= n
/clustCount
;
61 // go to the next unprocessed label (if possible)
// (the declaration of i and the body of this loop are not visible)
62 while (i
< n
&& labelIsProcessed(clusters
[i
], processedLabels
, countProcessedLabels
))
69 int label
= clusters
[i
];
// record the new label; no bound check against clustCount is visible, so more
// than clustCount distinct labels would write past the end of processedLabels
70 processedLabels
[countProcessedLabels
++] = label
;
72 // count elements in current cluster (represented by label)
73 int countDataWithCurLabel
= 0;
74 for (int ii
=0; ii
<n
; ii
++)
76 if (clusters
[ii
] == label
)
77 countDataWithCurLabel
++;
79 // if cluster is too small or too big, test fails
// (fabs yields a double, so the division by n is floating-point: the deviation
// from clustSize is measured as a fraction of n)
80 if ( fabs(countDataWithCurLabel
- clustSize
) / n
> tol
)
// release the label array on this failure path; the return that presumably follows is not visible
82 free(processedLabels
);
86 // now check counts per cluster (repartition);
87 // the labels should not be spread between different (true) groups
88 int maxCountLabelInClust
= 0;
89 for (int kounter
=0; kounter
<clustCount
; kounter
++)
91 int countLabelInClust
= 0;
// examines the contiguous slice [kounter*clustSize, (kounter+1)*clustSize);
// presumably true group k occupies that slice of clusters (verify against callers).
// Indices >= clustCount*clustSize are never visited here.
92 for (int ii
=kounter
*clustSize
; ii
<(kounter
+1)*clustSize
; ii
++)
// match test; the statement it guards is not visible (presumably countLabelInClust++; confirm)
94 if (clusters
[ii
] == label
)
// keep the largest per-slice count seen for this label
97 if (countLabelInClust
> maxCountLabelInClust
)
98 maxCountLabelInClust
= countLabelInClust
;
100 // we must have max(repartition) / clustSize >= 1 - tol
// (the cast makes this a floating-point division)
101 if ((double)maxCountLabelInClust
/ clustSize
< 1.0 - tol
)
// release the label array on this failure path; the return that presumably follows is not visible
103 free(processedLabels
);
// final release of the label array; the return that presumably follows is not visible
108 free(processedLabels
);