1 #include "Util/types.h"
3 #include <cds/Vector.h>
7 // auxiliary function to obtain a random sample of 1..n with k elements
8 static void sample(uint32_t* v
, uint32_t n
, uint32_t k
)
10 // refVect = (0,1,...,n-1,n)
11 uint32_t* refVect
= (uint32_t*) malloc(n
* sizeof(uint32_t));
12 for (uint32_t i
= 0; i
< n
; i
++)
15 uint32_t curSize
= n
; // current size of the sampling set
16 for (uint32_t j
= 0; j
< k
; j
++)
18 // pick an index in sampling set:
19 uint32_t index
= get_rand_int() % curSize
;
20 v
[j
] = refVect
[index
];
21 // move this index outside of sampling set:
22 refVect
[index
] = refVect
[--curSize
];
28 // assign a vector (represented by its dissimilarities to others, as dissimilarities[index,])
29 // to a cluster, represented by its center ==> output is integer in 0..K-1
30 static uint32_t assignCluster(uint32_t index
, Real
* dissimilarities
,
31 uint32_t* centers
, uint32_t n
, uint32_t K
)
33 uint32_t minIndex
= 0;
34 Real minDist
= dissimilarities
[index
* n
+ centers
[0]];
36 for (uint32_t j
= 1; j
< K
; j
++)
38 if (dissimilarities
[index
* n
+ centers
[j
]] < minDist
)
40 minDist
= dissimilarities
[index
* n
+ centers
[j
]];
48 // assign centers given a clustering, and also compute corresponding distortion
49 static void assign_centers(uint32_t nbClusters
, Vector
** clusters
, Real
* dissimilarities
,
50 uint32_t nbItems
, uint32_t* ctrs
, Real
* distor
)
53 // TODO [heuristic]: checking only a neighborhood of the former center ?
54 for (uint32_t j
= 0; j
< nbClusters
; j
++)
56 // If the cluster is empty, choose a center at random (pathological case...)
57 uint32_t minIndex
= get_rand_int() % nbItems
;
58 Real minSumDist
= INFINITY
;
59 for (uint32_t i
= 0; i
< vector_size(clusters
[j
]); i
++)
62 vector_get(clusters
[j
], i
, index1
);
63 // attempt to use current index as center
65 for (uint32_t ii
= 0; ii
< vector_size(clusters
[j
]); ii
++)
68 vector_get(clusters
[j
], ii
, index2
);
69 sumDist
+= dissimilarities
[index1
* nbItems
+ index2
];
71 if (sumDist
< minSumDist
)
78 *distor
+= minSumDist
;
82 // Core PAM algorithm from a dissimilarity matrix; (e.g. nstart=10, maxiter=100)
83 void pam(Real
* dissimilarities
, uint32_t nbItems
, uint32_t nbClusters
, int clustOnMedoids
,
84 uint32_t nbStart
, uint32_t maxNbIter
, Result_t
* result
)
86 uint32_t* ctrs
= result
->medoids_ranks
; //shorthand
87 uint32_t* oldCtrs
= (uint32_t*) malloc(nbClusters
* sizeof(uint32_t));
88 Vector
** clusters
= (Vector
**) malloc(nbClusters
* sizeof(Vector
*));
89 Vector
** bestClusts
= (Vector
**) malloc(nbClusters
* sizeof(Vector
*));
90 for (uint32_t j
= 0; j
< nbClusters
; j
++)
92 clusters
[j
] = vector_new(uint32_t);
93 bestClusts
[j
] = vector_new(uint32_t);
96 Real lastDistor
, distor
, bestDistor
= INFINITY
;
97 for (uint32_t startKount
= 0; startKount
< nbStart
; startKount
++)
99 // centers (random) [re]initialization
102 // In this special case (clustering groups of medoids), we can improve the sampling
103 uint32_t randomMedoidRank
= get_rand_int() % (nbItems
/ nbClusters
);
104 uint32_t startIndex
= randomMedoidRank
* nbClusters
;
105 for (uint32_t index
= startIndex
; index
< startIndex
+ nbClusters
; index
++)
106 ctrs
[index
- startIndex
] = index
;
109 // No information: complete random sampling
110 sample(ctrs
, nbItems
, nbClusters
);
111 for (uint32_t j
= 0; j
< nbClusters
; j
++)
113 uint32_t kounter
= 0;
116 do // while 'distortion' decreases...
118 // (re)initialize clusters to empty sets
119 for (uint32_t j
= 0; j
< nbClusters
; j
++)
120 vector_clear(clusters
[j
]);
122 // estimate clusters belongings
123 for (uint32_t i
= 0; i
< nbItems
; i
++)
125 uint32_t affectation
= assignCluster(i
, dissimilarities
, ctrs
, nbItems
, nbClusters
);
126 vector_push(clusters
[affectation
], i
);
129 // copy current centers to old centers
130 for (uint32_t j
= 0; j
< nbClusters
; j
++)
131 oldCtrs
[j
] = ctrs
[j
];
135 assign_centers(nbClusters
, clusters
, dissimilarities
, nbItems
, ctrs
, &distor
);
137 while (distor
< lastDistor
&& kounter
++ < maxNbIter
);
139 if (distor
< bestDistor
)
141 // copy current clusters into bestClusts
142 for (uint32_t j
= 0; j
< nbClusters
; j
++)
144 vector_clear(bestClusts
[j
]);
145 for (uint32_t i
= 0; i
< vector_size(clusters
[j
]); i
++)
148 vector_get(clusters
[j
], i
, index
);
149 vector_push(bestClusts
[j
], index
);
156 // Assign centers from bestClusts
157 assign_centers(nbClusters
, bestClusts
, dissimilarities
, nbItems
, ctrs
, &distor
);
160 for (uint32_t j
= 0; j
< nbClusters
; j
++)
162 vector_destroy(clusters
[j
]);
163 vector_destroy(bestClusts
[j
]);