throw away old code, prepare tests
[epclust.git] / old_C_code / stage1 / src / Algorithm / pam.c
diff --git a/old_C_code/stage1/src/Algorithm/pam.c b/old_C_code/stage1/src/Algorithm/pam.c
deleted file mode 100644 (file)
index 60e3efe..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-#include "Util/types.h"
-#include "Util/rng.h"
-#include <cgds/Vector.h>
-#include <stdlib.h>
-#include <math.h>
-
-// auxiliary function to obtain a random sample of 1..n with k elements
-static void sample(uint32_t* v, uint32_t n, uint32_t k)
-{
-       // refVect = (0,1,...,n-1,n)
-       uint32_t* refVect = (uint32_t*) malloc(n * sizeof(uint32_t));
-       for (uint32_t i = 0; i < n; i++)
-               refVect[i] = i;
-
-       uint32_t curSize = n; // current size of the sampling set
-       for (uint32_t j = 0; j < k; j++)
-       {
-               // pick an index in sampling set:
-               uint32_t index = get_rand_int() % curSize;
-               v[j] = refVect[index];
-               // move this index outside of sampling set:
-               refVect[index] = refVect[--curSize];
-       }
-
-       free(refVect);
-}
-
-// assign a vector (represented by its dissimilarities to others, as dissimilarities[index,])
-// to a cluster, represented by its center ==> output is integer in 0..K-1
-static uint32_t assignCluster(uint32_t index, float* dissimilarities,
-       uint32_t* centers, uint32_t n, uint32_t K)
-{
-       uint32_t minIndex = 0;
-       float minDist = dissimilarities[index * n + centers[0]];
-
-       for (uint32_t j = 1; j < K; j++)
-       {
-               if (dissimilarities[index * n + centers[j]] < minDist)
-               {
-                       minDist = dissimilarities[index * n + centers[j]];
-                       minIndex = j;
-               }
-       }
-
-       return minIndex;
-}
-
-// assign centers given a clustering, and also compute corresponding distortion
-static void assign_centers(uint32_t nbClusters, Vector** clusters, float* dissimilarities,
-       uint32_t nbItems, uint32_t* ctrs, float* distor)
-{
-       *distor = 0.0;
-       // TODO [heuristic]: checking only a neighborhood of the former center ?
-       for (uint32_t j = 0; j < nbClusters; j++)
-       {
-               // If the cluster is empty, choose a center at random (pathological case...)
-               uint32_t minIndex = get_rand_int() % nbItems;
-               float minSumDist = INFINITY;
-               for (uint32_t i = 0; i < vector_size(clusters[j]); i++)
-               {
-                       uint32_t index1;
-                       vector_get(clusters[j], i, index1);
-                       // attempt to use current index as center
-                       float sumDist = 0.0;
-                       for (uint32_t ii = 0; ii < vector_size(clusters[j]); ii++)
-                       {
-                               uint32_t index2;
-                               vector_get(clusters[j], ii, index2);
-                               sumDist += dissimilarities[index1 * nbItems + index2];
-                       }
-                       if (sumDist < minSumDist)
-                       {
-                               minSumDist = sumDist;
-                               minIndex = index1;
-                       }
-               }
-               ctrs[j] = minIndex;
-               *distor += minSumDist;
-       }
-}
-
-// Core PAM algorithm from a dissimilarity matrix; (e.g. nstart=10, maxiter=100)
-void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids,
-       uint32_t nbStart, uint32_t maxNbIter, Result_t* result)
-{
-       uint32_t* ctrs = result->medoids_ranks; //shorthand
-       uint32_t* oldCtrs = (uint32_t*) malloc(nbClusters * sizeof(uint32_t));
-       Vector** clusters = (Vector**) malloc(nbClusters * sizeof(Vector*));
-       Vector** bestClusts = (Vector**) malloc(nbClusters * sizeof(Vector*));
-       for (uint32_t j = 0; j < nbClusters; j++)
-       {
-               clusters[j] = vector_new(uint32_t);
-               bestClusts[j] = vector_new(uint32_t);
-       }
-
-       float lastDistor, distor, bestDistor = INFINITY;
-       for (uint32_t startKount = 0; startKount < nbStart; startKount++)
-       {
-               // centers (random) [re]initialization
-               if (clustOnMedoids)
-               {
-                       // In this special case (clustering groups of medoids), we can improve the sampling
-                       uint32_t randomMedoidRank = get_rand_int() % (nbItems / nbClusters);
-                       uint32_t startIndex = randomMedoidRank * nbClusters;
-                       for (uint32_t index = startIndex; index < startIndex + nbClusters; index++)
-                               ctrs[index - startIndex] = index;
-               }
-               else
-                       // No information: complete random sampling
-                       sample(ctrs, nbItems, nbClusters);
-               for (uint32_t j = 0; j < nbClusters; j++)
-                       oldCtrs[j] = 0;
-               uint32_t kounter = 0;
-
-               distor = INFINITY;
-               do // while 'distortion' decreases...
-               {
-                       // (re)initialize clusters to empty sets
-                       for (uint32_t j = 0; j < nbClusters; j++)
-                               vector_clear(clusters[j]);
-
-                       // estimate clusters belongings
-                       for (uint32_t i = 0; i < nbItems; i++)
-                       {
-                               uint32_t affectation = assignCluster(i, dissimilarities, ctrs, nbItems, nbClusters);
-                               vector_push(clusters[affectation], i);
-                       }
-
-                       // copy current centers to old centers
-                       for (uint32_t j = 0; j < nbClusters; j++)
-                               oldCtrs[j] = ctrs[j];
-
-                       // recompute centers
-                       lastDistor = distor;
-                       assign_centers(nbClusters, clusters, dissimilarities, nbItems, ctrs, &distor);
-               }
-               while (distor < lastDistor && kounter++ < maxNbIter);
-
-               if (distor < bestDistor)
-               {
-                       // copy current clusters into bestClusts
-                       for (uint32_t j = 0; j < nbClusters; j++)
-                       {
-                               vector_clear(bestClusts[j]);
-                               for (uint32_t i = 0; i < vector_size(clusters[j]); i++)
-                               {
-                                       uint32_t index;
-                                       vector_get(clusters[j], i, index);
-                                       vector_push(bestClusts[j], index);
-                               }
-                       }
-                       bestDistor = distor;
-               }
-       }
-
-       // Assign centers from bestClusts
-       assign_centers(nbClusters, bestClusts, dissimilarities, nbItems, ctrs, &distor);
-
-       free(oldCtrs);
-       for (uint32_t j = 0; j < nbClusters; j++)
-       {
-               vector_destroy(clusters[j]);
-               vector_destroy(bestClusts[j]);
-       }
-       free(clusters);
-       free(bestClusts);
-}