throw away old code, prepare tests
[epclust.git] / old_C_code / stage1 / src / main.c
diff --git a/old_C_code/stage1/src/main.c b/old_C_code/stage1/src/main.c
deleted file mode 100644 (file)
index d1f7965..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-#include "MPI_Main/master.h"
-#include "MPI_Main/slave.h"
-#include "Util/utils.h"
-#include "Util/rng.h"
-#include <sys/stat.h>
-#include <mpi.h>
-#include <math.h>
-#include <stdio.h>
-#include "TimeSeries/serialize.h"
-#include "TimeSeries/deserialize.h"
-#include "Classification/getClass.h"
-#include <string.h>
-#include <cgds/Vector.h>
-#include <libxml/xmlreader.h>
-
-// serialize text file argv[1] into a binary file argv[2]
-int serialize_main(int argc, char** argv)
-{
-       const char* ifileName = argv[1];
-       const char* ofileName = argv[2];
-       int byCols = atoi(argv[3]);
-       uint32_t nbItems = atoi(argv[4]); //==0 for "all series"
-       
-       if (byCols)
-               serialize_byCols(ifileName, ofileName, nbItems);
-       else
-               serialize_byRows(ifileName, ofileName, nbItems);
-       return 0;
-}
-
-// deserialize binary file argv[1] into text file argv[2]
-int deserialize_main(int argc, char** argv)
-{
-       const char* ifileName = argv[1];
-       const char* ofileName = argv[2];
-       Vector* vranks = vector_new(uint32_t);
-       //each token is at most two ints (a-b = from a to b included)
-       char* token = strtok(argv[3], ",");
-       int retrieveAll = 0;
-       uint32_t* ranks = NULL;
-       while (token)
-       {
-               //scan token to find middle position of '-' (if any)
-               int minusPos = -1;
-               int tokenLength = strlen(token);
-               //loop starts at index 1 because -N is allowed (and means 'everything')
-               for (int i=1; i<tokenLength; i++)
-               {
-                       if (token[i] == '-')
-                       {
-                               minusPos = i;
-                               break;
-                       }
-               }
-               if (minusPos < 0)
-               {
-                       int64_t rank = (int64_t)atoi(token);
-                       if (rank <= 0)
-                       {
-                               retrieveAll = 1;
-                               break;
-                       }
-                       vector_push(vranks, (uint32_t)rank);
-               }
-               else
-               {
-                       token[minusPos] = 0;
-                       int int1 = atoi(token);
-                       int int2 = atoi(token+minusPos+1);
-                       for (uint32_t i=int1; i<=int2; i++)
-                               vector_push(vranks, i);
-               }
-               token = strtok(NULL, ",");
-       }
-       uint32_t nbRanks = retrieveAll
-               ? 0
-               : vector_size(vranks);
-       if (!retrieveAll)
-       {
-               ranks = (uint32_t*) malloc(nbRanks*sizeof(uint32_t));
-               for (uint32_t i=0; i<nbRanks; i++)
-               {
-                       vector_get(vranks, i, ranks[i]);
-                       ranks[i]--; //re-express on 0...{n-1}
-               }
-       }
-       vector_destroy(vranks);
-       
-       deserialize(ifileName, ofileName, ranks, nbRanks);
-       return 0;
-}
-
-//main clustering task (master or slave)
-int cluster_main(int argc, char **argv)
-{
-       MPI_Init(&argc, &argv);
-
-       char* ifileName = argv[1]; //could be "../data/test.bin"
-       uint32_t nbSeriesInChunk = atoi(argv[2]); //could be 3000
-       uint32_t nbClusters = atoi(argv[3]); //could be 15
-       int randomize = atoi(argv[4]); //boolean
-       uint32_t p_for_dissims = atoi(argv[5]); //1 for L1, 2 for L2, ...etc
-
-       // Get totalNbSeries and tsLength
-       uint32_t totalNbSeries = get_nbSeries(ifileName);
-       uint32_t tsLength = get_tsLength(ifileName);
-
-       // Basic sanity checks
-       if (nbClusters <= 0 || nbSeriesInChunk <= 1)
-       {
-               MPI_Finalize();
-               return 0;
-       }
-       if (nbSeriesInChunk > totalNbSeries)
-               nbSeriesInChunk = totalNbSeries;
-       if (nbClusters > nbSeriesInChunk)
-               nbClusters = nbSeriesInChunk;
-
-       double idealNbSeriesInChunk = 0.0; //unused if randomize == TRUE
-       if (!randomize)
-       {
-               // Adjust nbSeriesInChunk to avoid small remainders.
-               // Each node should have at least nbSeriesInChunk (as given to the function).
-
-               // ==> We seek for the largest N such that (double)totalNbSeries / N >= nbSeriesInChunk
-               uint32_t N = totalNbSeries / nbSeriesInChunk + 1;
-               while ((double)totalNbSeries / N < nbSeriesInChunk) N--;
-               // At this point N>=1 is the solution
-               idealNbSeriesInChunk = (double)totalNbSeries / N;
-               nbSeriesInChunk = ceil(idealNbSeriesInChunk);
-       }
-       
-       // Initialize random generator
-       init_rng(1);
-       
-       // Find out my identity in the default communicator
-       int myrank;
-       MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
-
-       if (myrank == 0)
-       {
-               // create temporary folder for intermediate results
-               mkdir(".tmp", S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
-               
-               master_run(ifileName, totalNbSeries, nbSeriesInChunk, idealNbSeriesInChunk, 
-                       tsLength, nbClusters, randomize, p_for_dissims);
-       }
-       
-       else
-               slave_run(myrank, nbSeriesInChunk, nbClusters);
-
-       MPI_Finalize();
-       return 0;
-}
-
-//main classification task (using clustering result)
-//NOTE: ifileName == courbes à classer (?!)
-int classif_main(int argc, char** argv)
-{
-       const char* ifileName = argv[1];
-       const char* xmlFileName = argv[2];
-       
-       // 1] load and retrieve info from XML file
-       xmlDoc* doc = xmlReadFile(xmlFileName, NULL, 0);
-
-       // Get the root element node
-       xmlNode* root_element = xmlDocGetRootElement(doc);
-
-       uint32_t p_for_dissims = 0;
-       uint32_t* ranks = NULL;
-       uint32_t nbClusters = 0;
-       char* binFileName;
-       for (xmlNode* cur_node=root_element->children; cur_node; cur_node=cur_node->next) 
-       {
-               if (cur_node->type != XML_ELEMENT_NODE)
-                       continue;
-               if (!strcmp(cur_node->name,"p_for_dissims"))
-                       p_for_dissims = atoi(cur_node->last->content);
-               else if (!strcmp(cur_node->name,"ranks"))
-               {
-                       //first pass: find nbClusters
-                       for (xmlNode* rankNode=cur_node->children; rankNode; rankNode=rankNode->next)
-                       {
-                               if (rankNode->type == XML_ELEMENT_NODE && !strcmp(rankNode->name,"rank"))
-                                       nbClusters++;
-                       }
-                       //second pass: fill ranks (not optimal, but not very important here)
-                       ranks = (uint32_t*) malloc(nbClusters*sizeof(uint32_t));
-                       uint32_t index = 0;
-                       for (xmlNode* rankNode=cur_node->children; rankNode; rankNode=rankNode->next)
-                       {
-                               if (rankNode->type == XML_ELEMENT_NODE && !strcmp(rankNode->name,"rank"))
-                                       ranks[index++] = atoi(rankNode->last->content) - 1;
-                       }
-               }
-               else if (!strcmp(cur_node->name,"file"))
-               {
-                       binFileName = (char*) malloc(strlen(cur_node->last->content)+1);
-                       strcpy(binFileName, cur_node->last->content);
-               }
-       }
-
-       xmlFreeDoc(doc);
-       xmlCleanupParser();
-
-       uint32_t tsLength1 = get_tsLength(ifileName);
-       uint32_t tsLength2 = get_tsLength(binFileName);
-       if (tsLength1 != tsLength2)
-       {
-               fprintf(stderr,"Warning: nbValues do not match. Data will be truncated.\n");
-               if (tsLength1 > tsLength2)
-                       tsLength1 = tsLength2;
-       }
-       uint32_t nbValues = (tsLength1 - 4) / 4;
-
-       // 2] Classify all series by batches of CURVES_PER_REQUEST
-       uint32_t nbSeries = get_nbSeries(ifileName);
-       PowerCurve* medoids = deserialize(binFileName, NULL, ranks, nbClusters);
-       free(binFileName);
-       free(ranks);
-       ranks = (uint32_t*)malloc(CURVES_PER_REQUEST*sizeof(uint32_t));
-
-       uint32_t smallestNonProcessedIndex = 0;
-       double DISTOR = 0.0;
-       FILE* labelsFile = fopen("LABELS", "w");
-       while (smallestNonProcessedIndex < nbSeries)
-       {
-               uint32_t lowerBound = smallestNonProcessedIndex;
-               uint32_t upperBound = smallestNonProcessedIndex + CURVES_PER_REQUEST;
-               if (upperBound > nbSeries)
-                       upperBound = nbSeries;
-               for (uint32_t i=0; i<upperBound-lowerBound; i++)
-                       ranks[i] = lowerBound + i;
-               PowerCurve* data = deserialize(ifileName, NULL, ranks, upperBound-lowerBound);
-               uint32_t* labels = get_class(data, upperBound-lowerBound, medoids, nbClusters,
-                       nbValues, p_for_dissims, &DISTOR);
-               // send labels to LABELS file
-               for (uint32_t i=0; i<upperBound-lowerBound; i++)
-               {
-                       free(data[i].values);
-                       fprintf(labelsFile, "%u\n",labels[i]);
-               }
-               free(data);
-               free(labels);
-               smallestNonProcessedIndex += (upperBound-lowerBound);
-       }
-       fclose(labelsFile);
-       for (uint32_t i=0; i<nbClusters; i++)
-               free(medoids[i].values);
-       free(medoids);
-       free(ranks);
-       fprintf(stdout, "DISTOR = %g\n",DISTOR);
-       return 0;
-}
-
-int main(int argc, char** argv)
-{
-       if (argc <= 1)
-       {
-               fprintf(stderr, "No argument provided. Exit.\n");
-               return 1;
-       }
-       
-       if (!strcmp(argv[1], "serialize"))
-               return serialize_main(argc-1, argv+1);
-       if (!strcmp(argv[1], "deserialize"))
-               return deserialize_main(argc-1, argv+1);
-       if (!strcmp(argv[1], "cluster"))
-               return cluster_main(argc-1, argv+1);
-       if (!strcmp(argv[1], "classif"))
-               return classif_main(argc-1, argv+1);
-
-       fprintf(stderr, "Unknown first argument.\n");
-       return 1;
-}