From: Benjamin Auder Date: Wed, 11 Jan 2017 02:04:02 +0000 (+0100) Subject: FIX: C code (double, float, ...) + wrapper (read/write data, get medoids) X-Git-Url: https://git.auder.net/?p=epclust.git;a=commitdiff_plain;h=73d68777d709e054cf74e806e23b0bdefda9462a FIX: C code (double, float, ...) + wrapper (read/write data, get medoids) --- diff --git a/code/stage1/src/Algorithm/compute_coefficients.c b/code/stage1/src/Algorithm/compute_coefficients.c index a7a73f3..cfec0e6 100644 --- a/code/stage1/src/Algorithm/compute_coefficients.c +++ b/code/stage1/src/Algorithm/compute_coefficients.c @@ -6,14 +6,14 @@ // compute rows of the matrix of reduced coordinates void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t nbValues, - Real* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates) + float* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates) { uint32_t D = (1 << nbReducedCoordinates); - Real* x = (Real*) malloc(nbValues*sizeof(Real)); + double* x = (double*) malloc(nbValues*sizeof(double)); for (uint32_t i=0; i // compute L^p dissimilarities for a nxm matrix -Real* get_dissimilarities_intra(Real* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p) +float* get_dissimilarities_intra(float* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p) { - Real* dissimilarities = (Real*) malloc(nbSamples*nbSamples*sizeof(Real)); + float* dissimilarities = (float*) malloc(nbSamples*nbSamples*sizeof(float)); for (uint32_t i=0; i output is integer in 0..K-1 -static uint32_t assignCluster(uint32_t index, Real* dissimilarities, +static uint32_t assignCluster(uint32_t index, float* dissimilarities, uint32_t* centers, uint32_t n, uint32_t K) { uint32_t minIndex = 0; - Real minDist = dissimilarities[index * n + centers[0]]; + float minDist = dissimilarities[index * n + centers[0]]; for (uint32_t j = 1; j < K; j++) { @@ -46,8 +46,8 @@ static uint32_t assignCluster(uint32_t index, Real* dissimilarities, } // assign centers given a clustering, and also compute corresponding distortion -static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimilarities, - uint32_t nbItems, uint32_t* ctrs, Real* distor) +static void assign_centers(uint32_t nbClusters, Vector** clusters, float* dissimilarities, + uint32_t nbItems, uint32_t* ctrs, float* distor) { *distor = 0.0; // TODO [heuristic]: checking only a neighborhood of the former center ? @@ -55,13 +55,13 @@ static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimi { // If the cluster is empty, choose a center at random (pathological case...) uint32_t minIndex = get_rand_int() % nbItems; - Real minSumDist = INFINITY; + float minSumDist = INFINITY; for (uint32_t i = 0; i < vector_size(clusters[j]); i++) { uint32_t index1; vector_get(clusters[j], i, index1); // attempt to use current index as center - Real sumDist = 0.0; + float sumDist = 0.0; for (uint32_t ii = 0; ii < vector_size(clusters[j]); ii++) { uint32_t index2; @@ -80,7 +80,7 @@ static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimi } // Core PAM algorithm from a dissimilarity matrix; (e.g. nstart=10, maxiter=100) -void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids, +void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids, uint32_t nbStart, uint32_t maxNbIter, Result_t* result) { uint32_t* ctrs = result->medoids_ranks; //shorthand @@ -93,7 +93,7 @@ void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clust bestClusts[j] = vector_new(uint32_t); } - Real lastDistor, distor, bestDistor = INFINITY; + float lastDistor, distor, bestDistor = INFINITY; for (uint32_t startKount = 0; startKount < nbStart; startKount++) { // centers (random) [re]initialization diff --git a/code/stage1/src/Algorithm/pam.h b/code/stage1/src/Algorithm/pam.h index 14274c8..353759c 100644 --- a/code/stage1/src/Algorithm/pam.h +++ b/code/stage1/src/Algorithm/pam.h @@ -7,7 +7,7 @@ #define MAXITER 100 // Core PAM algorithm from a 'flat' dissimilarity matrix -void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, +void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids, uint32_t nbStart, uint32_t maxNbIter, Result_t* result); #endif diff --git a/code/stage1/src/Classification/getClass.c b/code/stage1/src/Classification/getClass.c index aed1467..acec167 100644 --- a/code/stage1/src/Classification/getClass.c +++ b/code/stage1/src/Classification/getClass.c @@ -12,14 +12,14 @@ uint32_t* get_class(PowerCurve* data, uint32_t nbSeries, PowerCurve* medoids, uint32_t nbReducedCoordinates = (uint32_t)ceil(log2(nbValues)); // Preprocessing to reduce dimension of both data and medoids - Real* reducedCoordinates_data = (Real*) malloc(nbSeries * nbReducedCoordinates * sizeof(Real)); + float* reducedCoordinates_data = (float*) malloc(nbSeries * nbReducedCoordinates * sizeof(float)); compute_coefficients(data, nbSeries, nbValues, reducedCoordinates_data, 0, nbReducedCoordinates); - Real* reducedCoordinates_medoids = (Real*) malloc(nbClusters * nbReducedCoordinates * sizeof(Real)); + float* reducedCoordinates_medoids = (float*) malloc(nbClusters * nbReducedCoordinates * sizeof(float)); compute_coefficients(medoids, nbClusters, nbValues, reducedCoordinates_medoids, 0, nbReducedCoordinates); - Real* dissimilarities = get_dissimilarities_inter(reducedCoordinates_data, nbSeries, + float* dissimilarities = get_dissimilarities_inter(reducedCoordinates_data, nbSeries, reducedCoordinates_medoids, nbClusters, nbReducedCoordinates, p_for_dissims); free(reducedCoordinates_data); free(reducedCoordinates_medoids); @@ -29,7 +29,7 @@ uint32_t* get_class(PowerCurve* data, uint32_t nbSeries, PowerCurve* medoids, for (uint32_t i=0; inbSeries, 4, packedWork + index); + write_int(work->nbSeries, packedWork + index); index += 4; for (uint32_t i = 0; i < work->nbSeries; i++) { - write_int(work->ranks[i], 4, packedWork + index); + write_int(work->ranks[i], packedWork + index); index += 4; } // complete with zeros for (uint32_t i = 0; i < nbSeriesInChunk - work->nbSeries; i++) { - write_int(0, 4, packedWork + index); + write_int(0, packedWork + index); index += 4; } - write_int(work->nbClusters, 4, packedWork + index); + write_int(work->nbClusters, packedWork + index); index += 4; - write_int(work->clustOnMedoids, 4, packedWork + index); + write_int(work->clustOnMedoids, packedWork + index); index += 4; - write_int(work->p_for_dissims, 4, packedWork + index); + write_int(work->p_for_dissims, packedWork + index); } // serialize a Result_t object into a bytes string @@ -43,18 +43,18 @@ void pack_result(Result_t* result, Byte* packedResult) { uint32_t index = 0; - write_int(result->nbClusters, 4, packedResult); + write_int(result->nbClusters, packedResult); index += 4; for (uint32_t i = 0; i < result->nbClusters; i++) { - write_int(result->medoids_ID[i], 4, packedResult + index); + write_int(result->medoids_ID[i], packedResult + index); index += 4; } for (uint32_t i = 0; i < result->nbClusters; i++) { - write_int(result->medoids_ranks[i], 4, packedResult + index); + write_int(result->medoids_ranks[i], packedResult + index); index += 4; } } diff --git a/code/stage1/src/MPI_Communication/unpack.c b/code/stage1/src/MPI_Communication/unpack.c index de51270..c0f7e3c 100644 --- a/code/stage1/src/MPI_Communication/unpack.c +++ b/code/stage1/src/MPI_Communication/unpack.c @@ -20,23 +20,23 @@ Work_t* unpack_work(Byte* packedWork, uint32_t nbSeriesInChunk) index = NCHAR_FNAME; - uint32_t nbSeries = work->nbSeries = bInt_to_uint(packedWork + index, 4); + uint32_t nbSeries = work->nbSeries = bInt_to_uint(packedWork + index); index += 4; work->ranks = (uint32_t*) malloc(nbSeries * sizeof(uint32_t)); for (uint32_t i = 0; i < nbSeries; i++) { - work->ranks[i] = bInt_to_uint(packedWork + index, 4); + work->ranks[i] = bInt_to_uint(packedWork + index); index += 4; } // shift over the zeros index += 4 * (nbSeriesInChunk - nbSeries); - work->nbClusters = bInt_to_uint(packedWork + index, 4); + work->nbClusters = bInt_to_uint(packedWork + index); index += 4; - work->clustOnMedoids = bInt_to_uint(packedWork + index, 4); + work->clustOnMedoids = bInt_to_uint(packedWork + index); index += 4; - work->p_for_dissims = bInt_to_uint(packedWork + index, 4); + work->p_for_dissims = bInt_to_uint(packedWork + index); return work; } @@ -47,20 +47,20 @@ Result_t* unpack_result(Byte* packedResult) Result_t* result = (Result_t*) malloc(sizeof(Result_t)); uint32_t index = 0; - uint32_t nbClusters = result->nbClusters = bInt_to_uint(packedResult, 4); + uint32_t nbClusters = result->nbClusters = bInt_to_uint(packedResult); index += 4; result->medoids_ID = (uint32_t*) malloc(nbClusters * sizeof(uint32_t)); for (uint32_t i = 0; i < nbClusters; i++) { - result->medoids_ID[i] = bInt_to_uint(packedResult + index, 4); + result->medoids_ID[i] = bInt_to_uint(packedResult + index); index += 4; } result->medoids_ranks = (uint32_t*) malloc(nbClusters * sizeof(uint32_t)); for (uint32_t i = 0; i < nbClusters; i++) { - result->medoids_ranks[i] = bInt_to_uint(packedResult + index, 4); + result->medoids_ranks[i] = bInt_to_uint(packedResult + index); index += 4; } diff --git a/code/stage1/src/MPI_Main/master.c b/code/stage1/src/MPI_Main/master.c index c4e16cd..dfecfde 100644 --- a/code/stage1/src/MPI_Main/master.c +++ b/code/stage1/src/MPI_Main/master.c @@ -253,9 +253,9 @@ static void clusters_reduce(char* inputFileName, char* outputFileName, uint32_t ofile = fopen(outputFileName, "r+b"); //read and write, binary fseek(ofile, 0, SEEK_SET); Byte intBuffer[4]; - write_int(newSeriesCount, 4, intBuffer); + write_int(newSeriesCount, intBuffer); fwrite(intBuffer, 1, 4, ofile); - write_int(tsLength, 4, intBuffer); + write_int(tsLength, intBuffer); fwrite(intBuffer, 1, 4, ofile); fclose(ofile); } diff --git a/code/stage1/src/MPI_Main/slave.c b/code/stage1/src/MPI_Main/slave.c index d9b0c47..e66f599 100644 --- a/code/stage1/src/MPI_Main/slave.c +++ b/code/stage1/src/MPI_Main/slave.c @@ -24,7 +24,7 @@ static Result_t* do_work(Work_t* work) // nbReducedCoordinates = smallest power of 2 which is above nbValues uint32_t nbReducedCoordinates = (uint32_t)ceil(log2(nbValues)); - Real* reducedCoordinates = (Real*) malloc(nbSeries * nbReducedCoordinates * sizeof(Real)); + float* reducedCoordinates = (float*) malloc(nbSeries * nbReducedCoordinates * sizeof(float)); // call preprocessing with the rows of raw power values matrix. // Keep the IDs in memory for further processing. @@ -49,7 +49,7 @@ static Result_t* do_work(Work_t* work) // *** Step 2 *** // Run PAM algorithm on the dissimilarity matrix computed from 'reducedCoordinates'. - Real* dissimilarities = get_dissimilarities_intra( + float* dissimilarities = get_dissimilarities_intra( reducedCoordinates, nbSeries, nbReducedCoordinates, work->p_for_dissims); free(reducedCoordinates); diff --git a/code/stage1/src/TimeSeries/deserialize.c b/code/stage1/src/TimeSeries/deserialize.c index 79f7843..e142fe3 100644 --- a/code/stage1/src/TimeSeries/deserialize.c +++ b/code/stage1/src/TimeSeries/deserialize.c @@ -37,7 +37,7 @@ PowerCurve* deserialize(const char* ifileName, const char* ofileName, if (!ofile) { powerCurve = powerCurves + i; - powerCurve->values = (Real*) malloc(valuesPerSerie * sizeof(Real)); + powerCurve->values = (float*) malloc(valuesPerSerie * sizeof(float)); } // translate 4-bytes binary integer into integer ID @@ -45,14 +45,14 @@ PowerCurve* deserialize(const char* ifileName, const char* ofileName, size_t lengthRead = fread(binaryID, 4, 1, ifile); if (lengthRead != 1) fprintf(stderr,"Warning: deserializing truncated binary file.\n"); - uint32_t ID = bInt_to_uint((Byte*) binaryID, 4); + uint32_t ID = bInt_to_uint((Byte*) binaryID); free(binaryID); if (ofile) fprintf(ofile, "%u,", ID); else powerCurve->ID = ID; - // translate 4-bytes binary integers into Real + // translate 4-bytes binary integers into float Byte* binarySerie = (Byte*) malloc(4 * valuesPerSerie); lengthRead = fread(binarySerie, 1, 4*valuesPerSerie, ifile); //TODO: assert that lengthRead == 4*valuesPerSerie (...) diff --git a/code/stage1/src/TimeSeries/serialize.c b/code/stage1/src/TimeSeries/serialize.c index 3caa371..ba52c87 100644 --- a/code/stage1/src/TimeSeries/serialize.c +++ b/code/stage1/src/TimeSeries/serialize.c @@ -22,7 +22,7 @@ static void scan_line(FILE* ifile, int posID, uint32_t* ID, int posPower, float* } else if (position == posPower) { - Real power; + float power; nextChar = readReal(ifile, &power); *rawPower = (float) power; } @@ -250,11 +250,11 @@ void serialize_byRows(const char* ifileName, const char* ofileName, uint32_t nbI uint32_t tsLength = 4*nbValues+4; FILE* ofile = fopen(ofileName, "wb"); Byte intBuffer[4]; - write_int(nbSeries, 4, intBuffer); + write_int(nbSeries, intBuffer); fwrite(intBuffer, 1, 4, ofile); - write_int(tsLength, 4, intBuffer); + write_int(tsLength, intBuffer); fwrite(intBuffer, 1, 4, ofile); - Real rawPower; + float rawPower; int64_t ID; for (uint32_t i=0; i 0 ? fractionalPart : 1))+1); - *real = ( (Real)integerPart - + (integerPart < 0 ? -1 : 1) * (Real)fractionalPart/(divisorForFractional*pow(10,countZeros)) ) + *real = ( (float)integerPart + + (integerPart < 0 ? -1 : 1) * (float)fractionalPart/(divisorForFractional*pow(10,countZeros)) ) * pow(10,exponent); return nextChar; } @@ -79,7 +79,7 @@ void write_int(uint32_t x, Byte* buffer) //WARNING: assuming float is 32bits... // convert 4-bytes binary float to float -float bReal_to_double(Byte* pFloat) +float bReal_to_float(Byte* pFloat) { float res; memcpy(&res, pFloat, 4); @@ -121,7 +121,7 @@ uint32_t get_nbSeries(const char* ifileName) if (lengthRead != 1) fprintf(stderr,"Warning: getting nbSeries from truncated binary file.\n"); fclose(ifile); - return bInt_to_uint(binaryInt, 4); + return bInt_to_uint(binaryInt); } // get metadata: tsLength @@ -134,5 +134,5 @@ uint32_t get_tsLength(const char* ifileName) if (lengthRead != 1) fprintf(stderr,"Warning: getting tsLength from truncated binary file.\n"); fclose(ifile); - return bInt_to_uint(binaryInt, 4); + return bInt_to_uint(binaryInt); } diff --git a/code/stage1/src/Util/utils.h b/code/stage1/src/Util/utils.h index 2ed68aa..3cb5cd1 100644 --- a/code/stage1/src/Util/utils.h +++ b/code/stage1/src/Util/utils.h @@ -13,13 +13,17 @@ void free_result(Result_t* result); char readInt(FILE* stream, int64_t* integer); -char readReal(FILE* stream, Real* real); +char readReal(FILE* stream, float* real); // convert n-bytes binary integers to uint32_t -uint32_t bInt_to_uint(Byte* pInteger, size_t bytesCount); +uint32_t bInt_to_uint(Byte* pInteger); // serialize integers with a portable bytes order -void write_int(uint32_t integer, size_t bytesCount, Byte* buffer); +void write_int(uint32_t integer, Byte* buffer); + +float bReal_to_float(Byte* pFloat); + +void write_real(float x, Byte* buffer); // Expected size of a Work message in bytes: uint32_t get_packedWork_length(uint32_t nbSeriesInChunk); diff --git a/code/stage1/wrapper.R b/code/stage1/wrapper.R index 524fc90..0a957d8 100644 --- a/code/stage1/wrapper.R +++ b/code/stage1/wrapper.R @@ -1,4 +1,5 @@ -ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontLetMeEmpty") +ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, + args="DontLetMeEmptyPlease!") { command_line = paste("mpirun -np ",np," ",path,"/ppam.exe cluster",sep="") @@ -7,8 +8,9 @@ ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontL { if (!is.character(data)) { - #assuming matrix or data.frame, WITH row names (identifiers; could be line number...) - write.csv(data, "/tmp/data_csv", row.names=TRUE, col.names=FALSE) + #assuming matrix or data.frame, WITH row names + #( identifiers; could be line number... e.g. data <- cbind(1:nrow(data),data) ) + write.table(data, "/tmp/data_csv", sep=",", row.names=FALSE, col.names=FALSE) system(paste(path,"/ppam.exe serialize /tmp/data_csv /tmp/data_bin 0 0",sep="")) } else { @@ -20,3 +22,14 @@ ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontL command_line = paste(command_line," ",args,sep="") system(command_line) } + +#NOTE: identifiers in first column +getMedoids = function(path=".", xmlResult = "ppamResult.xml", + finalSeries = "ppamFinalSeries.bin") +{ + system(paste(path,"/ppam.exe deserialize ",finalSeries," ppamFinalSeries.csv -1",sep="")) + curves = read.table("ppamFinalSeries.csv", sep=",") + library(XML) + ranks = as.integer( xmlToList( xmlParse(xmlResult) )$ranks ) + return ( curves[ranks,] ) # == medoids +}