From 73d68777d709e054cf74e806e23b0bdefda9462a Mon Sep 17 00:00:00 2001 From: Benjamin Auder <benjamin.auder@somewhere> Date: Wed, 11 Jan 2017 03:04:02 +0100 Subject: [PATCH] FIX: C code (double, float, ...) + wrapper (read/write data, get medoids) --- .../src/Algorithm/compute_coefficients.c | 12 ++++++------ .../src/Algorithm/compute_coefficients.h | 2 +- .../src/Algorithm/get_dissimilarities.c | 8 ++++---- .../src/Algorithm/get_dissimilarities.h | 4 ++-- code/stage1/src/Algorithm/pam.c | 16 ++++++++-------- code/stage1/src/Algorithm/pam.h | 2 +- code/stage1/src/Classification/getClass.c | 8 ++++---- code/stage1/src/MPI_Communication/pack.c | 18 +++++++++--------- code/stage1/src/MPI_Communication/unpack.c | 16 ++++++++-------- code/stage1/src/MPI_Main/master.c | 4 ++-- code/stage1/src/MPI_Main/slave.c | 4 ++-- code/stage1/src/TimeSeries/deserialize.c | 6 +++--- code/stage1/src/TimeSeries/serialize.c | 10 +++++----- code/stage1/src/Util/rng.c | 4 ++-- code/stage1/src/Util/rng.h | 2 +- code/stage1/src/Util/types.h | 4 +--- code/stage1/src/Util/utils.c | 12 ++++++------ code/stage1/src/Util/utils.h | 10 +++++++--- code/stage1/wrapper.R | 19 ++++++++++++++++--- 19 files changed, 88 insertions(+), 73 deletions(-) diff --git a/code/stage1/src/Algorithm/compute_coefficients.c b/code/stage1/src/Algorithm/compute_coefficients.c index a7a73f3..cfec0e6 100644 --- a/code/stage1/src/Algorithm/compute_coefficients.c +++ b/code/stage1/src/Algorithm/compute_coefficients.c @@ -6,14 +6,14 @@ // compute rows of the matrix of reduced coordinates void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t nbValues, - Real* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates) + float* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates) { uint32_t D = (1 << nbReducedCoordinates); - Real* x = (Real*) malloc(nbValues*sizeof(Real)); + double* x = (double*) malloc(nbValues*sizeof(double)); for (uint32_t i=0; i<nbValues; i++) x[i] = i; - Real* y = (Real*) malloc(nbValues*sizeof(Real)); - Real* interpolatedTranformedCurve = (Real*) malloc(D*sizeof(Real)); + double* y = (double*) malloc(nbValues*sizeof(double)); + double* interpolatedTranformedCurve = (double*) malloc(D*sizeof(double)); gsl_interp* linearInterpolation = gsl_interp_alloc(gsl_interp_linear, nbValues); gsl_interp_accel* acc = gsl_interp_accel_alloc(); gsl_wavelet_workspace* work = gsl_wavelet_workspace_alloc(D); @@ -29,7 +29,7 @@ void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t n for (uint32_t j=0; j<D; j++) { interpolatedTranformedCurve[j] = - gsl_interp_eval(linearInterpolation, x, y, j*((Real)(nbValues-1)/(D-1)), acc); + gsl_interp_eval(linearInterpolation, x, y, j*((float)(nbValues-1)/(D-1)), acc); } //DWT transform (in place) on interpolated curve [TODO: clarify stride parameter] gsl_wavelet_transform_forward(w, interpolatedTranformedCurve, 1, D, work); @@ -40,7 +40,7 @@ void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t n { t1 += (1 << j); //reducedCoordinates[(index+i)*nbReducedCoordinates+j] = sqrt( sum( x[t0:t1]^2 ) ) - Real sumOnSegment = 0.0; + float sumOnSegment = 0.0; for (uint32_t u=t0; u<t1; u++) sumOnSegment += interpolatedTranformedCurve[u]*interpolatedTranformedCurve[u]; reducedCoordinates[(index+i)*nbReducedCoordinates+j] = sqrt(sumOnSegment); diff --git a/code/stage1/src/Algorithm/compute_coefficients.h b/code/stage1/src/Algorithm/compute_coefficients.h index 9530b85..a77dc60 100644 --- a/code/stage1/src/Algorithm/compute_coefficients.h +++ b/code/stage1/src/Algorithm/compute_coefficients.h @@ -5,6 +5,6 @@ // compute rows of the matrix of reduced coordinates (see computeCoefficients.R) void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t nbValues, - Real* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates); + float* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates); #endif diff --git a/code/stage1/src/Algorithm/get_dissimilarities.c b/code/stage1/src/Algorithm/get_dissimilarities.c index 083cce6..5dc8ea2 100644 --- a/code/stage1/src/Algorithm/get_dissimilarities.c +++ b/code/stage1/src/Algorithm/get_dissimilarities.c @@ -3,9 +3,9 @@ #include <math.h> // compute L^p dissimilarities for a nxm matrix -Real* get_dissimilarities_intra(Real* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p) +float* get_dissimilarities_intra(float* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p) { - Real* dissimilarities = (Real*) malloc(nbSamples*nbSamples*sizeof(Real)); + float* dissimilarities = (float*) malloc(nbSamples*nbSamples*sizeof(float)); for (uint32_t i=0; i<nbSamples; i++) { dissimilarities[i*nbSamples+i] = 0.0; @@ -26,10 +26,10 @@ Real* get_dissimilarities_intra(Real* samples, uint32_t nbSamples, uint32_t nbVa } // compute L^p dissimilarities between rows of 2 matrices -Real* get_dissimilarities_inter(Real* mat1, uint32_t n1, Real* mat2, uint32_t n2, +float* get_dissimilarities_inter(float* mat1, uint32_t n1, float* mat2, uint32_t n2, uint32_t nbValues, uint32_t p) { - Real* dissimilarities = (Real*) malloc(n1*n2*sizeof(Real)); + float* dissimilarities = (float*) malloc(n1*n2*sizeof(float)); for (uint32_t i=0; i<n1; i++) { for (uint32_t j=0; j<n2; j++) diff --git a/code/stage1/src/Algorithm/get_dissimilarities.h b/code/stage1/src/Algorithm/get_dissimilarities.h index b3b51f9..b9580de 100644 --- a/code/stage1/src/Algorithm/get_dissimilarities.h +++ b/code/stage1/src/Algorithm/get_dissimilarities.h @@ -4,10 +4,10 @@ #include "Util/types.h" // compute L^p dissimilarities for a nxm matrix -Real* get_dissimilarities_intra(Real* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p); +float* get_dissimilarities_intra(float* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p); // compute L^p dissimilarities between rows of 2 matrices -Real* get_dissimilarities_inter(Real* mat1, uint32_t n1, Real* mat2, uint32_t n2, +float* get_dissimilarities_inter(float* mat1, uint32_t n1, float* mat2, uint32_t n2, uint32_t nbValues, uint32_t p); #endif diff --git a/code/stage1/src/Algorithm/pam.c b/code/stage1/src/Algorithm/pam.c index ecd72c3..60e3efe 100644 --- a/code/stage1/src/Algorithm/pam.c +++ b/code/stage1/src/Algorithm/pam.c @@ -27,11 +27,11 @@ static void sample(uint32_t* v, uint32_t n, uint32_t k) // assign a vector (represented by its dissimilarities to others, as dissimilarities[index,]) // to a cluster, represented by its center ==> output is integer in 0..K-1 -static uint32_t assignCluster(uint32_t index, Real* dissimilarities, +static uint32_t assignCluster(uint32_t index, float* dissimilarities, uint32_t* centers, uint32_t n, uint32_t K) { uint32_t minIndex = 0; - Real minDist = dissimilarities[index * n + centers[0]]; + float minDist = dissimilarities[index * n + centers[0]]; for (uint32_t j = 1; j < K; j++) { @@ -46,8 +46,8 @@ static uint32_t assignCluster(uint32_t index, Real* dissimilarities, } // assign centers given a clustering, and also compute corresponding distortion -static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimilarities, - uint32_t nbItems, uint32_t* ctrs, Real* distor) +static void assign_centers(uint32_t nbClusters, Vector** clusters, float* dissimilarities, + uint32_t nbItems, uint32_t* ctrs, float* distor) { *distor = 0.0; // TODO [heuristic]: checking only a neighborhood of the former center ? @@ -55,13 +55,13 @@ static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimi { // If the cluster is empty, choose a center at random (pathological case...) uint32_t minIndex = get_rand_int() % nbItems; - Real minSumDist = INFINITY; + float minSumDist = INFINITY; for (uint32_t i = 0; i < vector_size(clusters[j]); i++) { uint32_t index1; vector_get(clusters[j], i, index1); // attempt to use current index as center - Real sumDist = 0.0; + float sumDist = 0.0; for (uint32_t ii = 0; ii < vector_size(clusters[j]); ii++) { uint32_t index2; @@ -80,7 +80,7 @@ static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimi } // Core PAM algorithm from a dissimilarity matrix; (e.g. nstart=10, maxiter=100) -void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids, +void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids, uint32_t nbStart, uint32_t maxNbIter, Result_t* result) { uint32_t* ctrs = result->medoids_ranks; //shorthand @@ -93,7 +93,7 @@ void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clust bestClusts[j] = vector_new(uint32_t); } - Real lastDistor, distor, bestDistor = INFINITY; + float lastDistor, distor, bestDistor = INFINITY; for (uint32_t startKount = 0; startKount < nbStart; startKount++) { // centers (random) [re]initialization diff --git a/code/stage1/src/Algorithm/pam.h b/code/stage1/src/Algorithm/pam.h index 14274c8..353759c 100644 --- a/code/stage1/src/Algorithm/pam.h +++ b/code/stage1/src/Algorithm/pam.h @@ -7,7 +7,7 @@ #define MAXITER 100 // Core PAM algorithm from a 'flat' dissimilarity matrix -void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, +void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids, uint32_t nbStart, uint32_t maxNbIter, Result_t* result); #endif diff --git a/code/stage1/src/Classification/getClass.c b/code/stage1/src/Classification/getClass.c index aed1467..acec167 100644 --- a/code/stage1/src/Classification/getClass.c +++ b/code/stage1/src/Classification/getClass.c @@ -12,14 +12,14 @@ uint32_t* get_class(PowerCurve* data, uint32_t nbSeries, PowerCurve* medoids, uint32_t nbReducedCoordinates = (uint32_t)ceil(log2(nbValues)); // Preprocessing to reduce dimension of both data and medoids - Real* reducedCoordinates_data = (Real*) malloc(nbSeries * nbReducedCoordinates * sizeof(Real)); + float* reducedCoordinates_data = (float*) malloc(nbSeries * nbReducedCoordinates * sizeof(float)); compute_coefficients(data, nbSeries, nbValues, reducedCoordinates_data, 0, nbReducedCoordinates); - Real* reducedCoordinates_medoids = (Real*) malloc(nbClusters * nbReducedCoordinates * sizeof(Real)); + float* reducedCoordinates_medoids = (float*) malloc(nbClusters * nbReducedCoordinates * sizeof(float)); compute_coefficients(medoids, nbClusters, nbValues, reducedCoordinates_medoids, 0, nbReducedCoordinates); - Real* dissimilarities = get_dissimilarities_inter(reducedCoordinates_data, nbSeries, + float* dissimilarities = get_dissimilarities_inter(reducedCoordinates_data, nbSeries, reducedCoordinates_medoids, nbClusters, nbReducedCoordinates, p_for_dissims); free(reducedCoordinates_data); free(reducedCoordinates_medoids); @@ -29,7 +29,7 @@ uint32_t* get_class(PowerCurve* data, uint32_t nbSeries, PowerCurve* medoids, for (uint32_t i=0; i<nbSeries; i++) { uint32_t minIndex = 0; - Real minDissim = dissimilarities[i*nbClusters + 0]; + float minDissim = dissimilarities[i*nbClusters + 0]; for (uint32_t j=1; j<nbClusters; j++) { if (dissimilarities[i*nbClusters + j] < minDissim) diff --git a/code/stage1/src/MPI_Communication/pack.c b/code/stage1/src/MPI_Communication/pack.c index a7570b9..8cb7b4f 100644 --- a/code/stage1/src/MPI_Communication/pack.c +++ b/code/stage1/src/MPI_Communication/pack.c @@ -16,26 +16,26 @@ void pack_work(Work_t* work, uint32_t nbSeriesInChunk, Byte* packedWork) while (index < NCHAR_FNAME) packedWork[index++] = 0; - write_int(work->nbSeries, 4, packedWork + index); + write_int(work->nbSeries, packedWork + index); index += 4; for (uint32_t i = 0; i < work->nbSeries; i++) { - write_int(work->ranks[i], 4, packedWork + index); + write_int(work->ranks[i], packedWork + index); index += 4; } // complete with zeros for (uint32_t i = 0; i < nbSeriesInChunk - work->nbSeries; i++) { - write_int(0, 4, packedWork + index); + write_int(0, packedWork + index); index += 4; } - write_int(work->nbClusters, 4, packedWork + index); + write_int(work->nbClusters, packedWork + index); index += 4; - write_int(work->clustOnMedoids, 4, packedWork + index); + write_int(work->clustOnMedoids, packedWork + index); index += 4; - write_int(work->p_for_dissims, 4, packedWork + index); + write_int(work->p_for_dissims, packedWork + index); } // serialize a Result_t object into a bytes string @@ -43,18 +43,18 @@ void pack_result(Result_t* result, Byte* packedResult) { uint32_t index = 0; - write_int(result->nbClusters, 4, packedResult); + write_int(result->nbClusters, packedResult); index += 4; for (uint32_t i = 0; i < result->nbClusters; i++) { - write_int(result->medoids_ID[i], 4, packedResult + index); + write_int(result->medoids_ID[i], packedResult + index); index += 4; } for (uint32_t i = 0; i < result->nbClusters; i++) { - write_int(result->medoids_ranks[i], 4, packedResult + index); + write_int(result->medoids_ranks[i], packedResult + index); index += 4; } } diff --git a/code/stage1/src/MPI_Communication/unpack.c b/code/stage1/src/MPI_Communication/unpack.c index de51270..c0f7e3c 100644 --- a/code/stage1/src/MPI_Communication/unpack.c +++ b/code/stage1/src/MPI_Communication/unpack.c @@ -20,23 +20,23 @@ Work_t* unpack_work(Byte* packedWork, uint32_t nbSeriesInChunk) index = NCHAR_FNAME; - uint32_t nbSeries = work->nbSeries = bInt_to_uint(packedWork + index, 4); + uint32_t nbSeries = work->nbSeries = bInt_to_uint(packedWork + index); index += 4; work->ranks = (uint32_t*) malloc(nbSeries * sizeof(uint32_t)); for (uint32_t i = 0; i < nbSeries; i++) { - work->ranks[i] = bInt_to_uint(packedWork + index, 4); + work->ranks[i] = bInt_to_uint(packedWork + index); index += 4; } // shift over the zeros index += 4 * (nbSeriesInChunk - nbSeries); - work->nbClusters = bInt_to_uint(packedWork + index, 4); + work->nbClusters = bInt_to_uint(packedWork + index); index += 4; - work->clustOnMedoids = bInt_to_uint(packedWork + index, 4); + work->clustOnMedoids = bInt_to_uint(packedWork + index); index += 4; - work->p_for_dissims = bInt_to_uint(packedWork + index, 4); + work->p_for_dissims = bInt_to_uint(packedWork + index); return work; } @@ -47,20 +47,20 @@ Result_t* unpack_result(Byte* packedResult) Result_t* result = (Result_t*) malloc(sizeof(Result_t)); uint32_t index = 0; - uint32_t nbClusters = result->nbClusters = bInt_to_uint(packedResult, 4); + uint32_t nbClusters = result->nbClusters = bInt_to_uint(packedResult); index += 4; result->medoids_ID = (uint32_t*) malloc(nbClusters * sizeof(uint32_t)); for (uint32_t i = 0; i < nbClusters; i++) { - result->medoids_ID[i] = bInt_to_uint(packedResult + index, 4); + result->medoids_ID[i] = bInt_to_uint(packedResult + index); index += 4; } result->medoids_ranks = (uint32_t*) malloc(nbClusters * sizeof(uint32_t)); for (uint32_t i = 0; i < nbClusters; i++) { - result->medoids_ranks[i] = bInt_to_uint(packedResult + index, 4); + result->medoids_ranks[i] = bInt_to_uint(packedResult + index); index += 4; } diff --git a/code/stage1/src/MPI_Main/master.c b/code/stage1/src/MPI_Main/master.c index c4e16cd..dfecfde 100644 --- a/code/stage1/src/MPI_Main/master.c +++ b/code/stage1/src/MPI_Main/master.c @@ -253,9 +253,9 @@ static void clusters_reduce(char* inputFileName, char* outputFileName, uint32_t ofile = fopen(outputFileName, "r+b"); //read and write, binary fseek(ofile, 0, SEEK_SET); Byte intBuffer[4]; - write_int(newSeriesCount, 4, intBuffer); + write_int(newSeriesCount, intBuffer); fwrite(intBuffer, 1, 4, ofile); - write_int(tsLength, 4, intBuffer); + write_int(tsLength, intBuffer); fwrite(intBuffer, 1, 4, ofile); fclose(ofile); } diff --git a/code/stage1/src/MPI_Main/slave.c b/code/stage1/src/MPI_Main/slave.c index d9b0c47..e66f599 100644 --- a/code/stage1/src/MPI_Main/slave.c +++ b/code/stage1/src/MPI_Main/slave.c @@ -24,7 +24,7 @@ static Result_t* do_work(Work_t* work) // nbReducedCoordinates = smallest power of 2 which is above nbValues uint32_t nbReducedCoordinates = (uint32_t)ceil(log2(nbValues)); - Real* reducedCoordinates = (Real*) malloc(nbSeries * nbReducedCoordinates * sizeof(Real)); + float* reducedCoordinates = (float*) malloc(nbSeries * nbReducedCoordinates * sizeof(float)); // call preprocessing with the rows of raw power values matrix. // Keep the IDs in memory for further processing. @@ -49,7 +49,7 @@ static Result_t* do_work(Work_t* work) // *** Step 2 *** // Run PAM algorithm on the dissimilarity matrix computed from 'reducedCoordinates'. - Real* dissimilarities = get_dissimilarities_intra( + float* dissimilarities = get_dissimilarities_intra( reducedCoordinates, nbSeries, nbReducedCoordinates, work->p_for_dissims); free(reducedCoordinates); diff --git a/code/stage1/src/TimeSeries/deserialize.c b/code/stage1/src/TimeSeries/deserialize.c index 79f7843..e142fe3 100644 --- a/code/stage1/src/TimeSeries/deserialize.c +++ b/code/stage1/src/TimeSeries/deserialize.c @@ -37,7 +37,7 @@ PowerCurve* deserialize(const char* ifileName, const char* ofileName, if (!ofile) { powerCurve = powerCurves + i; - powerCurve->values = (Real*) malloc(valuesPerSerie * sizeof(Real)); + powerCurve->values = (float*) malloc(valuesPerSerie * sizeof(float)); } // translate 4-bytes binary integer into integer ID @@ -45,14 +45,14 @@ PowerCurve* deserialize(const char* ifileName, const char* ofileName, size_t lengthRead = fread(binaryID, 4, 1, ifile); if (lengthRead != 1) fprintf(stderr,"Warning: deserializing truncated binary file.\n"); - uint32_t ID = bInt_to_uint((Byte*) binaryID, 4); + uint32_t ID = bInt_to_uint((Byte*) binaryID); free(binaryID); if (ofile) fprintf(ofile, "%u,", ID); else powerCurve->ID = ID; - // translate 4-bytes binary integers into Real + // translate 4-bytes binary integers into float Byte* binarySerie = (Byte*) malloc(4 * valuesPerSerie); lengthRead = fread(binarySerie, 1, 4*valuesPerSerie, ifile); //TODO: assert that lengthRead == 4*valuesPerSerie (...) diff --git a/code/stage1/src/TimeSeries/serialize.c b/code/stage1/src/TimeSeries/serialize.c index 3caa371..ba52c87 100644 --- a/code/stage1/src/TimeSeries/serialize.c +++ b/code/stage1/src/TimeSeries/serialize.c @@ -22,7 +22,7 @@ static void scan_line(FILE* ifile, int posID, uint32_t* ID, int posPower, float* } else if (position == posPower) { - Real power; + float power; nextChar = readReal(ifile, &power); *rawPower = (float) power; } @@ -250,11 +250,11 @@ void serialize_byRows(const char* ifileName, const char* ofileName, uint32_t nbI uint32_t tsLength = 4*nbValues+4; FILE* ofile = fopen(ofileName, "wb"); Byte intBuffer[4]; - write_int(nbSeries, 4, intBuffer); + write_int(nbSeries, intBuffer); fwrite(intBuffer, 1, 4, ofile); - write_int(tsLength, 4, intBuffer); + write_int(tsLength, intBuffer); fwrite(intBuffer, 1, 4, ofile); - Real rawPower; + float rawPower; int64_t ID; for (uint32_t i=0; i<nbSeries; i++) @@ -264,7 +264,7 @@ void serialize_byRows(const char* ifileName, const char* ofileName, uint32_t nbI curChar = fgetc(ifile); ungetc(curChar, ifile); curChar = readInt(ifile, &ID); - write_int((uint32_t)ID, 4, intBuffer); + write_int((uint32_t)ID, intBuffer); fwrite(intBuffer, 1, 4, ofile); while (curChar == ',') curChar = fgetc(ifile); diff --git a/code/stage1/src/Util/rng.c b/code/stage1/src/Util/rng.c index 6cd4025..2933174 100644 --- a/code/stage1/src/Util/rng.c +++ b/code/stage1/src/Util/rng.c @@ -129,7 +129,7 @@ uint32_t get_rand_int() } // return a (pseudo-)random real number in [0,1] -Real get_rand_real() +float get_rand_real() { - return (Real) (INV_RANDMAX * (double) get_rand_int()); + return (float) (INV_RANDMAX * (double) get_rand_int()); } diff --git a/code/stage1/src/Util/rng.h b/code/stage1/src/Util/rng.h index 40822aa..371ff27 100644 --- a/code/stage1/src/Util/rng.h +++ b/code/stage1/src/Util/rng.h @@ -28,6 +28,6 @@ void init_rng(int flag); uint32_t get_rand_int(); // return a (pseudo-)random real number in [0,1] -Real get_rand_real(); +float get_rand_real(); #endif diff --git a/code/stage1/src/Util/types.h b/code/stage1/src/Util/types.h index 824be88..6b6da6f 100644 --- a/code/stage1/src/Util/types.h +++ b/code/stage1/src/Util/types.h @@ -12,8 +12,6 @@ typedef unsigned char Byte; -typedef float Real; - // Type to describe a job to be done in a node //TODO: merge with packed version to avoid extra copy by MPI typedef struct Work_t { @@ -40,7 +38,7 @@ typedef struct Result_t { // data structure to store a customer ID + [time-]serie typedef struct PowerCurve { uint32_t ID; - Real* values; + float* values; } PowerCurve; #endif diff --git a/code/stage1/src/Util/utils.c b/code/stage1/src/Util/utils.c index efd4dd2..91ea075 100644 --- a/code/stage1/src/Util/utils.c +++ b/code/stage1/src/Util/utils.c @@ -30,7 +30,7 @@ char readInt(FILE* stream, int64_t* integer) return curChar; } -char readReal(FILE* stream, Real* real) +char readReal(FILE* stream, float* real) { int64_t integerPart; char nextChar = readInt(stream, &integerPart); @@ -51,8 +51,8 @@ char readReal(FILE* stream, Real* real) if (nextChar == 'e' || nextChar == 'E') nextChar = readInt(stream, &exponent); int64_t divisorForFractional = pow(10, floor(log10(fractionalPart > 0 ? fractionalPart : 1))+1); - *real = ( (Real)integerPart - + (integerPart < 0 ? -1 : 1) * (Real)fractionalPart/(divisorForFractional*pow(10,countZeros)) ) + *real = ( (float)integerPart + + (integerPart < 0 ? -1 : 1) * (float)fractionalPart/(divisorForFractional*pow(10,countZeros)) ) * pow(10,exponent); return nextChar; } @@ -79,7 +79,7 @@ void write_int(uint32_t x, Byte* buffer) //WARNING: assuming float is 32bits... // convert 4-bytes binary float to float -float bReal_to_double(Byte* pFloat) +float bReal_to_float(Byte* pFloat) { float res; memcpy(&res, pFloat, 4); @@ -121,7 +121,7 @@ uint32_t get_nbSeries(const char* ifileName) if (lengthRead != 1) fprintf(stderr,"Warning: getting nbSeries from truncated binary file.\n"); fclose(ifile); - return bInt_to_uint(binaryInt, 4); + return bInt_to_uint(binaryInt); } // get metadata: tsLength @@ -134,5 +134,5 @@ uint32_t get_tsLength(const char* ifileName) if (lengthRead != 1) fprintf(stderr,"Warning: getting tsLength from truncated binary file.\n"); fclose(ifile); - return bInt_to_uint(binaryInt, 4); + return bInt_to_uint(binaryInt); } diff --git a/code/stage1/src/Util/utils.h b/code/stage1/src/Util/utils.h index 2ed68aa..3cb5cd1 100644 --- a/code/stage1/src/Util/utils.h +++ b/code/stage1/src/Util/utils.h @@ -13,13 +13,17 @@ void free_result(Result_t* result); char readInt(FILE* stream, int64_t* integer); -char readReal(FILE* stream, Real* real); +char readReal(FILE* stream, float* real); // convert n-bytes binary integers to uint32_t -uint32_t bInt_to_uint(Byte* pInteger, size_t bytesCount); +uint32_t bInt_to_uint(Byte* pInteger); // serialize integers with a portable bytes order -void write_int(uint32_t integer, size_t bytesCount, Byte* buffer); +void write_int(uint32_t integer, Byte* buffer); + +float bReal_to_float(Byte* pFloat); + +void write_real(float x, Byte* buffer); // Expected size of a Work message in bytes: uint32_t get_packedWork_length(uint32_t nbSeriesInChunk); diff --git a/code/stage1/wrapper.R b/code/stage1/wrapper.R index 524fc90..0a957d8 100644 --- a/code/stage1/wrapper.R +++ b/code/stage1/wrapper.R @@ -1,4 +1,5 @@ -ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontLetMeEmpty") +ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, + args="DontLetMeEmptyPlease!") { command_line = paste("mpirun -np ",np," ",path,"/ppam.exe cluster",sep="") @@ -7,8 +8,9 @@ ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontL { if (!is.character(data)) { - #assuming matrix or data.frame, WITH row names (identifiers; could be line number...) - write.csv(data, "/tmp/data_csv", row.names=TRUE, col.names=FALSE) + #assuming matrix or data.frame, WITH row names + #( identifiers; could be line number... e.g. data <- cbind(1:nrow(data),data) ) + write.table(data, "/tmp/data_csv", sep=",", row.names=FALSE, col.names=FALSE) system(paste(path,"/ppam.exe serialize /tmp/data_csv /tmp/data_bin 0 0",sep="")) } else { @@ -20,3 +22,14 @@ ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontL command_line = paste(command_line," ",args,sep="") system(command_line) } + +#NOTE: identifiers in first column +getMedoids = function(path=".", xmlResult = "ppamResult.xml", + finalSeries = "ppamFinalSeries.bin") +{ + system(paste(path,"/ppam.exe deserialize ",finalSeries," ppamFinalSeries.csv -1",sep="")) + curves = read.table("ppamFinalSeries.csv", sep=",") + library(XML) + ranks = as.integer( xmlToList( xmlParse(xmlResult) )$ranks ) + return ( curves[ranks,] ) # == medoids +} -- 2.44.0