// compute rows of the matrix of reduced coordinates
void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t nbValues,
- Real* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates)
+ float* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates)
{
uint32_t D = (1 << nbReducedCoordinates);
- Real* x = (Real*) malloc(nbValues*sizeof(Real));
+ double* x = (double*) malloc(nbValues*sizeof(double));
for (uint32_t i=0; i<nbValues; i++)
x[i] = i;
- Real* y = (Real*) malloc(nbValues*sizeof(Real));
- Real* interpolatedTranformedCurve = (Real*) malloc(D*sizeof(Real));
+ double* y = (double*) malloc(nbValues*sizeof(double));
+ double* interpolatedTranformedCurve = (double*) malloc(D*sizeof(double));
gsl_interp* linearInterpolation = gsl_interp_alloc(gsl_interp_linear, nbValues);
gsl_interp_accel* acc = gsl_interp_accel_alloc();
gsl_wavelet_workspace* work = gsl_wavelet_workspace_alloc(D);
for (uint32_t j=0; j<D; j++)
{
interpolatedTranformedCurve[j] =
- gsl_interp_eval(linearInterpolation, x, y, j*((Real)(nbValues-1)/(D-1)), acc);
+ gsl_interp_eval(linearInterpolation, x, y, j*((float)(nbValues-1)/(D-1)), acc);
}
//DWT transform (in place) on interpolated curve [TODO: clarify stride parameter]
gsl_wavelet_transform_forward(w, interpolatedTranformedCurve, 1, D, work);
{
t1 += (1 << j);
//reducedCoordinates[(index+i)*nbReducedCoordinates+j] = sqrt( sum( x[t0:t1]^2 ) )
- Real sumOnSegment = 0.0;
+ float sumOnSegment = 0.0;
for (uint32_t u=t0; u<t1; u++)
sumOnSegment += interpolatedTranformedCurve[u]*interpolatedTranformedCurve[u];
reducedCoordinates[(index+i)*nbReducedCoordinates+j] = sqrt(sumOnSegment);
// compute rows of the matrix of reduced coordinates (see computeCoefficients.R)
void compute_coefficients(PowerCurve* powerCurves, uint32_t nbSeries, uint32_t nbValues,
- Real* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates);
+ float* reducedCoordinates, uint32_t index, uint32_t nbReducedCoordinates);
#endif
#include <math.h>
// compute L^p dissimilarities for a nxm matrix
-Real* get_dissimilarities_intra(Real* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p)
+float* get_dissimilarities_intra(float* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p)
{
- Real* dissimilarities = (Real*) malloc(nbSamples*nbSamples*sizeof(Real));
+ float* dissimilarities = (float*) malloc(nbSamples*nbSamples*sizeof(float));
for (uint32_t i=0; i<nbSamples; i++)
{
dissimilarities[i*nbSamples+i] = 0.0;
}
// compute L^p dissimilarities between rows of 2 matrices
-Real* get_dissimilarities_inter(Real* mat1, uint32_t n1, Real* mat2, uint32_t n2,
+float* get_dissimilarities_inter(float* mat1, uint32_t n1, float* mat2, uint32_t n2,
uint32_t nbValues, uint32_t p)
{
- Real* dissimilarities = (Real*) malloc(n1*n2*sizeof(Real));
+ float* dissimilarities = (float*) malloc(n1*n2*sizeof(float));
for (uint32_t i=0; i<n1; i++)
{
for (uint32_t j=0; j<n2; j++)
#include "Util/types.h"
// compute L^p dissimilarities for a nxm matrix
-Real* get_dissimilarities_intra(Real* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p);
+float* get_dissimilarities_intra(float* samples, uint32_t nbSamples, uint32_t nbValues, uint32_t p);
// compute L^p dissimilarities between rows of 2 matrices
-Real* get_dissimilarities_inter(Real* mat1, uint32_t n1, Real* mat2, uint32_t n2,
+float* get_dissimilarities_inter(float* mat1, uint32_t n1, float* mat2, uint32_t n2,
uint32_t nbValues, uint32_t p);
#endif
// assign a vector (represented by its dissimilarities to others, as dissimilarities[index,])
// to a cluster, represented by its center ==> output is integer in 0..K-1
-static uint32_t assignCluster(uint32_t index, Real* dissimilarities,
+static uint32_t assignCluster(uint32_t index, float* dissimilarities,
uint32_t* centers, uint32_t n, uint32_t K)
{
uint32_t minIndex = 0;
- Real minDist = dissimilarities[index * n + centers[0]];
+ float minDist = dissimilarities[index * n + centers[0]];
for (uint32_t j = 1; j < K; j++)
{
}
// assign centers given a clustering, and also compute corresponding distortion
-static void assign_centers(uint32_t nbClusters, Vector** clusters, Real* dissimilarities,
- uint32_t nbItems, uint32_t* ctrs, Real* distor)
+static void assign_centers(uint32_t nbClusters, Vector** clusters, float* dissimilarities,
+ uint32_t nbItems, uint32_t* ctrs, float* distor)
{
*distor = 0.0;
// TODO [heuristic]: checking only a neighborhood of the former center ?
{
// If the cluster is empty, choose a center at random (pathological case...)
uint32_t minIndex = get_rand_int() % nbItems;
- Real minSumDist = INFINITY;
+ float minSumDist = INFINITY;
for (uint32_t i = 0; i < vector_size(clusters[j]); i++)
{
uint32_t index1;
vector_get(clusters[j], i, index1);
// attempt to use current index as center
- Real sumDist = 0.0;
+ float sumDist = 0.0;
for (uint32_t ii = 0; ii < vector_size(clusters[j]); ii++)
{
uint32_t index2;
}
// Core PAM algorithm from a dissimilarity matrix; (e.g. nstart=10, maxiter=100)
-void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids,
+void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters, int clustOnMedoids,
uint32_t nbStart, uint32_t maxNbIter, Result_t* result)
{
uint32_t* ctrs = result->medoids_ranks; //shorthand
bestClusts[j] = vector_new(uint32_t);
}
- Real lastDistor, distor, bestDistor = INFINITY;
+ float lastDistor, distor, bestDistor = INFINITY;
for (uint32_t startKount = 0; startKount < nbStart; startKount++)
{
// centers (random) [re]initialization
#define MAXITER 100
// Core PAM algorithm from a 'flat' dissimilarity matrix
-void pam(Real* dissimilarities, uint32_t nbItems, uint32_t nbClusters,
+void pam(float* dissimilarities, uint32_t nbItems, uint32_t nbClusters,
int clustOnMedoids, uint32_t nbStart, uint32_t maxNbIter, Result_t* result);
#endif
uint32_t nbReducedCoordinates = (uint32_t)ceil(log2(nbValues));
// Preprocessing to reduce dimension of both data and medoids
- Real* reducedCoordinates_data = (Real*) malloc(nbSeries * nbReducedCoordinates * sizeof(Real));
+ float* reducedCoordinates_data = (float*) malloc(nbSeries * nbReducedCoordinates * sizeof(float));
compute_coefficients(data, nbSeries, nbValues,
reducedCoordinates_data, 0, nbReducedCoordinates);
- Real* reducedCoordinates_medoids = (Real*) malloc(nbClusters * nbReducedCoordinates * sizeof(Real));
+ float* reducedCoordinates_medoids = (float*) malloc(nbClusters * nbReducedCoordinates * sizeof(float));
compute_coefficients(medoids, nbClusters, nbValues,
reducedCoordinates_medoids, 0, nbReducedCoordinates);
- Real* dissimilarities = get_dissimilarities_inter(reducedCoordinates_data, nbSeries,
+ float* dissimilarities = get_dissimilarities_inter(reducedCoordinates_data, nbSeries,
reducedCoordinates_medoids, nbClusters, nbReducedCoordinates, p_for_dissims);
free(reducedCoordinates_data);
free(reducedCoordinates_medoids);
for (uint32_t i=0; i<nbSeries; i++)
{
uint32_t minIndex = 0;
- Real minDissim = dissimilarities[i*nbClusters + 0];
+ float minDissim = dissimilarities[i*nbClusters + 0];
for (uint32_t j=1; j<nbClusters; j++)
{
if (dissimilarities[i*nbClusters + j] < minDissim)
while (index < NCHAR_FNAME)
packedWork[index++] = 0;
- write_int(work->nbSeries, 4, packedWork + index);
+ write_int(work->nbSeries, packedWork + index);
index += 4;
for (uint32_t i = 0; i < work->nbSeries; i++)
{
- write_int(work->ranks[i], 4, packedWork + index);
+ write_int(work->ranks[i], packedWork + index);
index += 4;
}
// complete with zeros
for (uint32_t i = 0; i < nbSeriesInChunk - work->nbSeries; i++)
{
- write_int(0, 4, packedWork + index);
+ write_int(0, packedWork + index);
index += 4;
}
- write_int(work->nbClusters, 4, packedWork + index);
+ write_int(work->nbClusters, packedWork + index);
index += 4;
- write_int(work->clustOnMedoids, 4, packedWork + index);
+ write_int(work->clustOnMedoids, packedWork + index);
index += 4;
- write_int(work->p_for_dissims, 4, packedWork + index);
+ write_int(work->p_for_dissims, packedWork + index);
}
// serialize a Result_t object into a bytes string
{
uint32_t index = 0;
- write_int(result->nbClusters, 4, packedResult);
+ write_int(result->nbClusters, packedResult);
index += 4;
for (uint32_t i = 0; i < result->nbClusters; i++)
{
- write_int(result->medoids_ID[i], 4, packedResult + index);
+ write_int(result->medoids_ID[i], packedResult + index);
index += 4;
}
for (uint32_t i = 0; i < result->nbClusters; i++)
{
- write_int(result->medoids_ranks[i], 4, packedResult + index);
+ write_int(result->medoids_ranks[i], packedResult + index);
index += 4;
}
}
index = NCHAR_FNAME;
- uint32_t nbSeries = work->nbSeries = bInt_to_uint(packedWork + index, 4);
+ uint32_t nbSeries = work->nbSeries = bInt_to_uint(packedWork + index);
index += 4;
work->ranks = (uint32_t*) malloc(nbSeries * sizeof(uint32_t));
for (uint32_t i = 0; i < nbSeries; i++)
{
- work->ranks[i] = bInt_to_uint(packedWork + index, 4);
+ work->ranks[i] = bInt_to_uint(packedWork + index);
index += 4;
}
// shift over the zeros
index += 4 * (nbSeriesInChunk - nbSeries);
- work->nbClusters = bInt_to_uint(packedWork + index, 4);
+ work->nbClusters = bInt_to_uint(packedWork + index);
index += 4;
- work->clustOnMedoids = bInt_to_uint(packedWork + index, 4);
+ work->clustOnMedoids = bInt_to_uint(packedWork + index);
index += 4;
- work->p_for_dissims = bInt_to_uint(packedWork + index, 4);
+ work->p_for_dissims = bInt_to_uint(packedWork + index);
return work;
}
Result_t* result = (Result_t*) malloc(sizeof(Result_t));
uint32_t index = 0;
- uint32_t nbClusters = result->nbClusters = bInt_to_uint(packedResult, 4);
+ uint32_t nbClusters = result->nbClusters = bInt_to_uint(packedResult);
index += 4;
result->medoids_ID = (uint32_t*) malloc(nbClusters * sizeof(uint32_t));
for (uint32_t i = 0; i < nbClusters; i++)
{
- result->medoids_ID[i] = bInt_to_uint(packedResult + index, 4);
+ result->medoids_ID[i] = bInt_to_uint(packedResult + index);
index += 4;
}
result->medoids_ranks = (uint32_t*) malloc(nbClusters * sizeof(uint32_t));
for (uint32_t i = 0; i < nbClusters; i++)
{
- result->medoids_ranks[i] = bInt_to_uint(packedResult + index, 4);
+ result->medoids_ranks[i] = bInt_to_uint(packedResult + index);
index += 4;
}
ofile = fopen(outputFileName, "r+b"); //read and write, binary
fseek(ofile, 0, SEEK_SET);
Byte intBuffer[4];
- write_int(newSeriesCount, 4, intBuffer);
+ write_int(newSeriesCount, intBuffer);
fwrite(intBuffer, 1, 4, ofile);
- write_int(tsLength, 4, intBuffer);
+ write_int(tsLength, intBuffer);
fwrite(intBuffer, 1, 4, ofile);
fclose(ofile);
}
// nbReducedCoordinates = smallest power of 2 which is above nbValues
uint32_t nbReducedCoordinates = (uint32_t)ceil(log2(nbValues));
- Real* reducedCoordinates = (Real*) malloc(nbSeries * nbReducedCoordinates * sizeof(Real));
+ float* reducedCoordinates = (float*) malloc(nbSeries * nbReducedCoordinates * sizeof(float));
// call preprocessing with the rows of raw power values matrix.
// Keep the IDs in memory for further processing.
// *** Step 2 ***
// Run PAM algorithm on the dissimilarity matrix computed from 'reducedCoordinates'.
- Real* dissimilarities = get_dissimilarities_intra(
+ float* dissimilarities = get_dissimilarities_intra(
reducedCoordinates, nbSeries, nbReducedCoordinates, work->p_for_dissims);
free(reducedCoordinates);
if (!ofile)
{
powerCurve = powerCurves + i;
- powerCurve->values = (Real*) malloc(valuesPerSerie * sizeof(Real));
+ powerCurve->values = (float*) malloc(valuesPerSerie * sizeof(float));
}
// translate 4-bytes binary integer into integer ID
size_t lengthRead = fread(binaryID, 4, 1, ifile);
if (lengthRead != 1)
fprintf(stderr,"Warning: deserializing truncated binary file.\n");
- uint32_t ID = bInt_to_uint((Byte*) binaryID, 4);
+ uint32_t ID = bInt_to_uint((Byte*) binaryID);
free(binaryID);
if (ofile)
fprintf(ofile, "%u,", ID);
else
powerCurve->ID = ID;
- // translate 4-bytes binary integers into Real
+ // translate 4-bytes binary integers into float
Byte* binarySerie = (Byte*) malloc(4 * valuesPerSerie);
lengthRead = fread(binarySerie, 1, 4*valuesPerSerie, ifile);
//TODO: assert that lengthRead == 4*valuesPerSerie (...)
}
else if (position == posPower)
{
- Real power;
+ float power;
nextChar = readReal(ifile, &power);
*rawPower = (float) power;
}
uint32_t tsLength = 4*nbValues+4;
FILE* ofile = fopen(ofileName, "wb");
Byte intBuffer[4];
- write_int(nbSeries, 4, intBuffer);
+ write_int(nbSeries, intBuffer);
fwrite(intBuffer, 1, 4, ofile);
- write_int(tsLength, 4, intBuffer);
+ write_int(tsLength, intBuffer);
fwrite(intBuffer, 1, 4, ofile);
- Real rawPower;
+ float rawPower;
int64_t ID;
for (uint32_t i=0; i<nbSeries; i++)
curChar = fgetc(ifile);
ungetc(curChar, ifile);
curChar = readInt(ifile, &ID);
- write_int((uint32_t)ID, 4, intBuffer);
+ write_int((uint32_t)ID, intBuffer);
fwrite(intBuffer, 1, 4, ofile);
while (curChar == ',')
curChar = fgetc(ifile);
}
// return a (pseudo-)random real number in [0,1]
-Real get_rand_real()
+float get_rand_real()
{
- return (Real) (INV_RANDMAX * (double) get_rand_int());
+ return (float) (INV_RANDMAX * (double) get_rand_int());
}
uint32_t get_rand_int();
// return a (pseudo-)random real number in [0,1]
-Real get_rand_real();
+float get_rand_real();
#endif
typedef unsigned char Byte;
-typedef float Real;
-
// Type to describe a job to be done in a node
//TODO: merge with packed version to avoid extra copy by MPI
typedef struct Work_t {
// data structure to store a customer ID + [time-]serie
typedef struct PowerCurve {
uint32_t ID;
- Real* values;
+ float* values;
} PowerCurve;
#endif
return curChar;
}
-char readReal(FILE* stream, Real* real)
+char readReal(FILE* stream, float* real)
{
int64_t integerPart;
char nextChar = readInt(stream, &integerPart);
if (nextChar == 'e' || nextChar == 'E')
nextChar = readInt(stream, &exponent);
int64_t divisorForFractional = pow(10, floor(log10(fractionalPart > 0 ? fractionalPart : 1))+1);
- *real = ( (Real)integerPart
- + (integerPart < 0 ? -1 : 1) * (Real)fractionalPart/(divisorForFractional*pow(10,countZeros)) )
+ *real = ( (float)integerPart
+ + (integerPart < 0 ? -1 : 1) * (float)fractionalPart/(divisorForFractional*pow(10,countZeros)) )
* pow(10,exponent);
return nextChar;
}
//WARNING: assuming float is 32bits...
// convert 4-bytes binary float to float
-float bReal_to_double(Byte* pFloat)
+float bReal_to_float(Byte* pFloat)
{
float res;
memcpy(&res, pFloat, 4);
if (lengthRead != 1)
fprintf(stderr,"Warning: getting nbSeries from truncated binary file.\n");
fclose(ifile);
- return bInt_to_uint(binaryInt, 4);
+ return bInt_to_uint(binaryInt);
}
// get metadata: tsLength
if (lengthRead != 1)
fprintf(stderr,"Warning: getting tsLength from truncated binary file.\n");
fclose(ifile);
- return bInt_to_uint(binaryInt, 4);
+ return bInt_to_uint(binaryInt);
}
char readInt(FILE* stream, int64_t* integer);
-char readReal(FILE* stream, Real* real);
+char readReal(FILE* stream, float* real);
// convert n-bytes binary integers to uint32_t
-uint32_t bInt_to_uint(Byte* pInteger, size_t bytesCount);
+uint32_t bInt_to_uint(Byte* pInteger);
// serialize integers with a portable bytes order
-void write_int(uint32_t integer, size_t bytesCount, Byte* buffer);
+void write_int(uint32_t integer, Byte* buffer);
+
+float bReal_to_float(Byte* pFloat);
+
+void write_real(float x, Byte* buffer);
// Expected size of a Work message in bytes:
uint32_t get_packedWork_length(uint32_t nbSeriesInChunk);
-ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL, args="DontLetMeEmpty")
+ppam_exe = function(path=".", np=parallel::detectCores(), data=NULL,
+ args="DontLetMeEmptyPlease!")
{
command_line = paste("mpirun -np ",np," ",path,"/ppam.exe cluster",sep="")
{
if (!is.character(data))
{
- #assuming matrix or data.frame, WITH row names (identifiers; could be line number...)
- write.csv(data, "/tmp/data_csv", row.names=TRUE, col.names=FALSE)
+ #assuming matrix or data.frame, WITH row names
+ #( identifiers; could be line number... e.g. data <- cbind(1:nrow(data),data) )
+ write.table(data, "/tmp/data_csv", sep=",", row.names=FALSE, col.names=FALSE)
system(paste(path,"/ppam.exe serialize /tmp/data_csv /tmp/data_bin 0 0",sep=""))
} else
{
command_line = paste(command_line," ",args,sep="")
system(command_line)
}
+
+#NOTE: identifiers in first column
+getMedoids = function(path=".", xmlResult = "ppamResult.xml",
+ finalSeries = "ppamFinalSeries.bin")
+{
+ system(paste(path,"/ppam.exe deserialize ",finalSeries," ppamFinalSeries.csv -1",sep=""))
+ curves = read.table("ppamFinalSeries.csv", sep=",")
+ library(XML)
+ ranks = as.integer( xmlToList( xmlParse(xmlResult) )$ranks )
+ return ( curves[ranks,] ) # == medoids
+}