1 #define __STDC_FORMAT_MACROS //to print 64bits unsigned integers
3 #include <cgds/Vector.h>
/**
 * Read a (possibly negative) decimal integer from a text stream.
 *
 * Characters are consumed until the first digit is found, then digits are
 * accumulated until a non-digit shows up. The value is negated only when the
 * very first character consumed by this call is '-'.
 *
 * @param stream   input stream, positioned at the start of the number
 * @param integer  receives the parsed value; set to 0 when no digit is found
 * @return the character that ended the number (it has been consumed from the
 *         stream), or EOF converted to char when the input ran out
 */
char readInt(FILE* stream, int64_t* integer)
{
	*integer = 0;

	// fgetc() returns an int: keeping it in an int is the only way to tell
	// EOF apart from a real character.
	int curChar = fgetc(stream);
	int sign = (curChar == '-' ? -1 : 1);

	// Look for the first digit; stop at end of input instead of spinning forever.
	while (curChar != EOF && (curChar < '0' || curChar > '9'))
		curChar = fgetc(stream);

	while (curChar >= '0' && curChar <= '9')
	{
		*integer = 10 * (*integer) + (int64_t)(curChar - '0');
		curChar = fgetc(stream);
	}

	*integer *= sign;
	return (char)curChar;
}
/**
 * Read a decimal real number ("12", "-0.5", ".25", "1.5e-3", ...) from a
 * text stream.
 *
 * Leading characters that cannot start a number are skipped, but the parser
 * never crosses a field or line boundary (',', '\n', '\r', end of input).
 * When the field holds no digit at all (empty field, "NA", ...), *real is
 * left untouched so the caller's preset "missing" marker survives.
 *
 * @param stream  input stream, positioned at the start of the field
 * @param real    receives the parsed value when at least one digit was read
 * @return the character that ended the parse (it has been consumed from the
 *         stream), or EOF converted to char when the input ran out
 */
char readReal(FILE* stream, float* real)
{
	int c = fgetc(stream);

	// Skip junk in front of the number without leaving the current field.
	while (c != EOF && c != ',' && c != '\n' && c != '\r'
		&& c != '-' && c != '+' && c != '.' && (c < '0' || c > '9'))
	{
		c = fgetc(stream);
	}

	// The sign is tracked here rather than derived from the integer part,
	// otherwise "0.5" and "-0.5" cannot be told apart.
	int negative = (c == '-');
	if (c == '-' || c == '+')
		c = fgetc(stream);

	double value = 0.0;
	int digits = 0;
	for (; c >= '0' && c <= '9'; c = fgetc(stream))
	{
		value = 10.0 * value + (double)(c - '0');
		digits++;
	}

	if (c == '.')
	{
		// Accumulate the fractional digits as an integer and divide once:
		// leading zeros ("1.005") need no special handling this way.
		double scale = 1.0;
		for (c = fgetc(stream); c >= '0' && c <= '9'; c = fgetc(stream))
		{
			value = 10.0 * value + (double)(c - '0');
			scale *= 10.0;
			digits++;
		}
		value /= scale;
	}

	if (digits > 0 && (c == 'e' || c == 'E'))
	{
		c = fgetc(stream);
		int negativeExponent = (c == '-');
		if (c == '-' || c == '+')
			c = fgetc(stream);

		int exponent = 0;
		for (; c >= '0' && c <= '9'; c = fgetc(stream))
		{
			// A float saturates long before this bound; it only keeps the
			// scaling loop below short on absurd input.
			if (exponent < 1000)
				exponent = 10 * exponent + (c - '0');
		}
		for (int i = 0; i < exponent; i++)
			value = negativeExponent ? value / 10.0 : value * 10.0;
	}

	if (digits > 0)
		*real = (float)(negative ? -value : value);

	return (char)c;
}
/**
 * Parse one comma-separated line into an ID and a raw power value.
 *
 * Fields are numbered from 1. The field at posID is read with readInt(), the
 * field at posPower with readReal(); every other field is skipped. The time
 * field (posTime) is only skipped for now: *time is never written.
 * An empty or non-numeric power field yields FLT_MAX ("NA"); an empty ID
 * field leaves *ID untouched.
 *
 * On return the stream is positioned on the first character of the next
 * non-empty line, or at end of file.
 *
 * @param ifile     input stream, positioned at the start of a line
 * @param posTime   1-based index of the time field
 * @param time      reserved for the parsed time (currently unused)
 * @param posID     1-based index of the ID field
 * @param ID        receives the ID, truncated to 32 bits
 * @param posPower  1-based index of the power field
 * @param rawPower  receives the power value, or FLT_MAX when it is missing
 */
static void scan_line(FILE* ifile,
	int posTime, uint32_t* time, int posID, uint32_t* ID, int posPower, float* rawPower)
{
	(void)time; // not filled yet, see the TODO below

	for (int position = 1; ; position++)
	{
		// Peek at the first character: an empty field must not be handed to
		// the number readers, which would run on into the following fields.
		int nextChar = fgetc(ifile);
		int emptyField = (nextChar == EOF || nextChar == ','
			|| nextChar == '\n' || nextChar == '\r');

		if (emptyField)
		{
			if (position == posPower)
				*rawPower = FLT_MAX; //"NA"
		}
		else if (position == posTime)
		{
			//TODO: go to 01JAN2009:00:00:00 and convert to integer (0, 1, ...)
			// Until then the field is skipped; its first character is already
			// consumed and the loop below eats the rest.
		}
		else if (position == posID)
		{
			ungetc(nextChar, ifile);
			int64_t ID_on64bits = 0;
			nextChar = readInt(ifile, &ID_on64bits);
			*ID = (uint32_t)ID_on64bits;
		}
		else if (position == posPower)
		{
			ungetc(nextChar, ifile);
			float power = FLT_MAX; //"NA"
			nextChar = readReal(ifile, &power);
			*rawPower = power;
		}
		// any other field: skipped, first character already consumed

		//continue until next comma (or line end or file end)
		while (!feof(ifile) && nextChar != EOF
			&& nextChar != '\n' && nextChar != '\r' && nextChar != ',')
		{
			nextChar = fgetc(ifile);
		}

		if (feof(ifile) || nextChar == EOF || nextChar == '\n' || nextChar == '\r')
		{
			// skip all potential line feeds; the parentheses matter, since
			// && binds tighter than ||
			while (!feof(ifile) && (nextChar == '\n' || nextChar == '\r'))
				nextChar = fgetc(ifile);
			// give back the first character of the next line, if there is one
			if (!feof(ifile) && nextChar != EOF)
				ungetc(nextChar, ifile);
			return;
		}
	}
}
94 //TODO: check datetime at each line (build datetimes file ! for each year ?)
95 //also fill NA with closest value in file (easy)
96 //01JAN2009:00:00:00 ..........
99 // Main job: parse a data file into a conventional CSV file in rows, without header
// Reads ifileName line by line through scan_line(), gathers the successive
// power values into tsBuffer and writes one comma-separated row ("%g" values)
// to ofileName each time a buffered series holds exactly refTsLength values.
// A positive nbItems caps the number of series written; nbItems <= 0 is
// treated as "no limit" by the final flush test.
//
// NOTE(review): several pieces are not visible in this listing and must be
// confirmed against the full file: the declarations of nextChar, curChar, ID,
// rawPower and refTsLength, the loop heads, and any fclose() of the two streams.
// NOTE(review): firstTime and lastTime are not used in the visible code (see
// the TODO just below the signature).
// NOTE(review): skippedSeriesCount is incremented but never reported in the
// visible code.
100 void transform(const char* ifileName
, int posID
, int posTime
, int posValue
,
101 char* firstTime
, char* lastTime
, const char* ofileName
, int nbItems
) //uint32_t nbItems
103 //TODO: complete timedate vector from first_time and last_time
104 // --> this gives (expected) tsLength for free
// NOTE(review): no NULL check on either fopen() result is visible here.
106 FILE* ifile
= fopen(ifileName
, "r");
107 // output file to write time-series sequentially, CSV format.
108 FILE* ofile
= fopen(ofileName
, "w");
// Reads characters up to the first line break or end of file; presumably
// this skips a header line (the loop head is not visible) - TODO confirm.
113 nextChar
= fgetc(ifile
);
114 while (!feof(ifile
) && nextChar
!= '\n' && nextChar
!= '\r')
116 // process one client (ID in first column) at a time
117 uint64_t processedLines
= 0; //execution trace
118 uint32_t seriesCount
=0, skippedSeriesCount
=0, tsLength
=0;
119 uint32_t mismatchLengthCount
=0;
// Variable-length array: refTsLength is not declared in the visible code.
120 float tsBuffer
[refTsLength
];
124 // next element to read always start with a digit
126 curChar
= fgetc(ifile
);
// The trailing ';' suggests this is the tail of a do/while whose "do" line
// is not visible - TODO confirm.
127 while (!feof(ifile
) && (curChar
< '0' || curChar
> '9'));
// Give the digit back so that scan_line() sees the whole line.
130 ungetc(curChar
, ifile
);
// NOTE(review): this call passes 5 arguments whereas scan_line() is defined
// with 7 (posTime and time come first), and posPower is not a parameter of
// this function (the parameter is named posValue).
133 scan_line(ifile
, posID
, &ID
, posPower
, &rawPower
);
136 //just starting a new time-series: must process the last one (if there is a last one !)
// A series is written only when it has exactly refTsLength values.
139 if (tsLength
== refTsLength
)
141 for (int i
=0; i
<tsLength
; i
++)
// Comma after every value except the last one of the row.
143 char* format
= i
<tsLength
-1 ? "%g," : "%g";
144 fprintf(ofile
, format
, tsBuffer
[i
]);
146 fprintf(ofile
, "\n");
// Counts the written series and tests the nbItems cap; the action taken
// when the cap is reached is not visible here.
147 if (nbItems
> 0 && ++seriesCount
>= nbItems
)
150 //if something wrong happened, skip series
153 skippedSeriesCount
++;
154 if (tsLength
!= refTsLength
)
155 mismatchLengthCount
++;
159 // reinitialize flags
164 //We cannot write more than refTsLength values
// Values beyond refTsLength are dropped rather than overflowing tsBuffer.
165 if (tsLength
< refTsLength
)
166 tsBuffer
[tsLength
++] = rawPower
;
// Progress trace every million processed lines.
168 if ((++processedLines
) % 1000000 == 0)
169 fprintf(stdout
,"Processed %"PRIu64
" lines\n", processedLines
);
172 if (tsLength
== refTsLength
&& (nbItems
<= 0 || seriesCount
< nbItems
))
174 // flush last time-series if all conditions are met
175 for (int i
=0; i
<tsLength
; i
++)
177 char* format
= i
<tsLength
-1 ? "%g," : "%g";
178 fprintf(ofile
, format
, tsBuffer
[i
]);
180 fprintf(ofile
, "\n");
183 else if (nbItems
<= 0 || seriesCount
< nbItems
)
185 if (tsLength
!= refTsLength
)
186 mismatchLengthCount
++;
189 // finally print some statistics
// NOTE(review): seriesCount is uint32_t and nbItems is int, so a negative
// nbItems ("no limit") is converted to a huge unsigned value in this
// comparison and the warning is printed anyway.
190 if (seriesCount
< nbItems
)
191 fprintf(stdout
,"Warning: only %u series retrieved.\n",seriesCount
);
192 fprintf(stdout
,"%u mismatch series lengths.\n",mismatchLengthCount
);
/**
 * Command-line entry point.
 *
 * Expected arguments, in order:
 *   ifile posID posTime posValue firstTime lastTime ofile nbItems
 * They are forwarded to transform() in that order; the three positions and
 * nbItems are converted with atoi().
 *
 * @return 0 on success, 1 when too few arguments are given
 */
int main(int argc, char** argv)
{
	// argv[1] .. argv[8] are read below: refuse to run without them.
	if (argc < 9)
	{
		fprintf(stderr,
			"Usage: %s ifile posID posTime posValue firstTime lastTime ofile nbItems\n",
			argc > 0 ? argv[0] : "transform");
		return 1;
	}

	transform(argv[1], atoi(argv[2]), atoi(argv[3]), atoi(argv[4]),
		argv[5], argv[6], argv[7], atoi(argv[8]));
	return 0;
}