+++ /dev/null
-convert_32kEDF = function(orig_csv, nb_series_per_chunk)
-{
- datetimes = #...TODO: all 3 years? year-by-year is better
- orig_con = file(orig_csv, open="r") #2009, 2010 or 2011
- ignored = readLines(orig_con, 1) #skip header
- serie_length = length(datetimes) #around 365*24*2 = 17520
- sep = if (year==2009) "," else if (year==2010) ";" else ";"
-
-scan(orig_con, character(), sep=",", nlines=1, quiet=TRUE)
- library(sqldf, quietly=TRUE)
- ids = read.csv.sql(file_csv, header = TRUE, sep = ","
- sql = "select * from file_csv group by FK_CCU_ID")
- index = 0
- repeat
- {
- if (index+1 >= length(ids))
- break
- request = "select CPP_DATE_PUISSANCE,CPP_PUISSANCE_BRUTE where FK_CCU_ID in ("
- for (id in ids[index + seq_len(nb_series_per_chunk)])
- request = paste(request, id, ",", sep="")
- request = paste(request, ") order by FK_CCU_ID,CPP_DATE_PUISSANCE", sep="")
- series_chunk = read.csv.sql(file_csv, header = TRUE, sep = ",", sql = request)
-
- index = index + 17520
}
// Parse a line into integer+float (ID, raw power)
-static void scan_line(FILE* ifile, int posID, uint32_t* ID, int posPower, float* rawPower)
+static void scan_line(FILE* ifile,
+ int posTime, uint32_t* time, int posID, uint32_t* ID, int posPower, float* rawPower)
{
char nextChar;
int position = 1;
while (1)
{
- if (position == posID)
+ if (position == posTime)
+ {
+ //TODO: go to 01JAN2009:00:00:00 and convert to integer (0, 1, ...)
+ }
+ else if (position == posID)
{
int64_t ID_on64bits;
nextChar = readInt(ifile, &ID_on64bits);
}
}
-// Main job: parse an "EDF-text file" into a conventional CSV file in rows, without header
-void transform(const char* ifileName, const char* ofileName, uint32_t nbItems)
+
+//TODO: check the datetime on each line (build a datetimes file? one for each year?)
+//Also fill NA with the closest value in the file (easy)
+//01JAN2009:00:00:00 ..........
+
+
+// Main job: parse a data file into a conventional CSV file in rows, without header
+void transform(const char* ifileName, int posID, int posTime, int posValue,
+ char* firstTime, char* lastTime, const char* ofileName, int nbItems) //uint32_t nbItems
{
- // Use the header to know positions of ID and rawPower
- FILE* ifile = fopen(ifileName, "r");
- uint32_t headerShift = 0;
- char curChar;
- Vector* header = vector_new(char);
- do
- {
- curChar = fgetc(ifile);
- headerShift++;
- if (curChar == '\n' || curChar == '\r')
- {
- // Flush all potential other line feeds
- while (curChar == '\n' || curChar == '\r')
- curChar = fgetc(ifile);
- ungetc(curChar, ifile);
- break;
- }
- vector_push(header, curChar);
- }
- while (1);
- char* headerString = (char*)malloc((vector_size(header) + 1)*sizeof(char));
- VectorIterator* it = vector_get_iterator(header);
- int index = 0;
- while (vectorI_has_data(it))
- {
- vectorI_get(it, headerString[index]);
- vectorI_move_next(it);
- index++;
- }
- vectorI_destroy(it);
- headerString[index] = 0;
- vector_destroy(header);
- int position = 1, posID = 0, posPower = 0;
- char* columnDescriptor = strtok(headerString, ",");
- while (columnDescriptor != NULL)
- {
- if (!strcmp(columnDescriptor,"FK_CCU_ID") || !strcmp(columnDescriptor,"fk_ccu_id"))
- posID = position;
- else if (!strcmp(columnDescriptor,"CPP_PUISSANCE_BRUTE"))
- posPower = position;
- position++;
- columnDescriptor = strtok(NULL, ",");
- }
- free(headerString);
-
- // Estimate tsLength with a scan of the 3 first series
- uint32_t ID=0, lastID=0, refTsLength=0;
- float rawPower = 0.;
- scan_line(ifile, posID, &ID, posPower, &rawPower);
- //'sl' = sample lengths (short because a lot of comparisons then)
- uint32_t* sl = (uint32_t*) calloc(3, sizeof(uint32_t));
- for (int i=0; i<3; i++)
- {
- lastID = ID;
- while (ID == lastID)
- {
- sl[i]++;
- scan_line(ifile, posID, &ID, posPower, &rawPower);
- }
- }
- if (sl[0] <= sl[1] <= sl[2] || sl[1] <= sl[0] <= sl[2])
- refTsLength = sl[2];
- else if (sl[1] <= sl[2] <= sl[0] || sl[2] <= sl[1] <= sl[0])
- refTsLength = sl[0];
- else
- refTsLength = sl[1];
- free(sl);
- //go back at the beginning of the first series (ready to read '\n'...)
- fseek(ifile, headerShift-1, SEEK_SET);
+ //TODO: complete timedate vector from first_time and last_time
+ // --> this gives (expected) tsLength for free
+ FILE* ifile = fopen(ifileName, "r");
// output file to write time-series sequentially, CSV format.
FILE* ofile = fopen(ofileName, "w");
+ // Skip header: consume characters until the first '\n' or '\r' has been read,
+ // or until end of file. Only one terminator character is consumed here.
+ char nextChar;
+ do
+ nextChar = fgetc(ifile);
+ while (!feof(ifile) && nextChar != '\n' && nextChar != '\r');
+
// process one client (ID in first column) at a time
uint64_t processedLines = 0; //execution trace
uint32_t seriesCount=0, skippedSeriesCount=0, tsLength=0;
fclose(ifile);
fclose(ofile);
}
+
+// Program entry point: forwards the command-line arguments to transform().
+// Arguments, in order (matching transform's parameters):
+// ifileName posID posTime posValue firstTime lastTime ofileName nbItems
+int main(int argc, char** argv)
+{
+ // transform() is given argv[1] through argv[8]; refuse to run with fewer arguments
+ if (argc < 9)
+ {
+  fprintf(stderr,
+   "Usage: %s ifileName posID posTime posValue firstTime lastTime ofileName nbItems\n",
+   argc > 0 ? argv[0] : "transform");
+  return 1;
+ }
+ //TODO: validate the numeric arguments (atoi does not report conversion errors)
+ transform(argv[1], atoi(argv[2]), atoi(argv[3]), atoi(argv[4]),
+ argv[5], argv[6], argv[7], atoi(argv[8]));
+ return 0;
+}