From a570aa7de2f617eecc284da8d2acc3e59d1da942 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 6 Mar 2017 10:02:03 +0100 Subject: [PATCH] preprocessing --- .gitignore | 1 + data/preprocessing/convert_ts.R | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 data/preprocessing/convert_ts.R diff --git a/.gitignore b/.gitignore index f577abe..dbcc2f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ #datasets /data/* !/data/README +!/data/preprocessing/ #files generated by initialize.sh /.gitfat diff --git a/data/preprocessing/convert_ts.R b/data/preprocessing/convert_ts.R new file mode 100644 index 0000000..6c48b07 --- /dev/null +++ b/data/preprocessing/convert_ts.R @@ -0,0 +1,21 @@ +convert = function(orig_csv_file, nb_series_per_chunk) +{ + orig_file = file(orig_csv_file, open="r") + ignored = readLines(orig_file, 1) #skip header + + serie_length = 17520 #365*24*2 + library(sqldf, quietly=TRUE) + ids = read.csv.sql(file_csv, header = TRUE, sep = "," + sql = "select * from file_csv group by FK_CCU_ID") + index = 0 + repeat + { + if (index+1 >= length(ids)) + break + request = "select CPP_DATE_PUISSANCE,CPP_PUISSANCE_BRUTE where FK_CCU_ID in (" + for (id in ids[index + seq_len(nb_series_per_chunk)]) + request = paste(request, id, ",", sep="") + request = paste(request, ") order by FK_CCU_ID,CPP_DATE_PUISSANCE", sep="") + series_chunk = read.csv.sql(file_csv, header = TRUE, sep = ",", sql = request) + + index = index + 17520 -- 2.44.0