From: Benjamin Auder Date: Mon, 6 Mar 2017 09:02:03 +0000 (+0100) Subject: preprocessing X-Git-Url: https://git.auder.net/%7B%7B%20asset%28%27mixstore/css/current/pieces/doc/%3C?a=commitdiff_plain;h=a570aa7de2f617eecc284da8d2acc3e59d1da942;p=epclust.git preprocessing --- diff --git a/.gitignore b/.gitignore index f577abe..dbcc2f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ #datasets /data/* !/data/README +!/data/preprocessing/ #files generated by initialize.sh /.gitfat diff --git a/data/preprocessing/convert_ts.R b/data/preprocessing/convert_ts.R new file mode 100644 index 0000000..6c48b07 --- /dev/null +++ b/data/preprocessing/convert_ts.R @@ -0,0 +1,21 @@ +convert = function(orig_csv_file, nb_series_per_chunk) +{ + orig_file = file(orig_csv_file, open="r") + ignored = readLines(orig_file, 1) #skip header + + serie_length = 17520 #365*24*2 + library(sqldf, quietly=TRUE) + ids = read.csv.sql(file_csv, header = TRUE, sep = "," + sql = "select * from file_csv group by FK_CCU_ID") + index = 0 + repeat + { + if (index+1 >= length(ids)) + break + request = "select CPP_DATE_PUISSANCE,CPP_PUISSANCE_BRUTE where FK_CCU_ID in (" + for (id in ids[index + seq_len(nb_series_per_chunk)]) + request = paste(request, id, ",", sep="") + request = paste(request, ") order by FK_CCU_ID,CPP_DATE_PUISSANCE", sep="") + series_chunk = read.csv.sql(file_csv, header = TRUE, sep = ",", sql = request) + + index = index + 17520