From: Benjamin Auder <benjamin.auder@somewhere>
Date: Mon, 6 Mar 2017 09:02:03 +0000 (+0100)
Subject: preprocessing
X-Git-Url: https://git.auder.net/doc/html/app_dev.php/index.css?a=commitdiff_plain;h=a570aa7de2f617eecc284da8d2acc3e59d1da942;p=epclust.git

preprocessing
---

diff --git a/.gitignore b/.gitignore
index f577abe..dbcc2f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 #datasets
 /data/*
 !/data/README
+!/data/preprocessing/
 
 #files generated by initialize.sh
 /.gitfat
diff --git a/data/preprocessing/convert_ts.R b/data/preprocessing/convert_ts.R
new file mode 100644
index 0000000..6c48b07
--- /dev/null
+++ b/data/preprocessing/convert_ts.R
@@ -0,0 +1,21 @@
+convert = function(orig_csv_file, nb_series_per_chunk)
+{
+	orig_file = file(orig_csv_file, open="r")
+	ignored = readLines(orig_file, 1) #skip header
+
+	serie_length = 17520 #365*24*2
+	library(sqldf, quietly=TRUE)
+	ids = read.csv.sql(file_csv, header = TRUE, sep = ","
+		sql = "select * from file_csv group by FK_CCU_ID")
+	index = 0
+	repeat
+	{
+		if (index+1 >= length(ids))
+			break
+		request = "select CPP_DATE_PUISSANCE,CPP_PUISSANCE_BRUTE where FK_CCU_ID in ("
+		for (id in ids[index + seq_len(nb_series_per_chunk)])
+			request = paste(request, id, ",", sep="")
+		request = paste(request, ") order by FK_CCU_ID,CPP_DATE_PUISSANCE", sep="")
+		series_chunk = read.csv.sql(file_csv, header = TRUE, sep = ",", sql = request)
+		
+		index = index + 17520