From: Benjamin Auder <benjamin.auder@somewhere>
Date: Wed, 22 Feb 2017 20:24:08 +0000 (+0100)
Subject: advance on tests; almost there...
X-Git-Url: https://git.auder.net/app_dev.php/doc/current/rpsls.js?a=commitdiff_plain;h=da7f46b8108ba9e39f24cbe29284064501a81175;p=talweg.git

advance on tests; almost there...
---

diff --git a/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R
index 19efe99..11649f3 100644
--- a/data/scripts/augment_meteo.R
+++ b/data/scripts/augment_meteo.R
@@ -1,27 +1,18 @@
 meteo_df = read.csv("meteo.csv")
 
+#inspiration
+#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
+
 meteo_df$Season = 0
 meteo_df$Week = 0
 meteo_df$Pollution = -1
 
-#Need to load and aggregate PM10 by days
-pm10_df = read.csv("pm10_mesures.csv")
+#Need to load and aggregate PM10 by days: use getData() from package
+data = getData(..., predict_at=0) #TODO:
 
-line_number = 1 #line number in pm10 file
-for (i in 1:(nrow(meteo_df)))
+for (i in 1:nrow(meteo_df))
 {
-	pm10s = c()
-	repeat {
-	{
-		pm10s = c(pm10s, pm10_df[line_number,2])
-		line_number = line_number + 1
-	};
-	if (line_number >= nrow(pm10_df)+1
-		|| strsplit(as.character(pm10_df[line_number,1])," ")[[1]][2] == '0:15')
-	{
-		break
-	}}
-	pm10_level = mean(pm10s, na.rm=TRUE)
+	pm10_level = data$getLevel(i)
 	#Fill Pollution column: -1 if no info, 0 to 2 for pollution level
 	if (!is.nan(pm10_level))
 	{
@@ -41,8 +32,5 @@ for (i in 1:(nrow(meteo_df)))
 	meteo_df$Week[i] = ifelse(current_datetime$wday %in% c(6,0), 0, 1)
 }
 
-#see also:
-#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
-
 #Finally write new data
 write.csv(meteo_df, file="meteo_extra.csv", row.names=FALSE)
diff --git a/data/scripts/build_testdata.R b/data/scripts/build_testdata.R
index 516eae6..e0bc153 100644
--- a/data/scripts/build_testdata.R
+++ b/data/scripts/build_testdata.R
@@ -1,12 +1,16 @@
-# Only one covariable (so that the "matrix" is always invertible)
-
 #Lundi à dimanche, 182 jours
 dates = seq(as.Date("2007-01-01"),as.Date("2007-07-01"),"days")
-L = length(dates) #182
-exo_df = data.frame( "Date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
-ts_df = 
+times = strftime( seq(
+	from=ISOdatetime(2007,1,1,1,0,0,tz="GMT"),
+	to=ISOdatetime(2007,7,2,0,0,0,tz="GMT"),
+	by="hour") )
 
+# Only one covariable (so that the "matrix" is always invertible)
+L = length(dates) #182
+exo_df = data.frame( "date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
 # Series are 1,1,...,1 2,2,...,2 ... 7,7,...,7 1,1,...,1 ...etc
-# First serie on monday 2007-01-01
+# First serie on monday 2007-01-01, there are exactly 26 weeks
+ts_df = data.frame( "time"=times, "value"=rep(seq_len(7), each=24, times=26) )
 
-#TODO:
+write.csv(ts_df, file="ts_test.csv", row.names=FALSE)
+write.csv(exo_df, file="exo_test.csv", row.names=FALSE)
diff --git a/data/scripts/fill_NAs.R b/data/scripts/fill_NAs.R
index 72af4cb..8032662 100644
--- a/data/scripts/fill_NAs.R
+++ b/data/scripts/fill_NAs.R
@@ -9,21 +9,17 @@ fill_NAs = function(file)
 	last_noNA_indices = rep(0, p)
 	while (line <= n)
 	{
-#		print(line)
 		for (i in 1:p)
 		{
-#			if (is.numeric(dat[line,i]))
 			if (!is.na(dat[line,i]))
 			{
 				if (last_noNA_indices[i] < line-1)
 				{
 					#do some completion
-					meanVal = ifelse(last_noNA_indices[i]>0,
-												 0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
-												 dat[line,i])
-
-					for (j in (last_noNA_indices[i]+1):(line-1))
-						dat[j,i] = meanVal
+					meanVal = ifelse( last_noNA_indices[i]>0,
+						0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
+						dat[line,i] )
+					dat[(last_noNA_indices[i]+1):(line-1),i] = meanVal
 				}
 				last_noNA_indices[i] = line
 			}
@@ -35,10 +31,7 @@ fill_NAs = function(file)
 	for (i in 1:p)
 	{
 		if (last_noNA_indices[i] < n)
-		{
-			for (j in (last_noNA_indices[i]+1):n)
-				dat[j,i] = dat[last_noNA_indices[i],i]
-		}
+			dat[(last_noNA_indices[i]+1):n,i] = dat[last_noNA_indices[i],i]
 	}
 
 	dat_with_date[,2:(p+1)] = dat