From da7f46b8108ba9e39f24cbe29284064501a81175 Mon Sep 17 00:00:00 2001
From: Benjamin Auder
Date: Wed, 22 Feb 2017 21:24:08 +0100
Subject: [PATCH] advance on tests; almost there...

---
 data/scripts/augment_meteo.R  | 26 +++++++-------------------
 data/scripts/build_testdata.R | 18 +++++++++++-------
 data/scripts/fill_NAs.R       | 17 +++++------------
 3 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R
index 19efe99..11649f3 100644
--- a/data/scripts/augment_meteo.R
+++ b/data/scripts/augment_meteo.R
@@ -1,27 +1,18 @@
 meteo_df = read.csv("meteo.csv")
 
+#inspiration
+#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
+
 meteo_df$Season = 0
 meteo_df$Week = 0
 meteo_df$Pollution = -1
 
-#Need to load and aggregate PM10 by days
-pm10_df = read.csv("pm10_mesures.csv")
+#Need to load and aggregate PM10 by days: use getData() from package
+data = getData(..., predict_at=0) #TODO:
 
-line_number = 1 #line number in pm10 file
-for (i in 1:(nrow(meteo_df)))
+for (i in 1:nrow(meteo_df))
 {
-	pm10s = c()
-	repeat {
-		{
-			pm10s = c(pm10s, pm10_df[line_number,2])
-			line_number = line_number + 1
-		};
-		if (line_number >= nrow(pm10_df)+1
-			|| strsplit(as.character(pm10_df[line_number,1])," ")[[1]][2] == '0:15')
-		{
-			break
-		}}
-	pm10_level = mean(pm10s, na.rm=TRUE)
+	pm10_level = data$getLevel(i)
 	#Fill Pollution column: -1 if no info, 0 to 2 for pollution level
 	if (!is.nan(pm10_level))
 	{
@@ -41,8 +32,5 @@ for (i in 1:(nrow(meteo_df)))
 	meteo_df$Week[i] = ifelse(current_datetime$wday %in% c(6,0), 0, 1)
 }
 
-#see also:
-#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
-
 #Finally write new data
 write.csv(meteo_df, file="meteo_extra.csv", row.names=FALSE)
diff --git a/data/scripts/build_testdata.R b/data/scripts/build_testdata.R
index 516eae6..e0bc153 100644
--- a/data/scripts/build_testdata.R
+++ b/data/scripts/build_testdata.R
@@ -1,12 +1,16 @@
-# Only one covariable (so that the "matrix" is always invertible)
-
 #Lundi à dimanche, 182 jours
 dates = seq(as.Date("2007-01-01"),as.Date("2007-07-01"),"days")
-L = length(dates) #182
-exo_df = data.frame( "Date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
-ts_df =
+times = strftime( seq(
+	from=ISOdatetime(2007,1,1,1,0,0,tz="GMT"),
+	to=ISOdatetime(2007,7,2,0,0,0,tz="GMT"),
+	by="hour") )
 
+# Only one covariable (so that the "matrix" is always invertible)
+L = length(dates) #182
+exo_df = data.frame( "date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
 # Series are 1,1,...,1 2,2,...,2 ... 7,7,...,7 1,1,...,1 ...etc
-# First serie on monday 2007-01-01
+# First serie on monday 2007-01-01, there are exactly 26 weeks
+ts_df = data.frame( "time"=times, "value"=rep(seq_len(7), each=24, times=26) )
 
-#TODO:
+write.csv(ts_df, file="ts_test.csv", row.names=FALSE)
+write.csv(exo_df, file="exo_test.csv", row.names=FALSE)
diff --git a/data/scripts/fill_NAs.R b/data/scripts/fill_NAs.R
index 72af4cb..8032662 100644
--- a/data/scripts/fill_NAs.R
+++ b/data/scripts/fill_NAs.R
@@ -9,21 +9,17 @@ fill_NAs = function(file)
 	last_noNA_indices = rep(0, p)
 	while (line <= n)
 	{
-#		print(line)
 		for (i in 1:p)
 		{
-#			if (is.numeric(dat[line,i]))
 			if (!is.na(dat[line,i]))
 			{
 				if (last_noNA_indices[i] < line-1)
 				{
 					#do some completion
-					meanVal = ifelse(last_noNA_indices[i]>0,
-						0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
-						dat[line,i])
-
-					for (j in (last_noNA_indices[i]+1):(line-1))
-						dat[j,i] = meanVal
+					meanVal = ifelse( last_noNA_indices[i]>0,
+						0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
+						dat[line,i] )
+					dat[(last_noNA_indices[i]+1):(line-1),i] = meanVal
 				}
 				last_noNA_indices[i] = line
 			}
@@ -35,10 +31,7 @@ fill_NAs = function(file)
 	for (i in 1:p)
 	{
 		if (last_noNA_indices[i] < n)
-		{
-			for (j in (last_noNA_indices[i]+1):n)
-				dat[j,i] = dat[last_noNA_indices[i],i]
-		}
+			dat[(last_noNA_indices[i]+1):n,i] = dat[last_noNA_indices[i],i]
 	}
 
 	dat_with_date[,2:(p+1)] = dat
-- 
2.44.0