advance on tests; almost there...
author Benjamin Auder <benjamin.auder@somewhere>
Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
committer Benjamin Auder <benjamin.auder@somewhere>
Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
data/scripts/augment_meteo.R
data/scripts/build_testdata.R
data/scripts/fill_NAs.R

diff --git a/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R
index 19efe99..11649f3 100644 (file)
@@ -1,27 +1,18 @@
 meteo_df = read.csv("meteo.csv")
 
+#inspiration
+#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
+
 meteo_df$Season = 0
 meteo_df$Week = 0
 meteo_df$Pollution = -1
 
-#Need to load and aggregate PM10 by days
-pm10_df = read.csv("pm10_mesures.csv")
+#Need to load and aggregate PM10 by days: use getData() from package
+data = getData(..., predict_at=0) #TODO:
 
-line_number = 1 #line number in pm10 file
-for (i in 1:(nrow(meteo_df)))
+for (i in 1:nrow(meteo_df))
 {
-       pm10s = c()
-       repeat {
-       {
-               pm10s = c(pm10s, pm10_df[line_number,2])
-               line_number = line_number + 1
-       };
-       if (line_number >= nrow(pm10_df)+1
-               || strsplit(as.character(pm10_df[line_number,1])," ")[[1]][2] == '0:15')
-       {
-               break
-       }}
-       pm10_level = mean(pm10s, na.rm=TRUE)
+       pm10_level = data$getLevel(i)
        #Fill Pollution column: -1 if no info, 0 to 2 for pollution level
        if (!is.nan(pm10_level))
        {
@@ -41,8 +32,5 @@ for (i in 1:(nrow(meteo_df)))
        meteo_df$Week[i] = ifelse(current_datetime$wday %in% c(6,0), 0, 1)
 }
 
-#see also:
-#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
-
 #Finally write new data
 write.csv(meteo_df, file="meteo_extra.csv", row.names=FALSE)
diff --git a/data/scripts/build_testdata.R b/data/scripts/build_testdata.R
index 516eae6..e0bc153 100644 (file)
@@ -1,12 +1,16 @@
-# Only one covariable (so that the "matrix" is always invertible)
-
 #Lundi à dimanche, 182 jours
 dates = seq(as.Date("2007-01-01"),as.Date("2007-07-01"),"days")
-L = length(dates) #182
-exo_df = data.frame( "Date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
-ts_df = 
+times = strftime( seq(
+       from=ISOdatetime(2007,1,1,1,0,0,tz="GMT"),
+       to=ISOdatetime(2007,7,2,0,0,0,tz="GMT"),
+       by="hour") )
 
+# Only one covariable (so that the "matrix" is always invertible)
+L = length(dates) #182
+exo_df = data.frame( "date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
 # Series are 1,1,...,1 2,2,...,2 ... 7,7,...,7 1,1,...,1 ...etc
-# First serie on monday 2007-01-01
+# First serie on monday 2007-01-01, there are exactly 26 weeks
+ts_df = data.frame( "time"=times, "value"=rep(seq_len(7), each=24, times=26) )
 
-#TODO:
+write.csv(ts_df, file="ts_test.csv", row.names=FALSE)
+write.csv(exo_df, file="exo_test.csv", row.names=FALSE)
diff --git a/data/scripts/fill_NAs.R b/data/scripts/fill_NAs.R
index 72af4cb..8032662 100644 (file)
@@ -9,21 +9,17 @@ fill_NAs = function(file)
        last_noNA_indices = rep(0, p)
        while (line <= n)
        {
-#              print(line)
                for (i in 1:p)
                {
-#                      if (is.numeric(dat[line,i]))
                        if (!is.na(dat[line,i]))
                        {
                                if (last_noNA_indices[i] < line-1)
                                {
                                        #do some completion
-                                       meanVal = ifelse(last_noNA_indices[i]>0,
-                                                                                                0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
-                                                                                                dat[line,i])
-
-                                       for (j in (last_noNA_indices[i]+1):(line-1))
-                                               dat[j,i] = meanVal
+                                       meanVal = ifelse( last_noNA_indices[i]>0,
+                                               0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
+                                               dat[line,i] )
+                                       dat[(last_noNA_indices[i]+1):(line-1),i] = meanVal
                                }
                                last_noNA_indices[i] = line
                        }
@@ -35,10 +31,7 @@ fill_NAs = function(file)
        for (i in 1:p)
        {
                if (last_noNA_indices[i] < n)
-               {
-                       for (j in (last_noNA_indices[i]+1):n)
-                               dat[j,i] = dat[last_noNA_indices[i],i]
-               }
+                       dat[(last_noNA_indices[i]+1):n,i] = dat[last_noNA_indices[i],i]
        }
 
        dat_with_date[,2:(p+1)] = dat