advance on tests; almost there...

author Benjamin Auder <benjamin.auder@somewhere>

Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)

committer Benjamin Auder <benjamin.auder@somewhere>

Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
author Benjamin Auder <benjamin.auder@somewhere>
Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
committer Benjamin Auder <benjamin.auder@somewhere>
Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
diff --git a/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R

index 19efe99..11649f3 100644 (file)
--- a/data/scripts/augment_meteo.R
+++ b/data/scripts/augment_meteo.R
@@ -1,27 +1,18 @@
  meteo_df = read.csv("meteo.csv")
  
+#inspiration
+#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
+
  meteo_df$Season = 0
  meteo_df$Week = 0
  meteo_df$Pollution = -1
  
-#Need to load and aggregate PM10 by days
-pm10_df = read.csv("pm10_mesures.csv")
+#Need to load and aggregate PM10 by days: use getData() from package
+data = getData(..., predict_at=0) #TODO:
  
-line_number = 1 #line number in pm10 file
-for (i in 1:(nrow(meteo_df)))
+for (i in 1:nrow(meteo_df))
  {
-       pm10s = c()
-       repeat {
-       {
-               pm10s = c(pm10s, pm10_df[line_number,2])
-               line_number = line_number + 1
-       };
-       if (line_number >= nrow(pm10_df)+1
-               || strsplit(as.character(pm10_df[line_number,1])," ")[[1]][2] == '0:15')
-       {
-               break
-       }}
-       pm10_level = mean(pm10s, na.rm=TRUE)
+       pm10_level = data$getLevel(i)
         #Fill Pollution column: -1 if no info, 0 to 2 for pollution level
         if (!is.nan(pm10_level))
         {
@@ -41,8 +32,5 @@ for (i in 1:(nrow(meteo_df)))
         meteo_df$Week[i] = ifelse(current_datetime$wday %in% c(6,0), 0, 1)
  }
  
-#see also:
-#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
-
  #Finally write new data
  write.csv(meteo_df, file="meteo_extra.csv", row.names=FALSE)
diff --git a/data/scripts/build_testdata.R b/data/scripts/build_testdata.R

index 516eae6..e0bc153 100644 (file)
--- a/data/scripts/build_testdata.R
+++ b/data/scripts/build_testdata.R
@@ -1,12 +1,16 @@
-# Only one covariable (so that the "matrix" is always invertible)
-
  #Lundi à dimanche, 182 jours
  dates = seq(as.Date("2007-01-01"),as.Date("2007-07-01"),"days")
-L = length(dates) #182
-exo_df = data.frame( "Date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
-ts_df = 
+times = strftime( seq(
+       from=ISOdatetime(2007,1,1,1,0,0,tz="GMT"),
+       to=ISOdatetime(2007,7,2,0,0,0,tz="GMT"),
+       by="hour") )
  
+# Only one covariable (so that the "matrix" is always invertible)
+L = length(dates) #182
+exo_df = data.frame( "date"=dates, "var"=rep(1,L), "var_pred"=rep(1,L) )
  # Series are 1,1,...,1 2,2,...,2 ... 7,7,...,7 1,1,...,1 ...etc
-# First serie on monday 2007-01-01
+# First serie on monday 2007-01-01, there are exactly 26 weeks
+ts_df = data.frame( "time"=times, "value"=rep(seq_len(7), each=24, times=26) )
  
-#TODO:
+write.csv(ts_df, file="ts_test.csv", row.names=FALSE)
+write.csv(exo_df, file="exo_test.csv", row.names=FALSE)
diff --git a/data/scripts/fill_NAs.R b/data/scripts/fill_NAs.R

index 72af4cb..8032662 100644 (file)
--- a/data/scripts/fill_NAs.R
+++ b/data/scripts/fill_NAs.R
@@ -9,21 +9,17 @@ fill_NAs = function(file)
         last_noNA_indices = rep(0, p)
         while (line <= n)
         {
-#              print(line)
                 for (i in 1:p)
                 {
-#                      if (is.numeric(dat[line,i]))
                         if (!is.na(dat[line,i]))
                         {
                                 if (last_noNA_indices[i] < line-1)
                                 {
                                         #do some completion
-                                       meanVal = ifelse(last_noNA_indices[i]>0,
-                                                                                                0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
-                                                                                                dat[line,i])
-
-                                       for (j in (last_noNA_indices[i]+1):(line-1))
-                                               dat[j,i] = meanVal
+                                       meanVal = ifelse( last_noNA_indices[i]>0,
+                                               0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
+                                               dat[line,i] )
+                                       dat[(last_noNA_indices[i]+1):(line-1),i] = meanVal
                                 }
                                 last_noNA_indices[i] = line
                         }
@@ -35,10 +31,7 @@ fill_NAs = function(file)
         for (i in 1:p)
         {
                 if (last_noNA_indices[i] < n)
-               {
-                       for (j in (last_noNA_indices[i]+1):n)
-                               dat[j,i] = dat[last_noNA_indices[i],i]
-               }
+                       dat[(last_noNA_indices[i]+1):n,i] = dat[last_noNA_indices[i],i]
         }
  
         dat_with_date[,2:(p+1)] = dat
author	Benjamin Auder <benjamin.auder@somewhere>
	Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
committer	Benjamin Auder <benjamin.auder@somewhere>
	Wed, 22 Feb 2017 20:24:08 +0000 (21:24 +0100)
data/scripts/augment_meteo.R		patch | blob | blame | history
data/scripts/build_testdata.R		patch | blob | blame | history
data/scripts/fill_NAs.R		patch | blob | blame | history