all set-up to prepare ultimate test before last reports
[talweg.git] / data / scripts / augment_meteo.R
diff --git a/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R
new file mode 100644 (file)
index 0000000..19efe99
--- /dev/null
@@ -0,0 +1,48 @@
+# Augment the daily meteo table with Season, Week and Pollution indicators.
+# Reads "meteo.csv" (one row per day, Date column in month/day/year format)
+# and "pm10_mesures.csv" (column 1: "date time" stamps, column 2: PM10 values),
+# then writes the enriched table to "meteo_extra.csv".
+
+meteo_df = read.csv("meteo.csv")
+pm10_df = read.csv("pm10_mesures.csv")
+n_days = nrow(meteo_df)
+
+#Need to aggregate PM10 by days, without looping over single measures.
+#Time of day = second space-separated token of each PM10 timestamp
+#(NA when a timestamp has no time part).
+pm10_times = vapply(strsplit(as.character(pm10_df[[1]]), " "),
+       function(parts) parts[2], character(1))
+
+#A new day starts on every '0:15' measure; the first row always opens day 1.
+#%in% never yields NA, so a malformed timestamp just stays in the current day
+#instead of aborting the script.
+starts_day = seq_along(pm10_times) == 1 | pm10_times %in% "0:15"
+day_index = cumsum(starts_day)
+
+#Daily PM10 means (NaN when a day holds only missing values), matched by
+#position with the rows of meteo_df; days beyond the PM10 data become NA.
+daily_pm10 = vapply(split(pm10_df[[2]], day_index), mean, numeric(1),
+       na.rm=TRUE)
+daily_pm10 = unname(daily_pm10)[seq_len(n_days)]
+
+#Season: 1 from April to August, 0 otherwise. The month is compared as a
+#number so that zero-padded months ("04") are recognised too.
+dates = as.character(meteo_df$Date)
+meteo_df$Season = ifelse(as.integer(sub("/.*", "", dates)) %in% 4:8, 1, 0)
+
+#Week: 0 on Saturday/Sunday, 1 otherwise (including unparseable dates)
+week_days = strptime(dates, "%m/%d/%Y", tz="GMT")$wday
+meteo_df$Week = ifelse(week_days %in% c(6,0), 0, 1)
+
+#Pollution: -1 if no info, else 0 (mean < 30), 1 (30 to 50) or 2 (> 50)
+pollution = ifelse(daily_pm10 < 30, 0, ifelse(daily_pm10 <= 50, 1, 2))
+meteo_df$Pollution = ifelse(is.na(pollution), -1, pollution)
+
+#Columns are assigned as whole vectors, so an empty meteo file is handled too.
+
+#see also:
+#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
+
+#Finally write new data
+write.csv(meteo_df, file="meteo_extra.csv", row.names=FALSE)