c762fe81470fc93fb1a515fb6ae9c8d21a63b192
[talweg.git] / data / scripts / augment_meteo.R
# Augment the meteo table read from "meteo.csv" with three indicator columns
# (Season, Week, Pollution) and write the result to "meteo_extra.csv".

meteo_df <- read.csv("meteo.csv")
n_days <- nrow(meteo_df)

# inspiration
# http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame

# Need to load and aggregate PM10 by days: use getData() from package
# NOTE(review): getData() is not defined in this script; presumably the talweg
# package has to be attached before running it -- confirm.
ts_data <- system.file("extdata", "pm10_mesures_H_loc.csv", package = "talweg")
exo_data <- system.file("extdata", "meteo_extra_noNAs.csv", package = "talweg")
data <- getData(ts_data, exo_data, input_tz = "Europe/Paris",
  working_tz = "Europe/Paris", predict_at = 0)

# One PM10 level per row of meteo_df.
# NOTE(review): this assumes row i of meteo.csv matches index i of `data`
# -- verify against the package data.
pm10_levels <- vapply(
  seq_len(n_days),
  function(i) data$getLevel(i),
  numeric(1)
)

# Fill Pollution column: -1 if no info, 0 to 2 for pollution level.
# is.na() is TRUE for both NaN and NA, so a plain NA level is treated as
# "no info" instead of aborting the comparison.
has_level <- !is.na(pm10_levels)
known_levels <- pm10_levels[has_level]
meteo_df$Pollution <- rep(-1, n_days)
meteo_df$Pollution[has_level] <- ifelse(
  known_levels < 30, 0,
  ifelse(known_levels <= 50, 1, 2) # 1: 30 <= pm10 <= 50, 2: pm10 > 50
)

# Also fill season + days of week variables
date_txt <- as.character(meteo_df$Date)

# Season: 1 when the month (first "/"-separated field of Date) is 4 to 8,
# 0 otherwise. Leading zeros are dropped first so "04/..." and "4/..." are
# classified the same way.
month_txt <- sub("^0+", "", sub("/.*$", "", date_txt))
meteo_df$Season <- as.numeric(month_txt %in% c("4", "5", "6", "7", "8"))

# Week: 0 on Saturday (wday 6) or Sunday (wday 0), 1 otherwise. A date that
# cannot be parsed has wday NA and therefore ends up as 1.
parsed_dates <- strptime(date_txt, "%m/%d/%Y", tz = "GMT")
meteo_df$Week <- as.numeric(!(parsed_dates$wday %in% c(6, 0)))

# Finally write new data
write.csv(meteo_df, file = "meteo_extra.csv", row.names = FALSE)