Initial commit
[talweg.git] / data / scripts / augment_meteo.R
# Augment the daily meteo table with three derived columns and save the result.
#
#   Season    : 1 when the month of Date is April..August, 0 otherwise
#   Week      : 1 for Monday..Friday, 0 for Saturday/Sunday
#   Pollution : daily mean PM10 class: 0 (< 30), 1 (30..50), 2 (> 50),
#               or -1 when no PM10 value is available for that day
#
# Inputs (read from the working directory):
#   meteo.csv        - must contain a Date column written as month/day/year
#   pm10_mesures.csv - column 1: "<date> <time>" stamp, column 2: PM10 value
# Output:
#   meteo_extra.csv  - meteo.csv plus the Season, Week and Pollution columns
#
# NOTE(review): PM10 days are matched to meteo rows by position only (k-th day
# found in the PM10 file <-> k-th row of meteo.csv); the dates themselves are
# never compared. Both files are assumed to start on the same day and to be
# sorted chronologically without gaps - confirm against the data.

meteo_df <- read.csv("meteo.csv")
pm10_df <- read.csv("pm10_mesures.csv")

n_days <- nrow(meteo_df)

# ---- Season and day-of-week ----

date_text <- as.character(meteo_df$Date)

# The month is the token before the first "/". Leading zeros are tolerated so
# that "04/15/2007" is recognised as April just like "4/15/2007".
in_season <- grepl("^0*[4-8](/|$)", date_text)

parsed_dates <- strptime(date_text, "%m/%d/%Y", tz = "GMT")
# POSIXlt wday: 0 = Sunday, 6 = Saturday
is_weekend <- parsed_dates$wday %in% c(6, 0)

# Columns are added in the order Season, Week, Pollution (order of the output).
meteo_df$Season <- ifelse(in_season, 1, 0)
meteo_df$Week <- ifelse(is_weekend, 0, 1)

# ---- Daily PM10 level ----

# Need to load and aggregate PM10 by days.
# A new day starts at every measurement stamped "0:15"; the very first row of
# the file always opens day 1, whatever its time is.
stamp_text <- as.character(pm10_df[[1]])
clock <- vapply(
  strsplit(stamp_text, " ", fixed = TRUE),
  function(parts) parts[2],
  character(1)
)
# %in% never yields NA, so a missing or malformed stamp simply continues the
# current day instead of aborting the script.
opens_day <- clock %in% "0:15"
if (length(opens_day) > 0) {
  opens_day[1] <- TRUE
}
day_index <- cumsum(opens_day)

# Mean PM10 per day; a day holding only missing values gives NaN.
daily_pm10 <- unname(
  vapply(split(pm10_df[[2]], day_index), mean, numeric(1), na.rm = TRUE)
)

# Fill Pollution column: -1 if no info, 0 to 2 for pollution level.
# Days present in the PM10 file beyond the last meteo row are ignored; meteo
# rows beyond the last PM10 day keep -1.
pollution <- rep(-1, n_days)
matched_rows <- seq_len(min(n_days, length(daily_pm10)))
pm10_level <- daily_pm10[matched_rows]
has_level <- !is.na(pm10_level)
# (level >= 30) + (level > 50) is 0 below 30, 1 from 30 to 50 included, 2 above.
pollution[matched_rows[has_level]] <-
  (pm10_level[has_level] >= 30) + (pm10_level[has_level] > 50)
meteo_df$Pollution <- pollution

# see also:
# http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame

# Finally write new data
write.csv(meteo_df, file = "meteo_extra.csv", row.names = FALSE)