# Inputs: the daily weather table and the raw PM10 measurements.
# The PM10 readings still need to be aggregated by day before use.
meteo_df <- read.csv("meteo.csv")
pm10_df <- read.csv("pm10_mesures.csv")

# Add the derived columns with their default values. They are created in
# this order (Season, Week, Pollution) so the output column order is stable.
meteo_df$Season <- 0
meteo_df$Week <- 0
# -1 means "no PM10 information"; known levels are coded 0 to 2.
meteo_df$Pollution <- -1
# Aggregate the PM10 measurements into one mean per day, then fill the
# Pollution, Season and Week columns of meteo_df.
#
# Conventions kept from the original row-by-row implementation:
#   * column 1 of pm10_df is a stamp whose second space-separated token is
#     the time of day, and column 2 is the value that gets averaged;
#   * a measurement stamped "0:15" opens a new day (the very first
#     measurement always opens day 1, whatever its stamp);
#   * days are matched to meteo_df rows by position: the k-th day found in
#     pm10_df belongs to the k-th row of meteo_df.
#     NOTE(review): nothing checks that the dates of both files agree;
#     confirm the two inputs cover the same days in the same order.

# Time-of-day token of every measurement (NA when the stamp has no such token).
pm10_times <- vapply(
  strsplit(as.character(pm10_df[[1]]), " ", fixed = TRUE),
  function(parts) parts[2],
  character(1)
)

# A missing or malformed stamp never opens a new day. (The previous scalar
# comparison aborted with "missing value where TRUE/FALSE needed" instead.)
opens_day <- !is.na(pm10_times) & pm10_times == "0:15"
if (length(opens_day) > 0) {
  opens_day[1] <- TRUE
}
day_index <- cumsum(opens_day)

# One mean per day, ignoring missing measurements. A day with no usable
# measurement yields NaN.
daily_pm10 <- unname(vapply(
  split(pm10_df[[2]], day_index),
  mean,
  numeric(1),
  na.rm = TRUE
))

# Positional match; meteo rows beyond the last PM10 day get NA.
pm10_level <- daily_pm10[seq_len(nrow(meteo_df))]

# Pollution level: 0 for mean < 30, 1 for 30 <= mean <= 50, 2 for mean > 50.
# Rows without information keep the value they already have (-1).
has_level <- !is.na(pm10_level)
meteo_df$Pollution[has_level] <-
  (pm10_level[has_level] >= 30) + (pm10_level[has_level] > 50)

# Season: 1 when the month field (text before the first "/") is April to
# August, else 0. Zero-padded months ("04") are accepted as well, in line
# with the "%m" date format used just below.
date_text <- as.character(meteo_df$Date)
month_text <- sub("/.*$", "", date_text)
warm_months <- c(as.character(4:8), sprintf("%02d", 4:8))
meteo_df$Season <- as.numeric(month_text %in% warm_months)

# Week: 0 on Saturday (wday 6) and Sunday (wday 0), 1 otherwise.
# Dates that fail to parse count as weekdays, as before.
meteo_dates <- strptime(date_text, "%m/%d/%Y", tz = "GMT")
meteo_df$Week <- as.numeric(!(meteo_dates$wday %in% c(6, 0)))

# Background reading on conditional replacement of data frame values:
# http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame

# Save the augmented weather table, leaving out the row names.
write.csv(meteo_df, "meteo_extra.csv", row.names = FALSE)