# augment_meteo.R ----
# Adds three derived columns to the daily meteo table and writes the result:
#   Season     1 when the month field of Date is 4..8, else 0
#   Week       0 on Saturday/Sunday, 1 otherwise
#   Pollution  -1 when no PM10 value is available for the day, otherwise
#              0 (mean < 30), 1 (30 <= mean <= 50) or 2 (mean > 50)
# Reads "meteo.csv" and "pm10_mesures.csv" from the working directory and
# writes "meteo_extra.csv" there.

meteo_df <- read.csv("meteo.csv")

# Need to load and aggregate PM10 by days
pm10_df <- read.csv("pm10_mesures.csv")

n_days <- nrow(meteo_df)
date_text <- as.character(meteo_df$Date)

# Season ----
# The month is the text before the first "/". Zero-padded months ("04") are
# accepted as well as unpadded ones ("4"), since the "%m/%d/%Y" parse used
# below for the weekday accepts both spellings.
warm_months <- 4:8
warm_month_fields <- c(as.character(warm_months), sprintf("%02d", warm_months))
month_field <- sub("/.*$", "", date_text)
meteo_df$Season <- as.numeric(month_field %in% warm_month_fields)

# Week ----
# POSIXlt wday: 0 = Sunday, 6 = Saturday. A date that fails to parse has an
# NA weekday, which %in% treats as "not a weekend day", so it gets 1.
parsed_dates <- strptime(date_text, "%m/%d/%Y", tz = "GMT")
meteo_df$Week <- as.numeric(!(parsed_dates$wday %in% c(6, 0)))

# Pollution ----
# Column 1 of the PM10 file is a "<date> <time>" stamp, column 2 the measure.
# A new day starts on the first line and on every line whose time field is
# "0:15". %in% (rather than ==) keeps a missing or time-less stamp from
# producing NA, which would abort a scalar `if`.
stamp_parts <- strsplit(as.character(pm10_df[[1L]]), " ", fixed = TRUE)
time_field <- vapply(stamp_parts, function(parts) parts[2L], character(1))
starts_day <- time_field %in% "0:15"
if (length(starts_day) > 0L) {
  starts_day[1L] <- TRUE
}
day_id <- cumsum(starts_day)

# Mean PM10 per day; a day with only missing measures yields NaN.
daily_mean <- vapply(
  split(pm10_df[[2L]], day_id), mean, numeric(1),
  na.rm = TRUE
)

# Days are matched to meteo rows by position: the k-th PM10 day goes to the
# k-th meteo row. NOTE(review): this assumes both files start on the same day
# and have no missing days -- confirm against the data.
# Rows beyond the available PM10 days get NA here and keep -1 below.
daily_mean <- unname(daily_mean)[seq_len(n_days)]

# Fill Pollution column: -1 if no info, 0 to 2 for pollution level
has_pm10 <- !is.na(daily_mean)
pollution <- rep(-1, n_days)
pollution[has_pm10] <-
  (daily_mean[has_pm10] >= 30) + (daily_mean[has_pm10] > 50)
meteo_df$Pollution <- pollution

# see also:
# http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame

# Finally write new data
write.csv(meteo_df, file = "meteo_extra.csv", row.names = FALSE)