# Enrich the daily meteo table with three extra columns and write the result
# to "meteo_extra.csv":
#   Season    - 1 for months April..August, 0 otherwise
#   Week      - 0 on Saturday/Sunday, 1 on any other day
#   Pollution - -1 if no PM10 info, else 0 (< 30), 1 (30..50) or 2 (> 50)
#
# Inspiration:
# http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
meteo_df <- read.csv("meteo.csv")
n_days <- nrow(meteo_df)

# Need to load and aggregate PM10 by days: use getData() from package.
# TODO: `...` is a placeholder for the real getData() arguments and must be
# filled in before this script can run.
data <- getData(..., predict_at = 0)

# One PM10 level per row of meteo_df, fetched by row index.
# NOTE(review): assumes data$getLevel(i) returns a single numeric value (or a
# missing value) for day i - confirm against the getData() package.
pm10_level <- vapply(
  seq_len(n_days),
  function(i) data$getLevel(i),
  numeric(1)
)

# Parse the dates once; both Season and Week are derived from the parsed value
# so that zero-padded months ("04/15/2015") are handled like "4/15/2015".
dates <- strptime(as.character(meteo_df$Date), "%m/%d/%Y", tz = "GMT")

# POSIXlt months are 0-based, hence the + 1. Unparseable dates give NA, which
# %in% treats as "not in the set" (Season 0, Week 1).
season <- ifelse((dates$mon + 1) %in% 4:8, 1, 0)
week <- ifelse(dates$wday %in% c(6, 0), 0, 1)

# Pollution level: start from "no info" and only classify known levels.
# is.na() is TRUE for both NA and NaN, so a missing level never reaches the
# threshold comparisons.
pollution <- rep(-1, n_days)
known <- !is.na(pm10_level)
pollution[known] <- ifelse(
  pm10_level[known] < 30,
  0,
  ifelse(pm10_level[known] <= 50, 1, 2)
)

# Append the columns in the same order as before: Season, Week, Pollution.
meteo_df$Season <- season
meteo_df$Week <- week
meteo_df$Pollution <- pollution

# Finally write new data
write.csv(meteo_df, file = "meteo_extra.csv", row.names = FALSE)