| 1 | predi <- function(ij) |
| 2 | { |
| 3 | #setwd("/Users/bp/Desktop/CONTRATS_AirNormand/2016/RapportFinalBruno") |
| 4 | #rm(list=ls()) |
| 5 | |
| 6 | # Lecture des données: pm = dataframe 2 colonnes, date-time puis PM10 horaire |
| 7 | pm = read.table("DATA/mesures_horaires_hloc_pm10_a_filer.csv",sep=",",dec=".",header=T) |
| 8 | #n = dim(pm)[1] |
| 9 | #datedebut = "10/12/2008" |
| 10 | |
| 11 | # Chargement des données météo et indicateurs: VarExp matrice des données météo, |
| 12 | # première colonne = date, première rangée = second jour |
| 13 | VarExp <- read.table("DATA/meteo_extra_jourMois.csv",sep=",",dec=".",header=T) |
| 14 | VarExp <- VarExp[-1,] |
| 15 | |
| 16 | # Lecture des dates: ??? sans doute liste des jours de 10/12/2008 à 06/2016... |
| 17 | #dates = read.table("DATA/Dates_jours.csv",header=F,as.is=T) |
| 18 | dates = VarExp[,1] #dates[,1] |
| 19 | |
| 20 | # Contruction des matrices de données: pm.h = matrice des séries en ligne |
| 21 | pm.h <- matrix(pm[,2],ncol=24,byrow=TRUE) |
| 22 | Nlignes = nrow(pm.h) |
| 23 | |
| 24 | # Data = matrice des couples de jours (séries de 48 PM10), |
| 25 | # première ligne = jour 1,2, dernière = jour N-1,N |
| 26 | Data = cbind(pm.h[1:(Nlignes -1), ], pm.h[2:Nlignes, ]) |
| 27 | |
| 28 | # dates2 = dates du 2eme jour au dernier |
| 29 | dates2 = dates[2:Nlignes] |
| 30 | |
| 31 | rownames(Data) = dates2 |
| 32 | # df contient l'ensemble des données. |
| 33 | #df <- cbind(Data,varexp[,-1]) |
| 34 | df <- Data |
| 35 | |
| 36 | # Complétion des variables exogènes (du 2eme jour) par les PM10 moyen ce même jour |
| 37 | PMjour <- apply(df[,25:48],1,mean,na.rm=T) |
| 38 | dfexp <- cbind(VarExp,PMjour) |
| 39 | |
| 40 | Dates = c( #Month/Day/Year, as in meteo file |
| 41 | "3/16/2015", |
| 42 | "1/19/2015", |
| 43 | "4/27/2015") |
| 44 | |
| 45 | Categorie = c("Epandage", "Chauffage", "Non Polluée") |
| 46 | |
| 47 | RepFig = "FIGURES_Etude" |
| 48 | |
| 49 | ResDates = NULL |
| 50 | |
| 51 | nbvois=10 |
| 52 | j=1 # numéro de semaine |
| 53 | #ij=6 # numéro du jour (0 = lundi) |
| 54 | |
| 55 | Err24 = NULL |
| 56 | ErrPrev = NULL |
| 57 | Kvois = NULL |
| 58 | |
| 59 | #for (Hc in 5:24) |
| 60 | #{ |
| 61 | Hc = 7 #predict at 7:00, from 8:00 |
| 62 | H=24+Hc #Hc: dernier PM10 connu avant prédiction des 24-Hc restants, le jour 2 |
| 63 | L = 1:H |
| 64 | |
| 65 | # Premier conditionnement : mois |
| 66 | indcond <- dfexp[,"Mois"] == 2 | dfexp[,"Mois"] == 3 | dfexp[,"Mois"] == 4 | dfexp[,"Mois"] == 9 | dfexp[,"Mois"] == 10 |
| 67 | data = df[indcond,] #restriction aux couples dont le mois du 2eme jour est 2,3,4,9 ou 10 |
| 68 | varexp = dfexp[indcond,] #de même sur les exogènes (+ PM10 moyens) |
| 69 | |
| 70 | nl = (1:nrow(data))[rownames(data)==Dates[j]] #numéro de ligne où 2eme jour == 16/03/2015 |
| 71 | dateJPrev = rownames(data)[nl+ij] # translation : date du jour à prévoir (2eme colonne) |
| 72 | dataj = as.numeric(data[nl+ij, 1:48]) #extraction de cette série dans dataj |
| 73 | data = data[-(nl + ij), ] #suppression de cette série dans data |
| 74 | varexp = varexp[-(nl + ij), ] #idem dans les variables exogènes |
| 75 | indNA = attr(na.omit(data[, 1:48]),"na.action") |
| 76 | # print(indNA) |
| 77 | data = data[-indNA,] #remove all pairs of days with NAs |
| 78 | varexp = varexp[-indNA,] #and remove corresponding exogenous variables |
| 79 | |
| 80 | # Second conditionnement : les jours avec PMjour +/- large |
| 81 | large = 1 |
| 82 | bornes = mean(dataj[25:48]) + c(-large,large) |
| 83 | indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] |
| 84 | if (sum(indcond) < 10) |
| 85 | { |
| 86 | large = 2 |
| 87 | bornes = mean(dataj[25:48]) + c(-large,large) |
| 88 | indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] |
| 89 | } |
| 90 | while (sum(indcond) < 10) |
| 91 | { |
| 92 | large = large + 3 |
| 93 | bornes = mean(dataj[25:48]) + c(-large,large) |
| 94 | indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] |
| 95 | } |
| 96 | data = data[indcond,] #pollution du 2eme jour == pollution du jour courant +/- 1 |
| 97 | varexp = varexp[indcond,] |
| 98 | |
| 99 | D = rep(0,nrow(data)) |
| 100 | for (k in 1:nrow(data)) |
| 101 | { |
| 102 | #D[k] = sqrt(sum((1:H)*(dataj[L] - data[k,L])^2)) |
| 103 | D[k] = sqrt(sum((dataj[L] - data[k,L])^2)) |
| 104 | } |
| 105 | ind = order(D)[1:nbvois] |
| 106 | # w = 1/(D[ind]^2) |
| 107 | # w = w/sum(w) |
| 108 | # W = w %o% rep(1,48) |
| 109 | |
| 110 | #print("Voisins + ij") |
| 111 | #print(sort(D)[1:nbvois]) |
| 112 | #print(dateJPrev) |
| 113 | #print(rownames(data)[sort(ind)]) |
| 114 | #print(data[sort(ind), 1:48]) |
| 115 | |
| 116 | JourMoy = apply(data[ind, 1:48], 2, mean) |
| 117 | #JourMoy = apply(W*data[ind, 1:48], 2, sum) |
| 118 | NomFile = paste("Voisins_Epandage_PMjour_Hc_",Hc,".png",sep="") |
| 119 | Titre = paste("Jour à prévoir : ",dateJPrev," - ", length(ind)," voisins",sep="") |
| 120 | #erreur = sqrt(sum((dataj[25:48] - JourMoy[25:48])^2)) |
| 121 | if(Hc==24) { |
| 122 | erreurPrev = NA |
| 123 | } else { |
| 124 | erreurPrev = mean(abs(dataj[(H+1):48] - JourMoy[(H+1):48])) |
| 125 | } |
| 126 | |
| 127 | list(serie=dataj, prev=JourMoy, err=erreurPrev, line=(nl+ij), neighbs=ind, dates=data[ind,1]) |
| 128 | # erreur24 = mean(abs(dataj[25:48] - JourMoy[25:48])) |
| 129 | # #png(NomFile) |
| 130 | # matplot(t(data[ind, 1:48]), type = "l", lwd=1.4, lty=1, col=1:length(ind), |
| 131 | # cex.axis=1.4, cex.main = 1.7, cex.lab=1.5, |
| 132 | # xlab="Heures locales", ylab=paste0("PM10 - Erreurs = ",round(erreur24,1), |
| 133 | # " / ",round(erreurPrev,1)), main=Titre) |
| 134 | # legend("top",rownames(data)[ind],ncol=2,lwd=1.4, lty=1, col=1:length(ind)) |
| 135 | # lines(1:48, dataj, lwd=2.5) |
| 136 | # lines(JourMoy, lty = 2, lwd=2) |
| 137 | # abline(v=c(24.5,H+0.5), lty = 2, lwd=1.2) |
| 138 | # #xx=dev.off() |
| 139 | # |
| 140 | # Err24 = c(Err24, erreur24) |
| 141 | # ErrPrev = c(ErrPrev, erreurPrev) |
| 142 | # ResDates = cbind(ResDates, rownames(data)[ind]) |
| 143 | #} |
| 144 | |
| 145 | #rownames(ResDates) = 1:10 |
| 146 | # |
| 147 | #Kvois = NULL |
| 148 | #for (Col in ncol(ResDates):1) |
| 149 | #{ |
| 150 | # K = 0 |
| 151 | # for (I in 1:10) |
| 152 | # { |
| 153 | # for (J in 1:10) |
| 154 | # { |
| 155 | # if (ResDates[I,1] == ResDates[J,Col]) |
| 156 | # K = K +1 |
| 157 | # } |
| 158 | # } |
| 159 | # Kvois = c(Kvois, K) |
| 160 | #} |
| 161 | # |
| 162 | ## pdf("Erreur_Epandage_PMjour.pdf") |
| 163 | #ymin = min(na.omit(ErrPrev), Err24) |
| 164 | #ymin = min(ymin, Kvois) |
| 165 | #ymax = max(na.omit(ErrPrev), Err24) |
| 166 | #ymax = max(ymax, Kvois) |
| 167 | #plot(5:24,Err24, type = "l", lwd = 2, cex.axis=1.4, cex.lab = 1.5, |
| 168 | # ylab = "MAE",xlab = "Heures de prévision", ylim= c(ymin,ymax)) |
| 169 | #lines(5:24,ErrPrev, lwd=2, lty = 2) |
| 170 | #legend("topright", legend=c("Erreur 24h", "Erreur prévision"), lty = c(1,2), lwd=2, cex=1.5) |
| 171 | #points(5:24, Kvois, cex=1.8, pch = 19) |
| 172 | ## xx = dev.off() |
| 173 | # |
| 174 | ##length(D) |
| 175 | } |