From: Benjamin Auder Date: Thu, 27 Apr 2017 14:37:18 +0000 (+0200) Subject: TODO: comprendre pkoi pas mêmes voisins X-Git-Url: https://git.auder.net/doc/html/pieces/css/scripts/R.css?a=commitdiff_plain;h=dca259e4e9c0235cb113b329c27157ac59d7242b;p=talweg.git TODO: comprendre pkoi pas mêmes voisins --- diff --git a/Etude_En_Cours_R_E_bis.R b/Etude_En_Cours_R_E_bis.R index aa0d490..457cf79 100644 --- a/Etude_En_Cours_R_E_bis.R +++ b/Etude_En_Cours_R_E_bis.R @@ -1,5 +1,7 @@ +predi <- function(ij) +{ #setwd("/Users/bp/Desktop/CONTRATS_AirNormand/2016/RapportFinalBruno") -rm(list=ls()) +#rm(list=ls()) # Lecture des données: pm = dataframe 2 colonnes, date-time puis PM10 horaire pm = read.table("DATA/mesures_horaires_hloc_pm10_a_filer.csv",sep=",",dec=".",header=T) @@ -48,7 +50,7 @@ ResDates = NULL nbvois=10 j=1 # numéro de semaine -ij=6 # numéro du jour (0 = lundi) +#ij=6 # numéro du jour (0 = lundi) Err24 = NULL ErrPrev = NULL @@ -79,6 +81,18 @@ Kvois = NULL large = 1 bornes = mean(dataj[25:48]) + c(-large,large) indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] + if (sum(indcond) < 10) + { + large = 2 + bornes = mean(dataj[25:48]) + c(-large,large) + indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] + } + while (sum(indcond) < 10) + { + large = large + 3 + bornes = mean(dataj[25:48]) + c(-large,large) + indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] + } data = data[indcond,] #pollution du 2eme jour == pollution du jour courant +/- 1 varexp = varexp[indcond,] @@ -102,6 +116,8 @@ Kvois = NULL } else { erreurPrev = mean(abs(dataj[(H+1):48] - JourMoy[(H+1):48])) } + + list(serie=dataj, prev=JourMoy, err=erreurPrev, line=(nl+ij), neighbs=ind, dates=data[ind,1]) # erreur24 = mean(abs(dataj[25:48] - JourMoy[25:48])) # #png(NomFile) # matplot(t(data[ind, 1:48]), type = "l", lwd=1.4, lty=1, col=1:length(ind), @@ -149,3 +165,4 @@ Kvois = NULL ## xx = dev.off() # ##length(D) +} diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 02536eb..60eb7b7 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -61,8 +61,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", if (!opera) tdays = setdiff(tdays, today) #always exclude current day - # Shortcut if window is known - if (hasArg("window")) + # Shortcut if window is known or local==TRUE && simtype==none + if (hasArg("window") || (local && simtype=="none")) { return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon, local, list(...)$window, simtype, opera, TRUE) ) @@ -129,8 +129,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", if (local) { - # TODO: 60 == magic number - tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE, + # limit=Inf to not censor any day (TODO: finite limit? 60?) + tdays = getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE, days_in=tdays_cut, operational=opera) # if (length(tdays) <= 1) # return (NA) @@ -146,6 +146,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", } return ( data$getSerie(tdays[1])[predict_from:horizon] ) } + max_neighbs = 12 #TODO: 12 = arbitrary number + if (length(tdays) > max_neighbs) + { + distances2 <- .computeDistsEndo(data, today, tdays, predict_from) + ordering <- order(distances2) + tdays <- tdays[ ordering[1:max_neighbs] ] + } } else tdays = tdays_cut #no conditioning @@ -156,19 +163,9 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", window_endo = ifelse(simtype=="mix", window[1], window) # Distances from last observed day to selected days in the past + # TODO: redundant computation if local==TRUE distances2 <- .computeDistsEndo(data, today, tdays, predict_from) - if (local) - { - max_neighbs = 12 #TODO: 12 = arbitrary number - if (length(distances2) > max_neighbs) - { - ordering <- order(distances2) - tdays <- tdays[ ordering[1:max_neighbs] ] - distances2 <- distances2[ ordering[1:max_neighbs] ] - } - } - simils_endo <- .computeSimils(distances2, window_endo) } diff --git a/pkg/R/utils.R b/pkg/R/utils.R index bb76996..bc80030 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -78,6 +78,8 @@ getSimilarDaysIndices = function(index, data, limit, same_season, j = index + 1 while (length(days) < min( limit, ifelse(is.null(days_in),Inf,length(days_in)) )) { + if (i < 1 && j > data$getSize()) + break if (i >= 1) { dt = as.POSIXlt(data$getTime(i)[1]) @@ -111,6 +113,8 @@ getSimilarDaysIndices = function(index, data, limit, same_season, # .isSameSeason = function(month, month_ref) { + if (month_ref == 3) #TODO: same as Bruno (but weird) + return (month %in% c(2,3,4,9,10)) if (month_ref %in% c(11,12,1,2)) #~= mid-polluted return (month %in% c(11,12,1,2)) if (month_ref %in% c(3,4,9,10)) #~= high-polluted