From: Benjamin Auder Date: Wed, 29 Mar 2017 19:14:21 +0000 (+0200) Subject: 'update' X-Git-Url: https://git.auder.net/doc/current/%7B%7B%20asset%28%27mixstore/images/img/%24%7BgetWhatsApp%28link%29%7D?a=commitdiff_plain;h=445e7bbc18aa739ec0b3caba4d8710a9d9e1a43c;p=talweg.git 'update' --- diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index 849ff92..b1697df 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -23,14 +23,13 @@ Suggests: LazyData: yes URL: http://git.auder.net/?p=talweg.git License: MIT + file LICENSE -RoxygenNote: 5.0.1 -Collate: +RoxygenNote: 6.0.1 +Collate: 'A_NAMESPACE.R' 'Data.R' 'Forecaster.R' 'F_Average.R' 'F_Neighbors.R' - 'F_Neighbors2.R' 'F_Persistence.R' 'F_Zero.R' 'Forecast.R' diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index c55291a..9ba72b8 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -22,8 +22,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", fdays = getNoNA2(data, max(today-memory,1), today-1) # Get optional args - local = ifelse(hasArg("local"), list(...)$local, FALSE) #same level + season? - simtype = ifelse(hasArg("simtype"), list(...)$simtype, "mix") #or "endo", or "exo" + local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season? + simtype = ifelse(hasArg("simtype"), list(...)$simtype, "none") #or "endo", or "exo" if (hasArg("window")) { return ( private$.predictShapeAux(data, @@ -34,7 +34,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", cv_days = getSimilarDaysIndices(today, data, limit=20, same_season=FALSE, days_in=fdays) - # Optimize h : h |--> sum of prediction errors on last 45 "similar" days + # Optimize h : h |--> sum of prediction errors on last N "similar" days errorOnLastNdays = function(window, simtype) { error = 0 @@ -54,6 +54,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", return (error / nb_jours) } + # TODO: 7 == magic number if (simtype != "endo") { best_window_exo = optimize( @@ -100,9 +101,9 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", return (NA) levelToday = data$getLevel(today) distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday)) - #TODO: 2, 3, 5, 10 magic numbers here... + #TODO: 2, 10, 3, 12 magic numbers here... dist_thresh = 2 - min_neighbs = min(3,length(fdays)) + min_neighbs = min(10,length(fdays)) repeat { same_pollution = (distances <= dist_thresh) @@ -112,7 +113,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", dist_thresh = dist_thresh + 3 } fdays = fdays[same_pollution] - max_neighbs = 10 + max_neighbs = 12 if (nb_neighbs > max_neighbs) { # Keep only max_neighbs closest neighbors @@ -133,7 +134,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", else fdays = fdays_cut #no conditioning - if (simtype != "exo") + if (simtype == "endo" || simtype == "mix") { # Compute endogen similarities using given window window_endo = ifelse(simtype=="mix", window[1], window) @@ -154,10 +155,10 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", simils_endo = exp(-distances2/(sd_dist*window_endo^2)) } - if (simtype != "endo") + if (simtype == "exo" || simtype == "mix") { # Compute exogen similarities using given window - h_exo = ifelse(simtype=="mix", window[2], window) + window_exo = ifelse(simtype=="mix", window[2], window) M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) ) M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) ) @@ -192,8 +193,10 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", simils_exo else if (simtype == "endo") simils_endo - else #mix + else if (simtype == "mix") simils_endo * simils_exo + else #none + rep(1, length(fdays)) similarities = similarities / sum(similarities) prediction = rep(0, horizon) diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index d635560..24f114e 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -27,8 +27,7 @@ #' @examples #' ts_data = system.file("extdata","pm10_mesures_H_loc.csv",package="talweg") #' exo_data = system.file("extdata","meteo_extra_noNAs.csv",package="talweg") -#' data = getData(ts_data, exo_data, input_tz = "Europe/Paris", -#' working_tz="Europe/Paris", predict_at=7) +#' data = getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", predict_at=7) #' pred = computeForecast(data, 2200:2230, "Persistence", "Persistence", 500, 12) #' \dontrun{#Sketch for real-time mode: #' data = new("Data", ...) diff --git a/reports/report.gj b/reports/report.gj index 5e57660..8e0f9ea 100644 --- a/reports/report.gj +++ b/reports/report.gj @@ -4,15 +4,15 @@ J'ai fait quelques essais dans deux configurations pour la méthode "Neighbors" (la seule dont on a parlé, incorporant désormais la "variante Bruno/Michel"). - * avec simtype="mix" et raccordement simple ("Zero") dans le cas "non local", i.e. on va - chercher des voisins n'importe où du moment qu'ils correspondent à deux jours consécutifs sans - valeurs manquantes. - * avec simtype="endo" et raccordement "Neighbor" dans le cas "local" : voisins de même niveau de - pollution et même saison. + * avec simtype="mix" et raccordement "Neighbors" (p1) dans le cas "non local", i.e. on va + chercher des voisins n'importe où du moment qu'ils correspondent au premier élément d'un + couple de deux jours consécutifs sans valeurs manquantes. + * avec simtype="endo" + raccordement "Neighbors" (p2) puis "none" (p3, moyenne simple) + raccordement + "Zero" (sans ajustement) dans le cas "local" : voisins de même niveau de pollution et même saison. J'ai systématiquement comparé à une approche naïve : la moyenne des lendemains des jours -"similaires" dans tout le passé, ainsi qu'à la persistence -- reproduisant le jour courant ou -allant chercher le futur similaire une semaine avant. +"similaires" dans tout le passé (p4), ainsi qu'à la persistence (p5) -- reproduisant le jour courant ou +allant chercher le futur similaire une semaine avant (argument "same_day"). Ensuite j'affiche les erreurs, quelques courbes prévues/mesurées, quelques filaments puis les histogrammes de quelques poids. Concernant les graphes de filaments, la moitié gauche du graphe @@ -43,22 +43,25 @@ indices_np = seq(as.Date("2015-04-26"),as.Date("2015-05-02"),"days") -----

${list_titles[i]}

-----r -p_n = computeForecast(data, ${list_indices[i]}, "Neighbors", "Zero", horizon=H, +p1 = computeForecast(data, ${list_indices[i]}, "Neighbors", "Neighbors", horizon=H, simtype="mix", local=FALSE) -p_l = computeForecast(data, ${list_indices[i]}, "Neighbors", "Neighbors", horizon=H, +p2 = computeForecast(data, ${list_indices[i]}, "Neighbors", "Neighbors", horizon=H, simtype="endo", local=TRUE) -p_a = computeForecast(data, ${list_indices[i]}, "Average", "Zero", horizon=H) -p_p = computeForecast(data, ${list_indices[i]}, "Persistence", "Zero", horizon=H, +p3 = computeForecast(data, ${list_indices[i]}, "Neighbors", "Zero", horizon=H, + simtype="none", local=TRUE) +p4 = computeForecast(data, ${list_indices[i]}, "Average", "Zero", horizon=H) +p5 = computeForecast(data, ${list_indices[i]}, "Persistence", "Zero", horizon=H, same_day=${'TRUE' if loop.index < 2 else 'FALSE'}) -----r -e_n = computeError(data, p_n, H) -e_l = computeError(data, p_nl, H) -e_a = computeError(data, p_a, H) -e_p = computeError(data, p_p, H) +e1 = computeError(data, p_nz_mf, H) +e2 = computeError(data, p_nz_mfl, H) +e3 = computeError(data, p_a, H) +e4 = computeError(data, p_p, H) options(repr.plot.width=9, repr.plot.height=7) plotError(list(e_n, e_p, e_a, e_l), cols=c(1,2,colors()[258], 4)) -# Noir: Neighbors non-local, bleu: Neighbors local, vert: moyenne, rouge: persistence +# noir: Neighbors non-local (p1), bleu: Neighbors local endo (p2), mauve: Neighbors local none (p3), +# vert: moyenne (p4), rouge: persistence (p5) i_np = which.min(e_n$abs$indices) i_p = which.max(e_n$abs$indices) @@ -66,54 +69,52 @@ i_p = which.max(e_n$abs$indices) options(repr.plot.width=9, repr.plot.height=4) par(mfrow=c(1,2)) -plotPredReal(data, p_n, i_np); title(paste("PredReal non-loc day",i_np)) -plotPredReal(data, p_n, i_p); title(paste("PredReal non-loc day",i_p)) +plotPredReal(data, p1, i_np); title(paste("PredReal p1 day",i_np)) +plotPredReal(data, p1, i_p); title(paste("PredReal p1 day",i_p)) -plotPredReal(data, p_l, i_np); title(paste("PredReal loc day",i_np)) -plotPredReal(data, p_l, i_p); title(paste("PredReal loc day",i_p)) +plotPredReal(data, p2, i_np); title(paste("PredReal p2 day",i_np)) +plotPredReal(data, p2, i_p); title(paste("PredReal p2 day",i_p)) -plotPredReal(data, p_a, i_np); title(paste("PredReal avg day",i_np)) -plotPredReal(data, p_a, i_p); title(paste("PredReal avg day",i_p)) +plotPredReal(data, p3, i_np); title(paste("PredReal p3 day",i_np)) +plotPredReal(data, p3, i_p); title(paste("PredReal p3 day",i_p)) # Bleu: prévue, noir: réalisée -----r par(mfrow=c(1,2)) -f_np_n = computeFilaments(data, p_n, i_np, plot=TRUE); title(paste("Filaments non-loc day",i_np)) -f_p_n = computeFilaments(data, p_n, i_p, plot=TRUE); title(paste("Filaments non-loc day",i_p)) +f_np1 = computeFilaments(data, p1, i_np, plot=TRUE); title(paste("Filaments p1 day",i_np)) +f_p1 = computeFilaments(data, p1, i_p, plot=TRUE); title(paste("Filaments p1 day",i_p)) -f_np_l = computeFilaments(data, p_l, i_np, plot=TRUE); title(paste("Filaments loc day",i_np)) -f_p_l = computeFilaments(data, p_l, i_p, plot=TRUE); title(paste("Filaments loc day",i_p)) +f_np2 = computeFilaments(data, p2, i_np, plot=TRUE); title(paste("Filaments p2 day",i_np)) +f_p2 = computeFilaments(data, p2, i_p, plot=TRUE); title(paste("Filaments p2 day",i_p)) -----r par(mfrow=c(1,2)) -plotFilamentsBox(data, f_np_n); title(paste("FilBox non-loc day",i_np)) -plotFilamentsBox(data, f_p_n); title(paste("FilBox non-loc day",i_p)) +plotFilamentsBox(data, f_np1); title(paste("FilBox p1 day",i_np)) +plotFilamentsBox(data, f_p1); title(paste("FilBox p1 day",i_p)) -# Generally too few neighbors: -#plotFilamentsBox(data, f_np_l); title(paste("FilBox loc day",i_np)) -#plotFilamentsBox(data, f_p_l); title(paste("FilBox loc day",i_p)) +# Too few neighbors in the local case for this plot -----r par(mfrow=c(1,2)) -plotRelVar(data, f_np_n); title(paste("StdDev non-loc day",i_np)) -plotRelVar(data, f_p_n); title(paste("StdDev non-loc day",i_p)) +plotRelVar(data, f_np1); title(paste("StdDev p1 day",i_np)) +plotRelVar(data, f_p1); title(paste("StdDev p1 day",i_p)) -plotRelVar(data, f_np_l); title(paste("StdDev loc day",i_np)) -plotRelVar(data, f_p_l); title(paste("StdDev loc day",i_p)) +plotRelVar(data, f_np2); title(paste("StdDev p2 day",i_np)) +plotRelVar(data, f_p2); title(paste("StdDev p2 day",i_p)) # Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir -----r par(mfrow=c(1,2)) -plotSimils(p_n, i_np); title(paste("Weights non-loc day",i_np)) -plotSimils(p_n, i_p); title(paste("Weights non-loc day",i_p)) +plotSimils(p1, i_np); title(paste("Weights p1 day",i_np)) +plotSimils(p1, i_p); title(paste("Weights p1 day",i_p)) -plotSimils(p_l, i_np); title(paste("Weights loc day",i_np)) -plotSimils(p_l, i_p); title(paste("Weights loc day",i_p)) +plotSimils(p2, i_np); title(paste("Weights p2 day",i_np)) +plotSimils(p2, i_p); title(paste("Weights p2 day",i_p)) # - pollué à gauche, + pollué à droite -----r # Fenêtres sélectionnées dans ]0,7] / non-loc à gauche, loc à droite -p_n$getParams(i_np)$window -p_n$getParams(i_p)$window +p1$getParams(i_np)$window +p1$getParams(i_p)$window -p_l$getParams(i_np)$window -p_l$getParams(i_p)$window +p2$getParams(i_np)$window +p2$getParams(i_p)$window % endfor