No longer direct predict for Neighbors2: recollement comme Neighbors (better)
author Benjamin Auder <benjamin.auder@somewhere>
Wed, 29 Mar 2017 01:06:08 +0000 (03:06 +0200)
committer Benjamin Auder <benjamin.auder@somewhere>
Wed, 29 Mar 2017 01:06:08 +0000 (03:06 +0200)
pkg/R/F_Neighbors.R
pkg/R/F_Neighbors2.R
pkg/R/utils.R
reports/report.ipynb

index d889a34..c8a3355 100644 (file)
@@ -30,12 +30,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                                        fdays, today, horizon, list(...)$h_window, kernel, simtype, TRUE) )
                        }
 
-                       # Indices of similar days for cross-validation; TODO: 45 = magic number
-                       sdays = getSimilarDaysIndices(today, data, limit=45, same_season=FALSE)
-
-                       cv_days = intersect(fdays,sdays)
-                       # Limit to 20 most recent matching days (TODO: 20 == magic number)
-                       cv_days = sort(cv_days,decreasing=TRUE)[1:min(20,length(cv_days))]
+                       # Indices of similar days for cross-validation; TODO: 20 = magic number
+                       cv_days = getSimilarDaysIndices(today, data, limit=20, same_season=FALSE, days_in=fdays)
 
                        # Function to optimize h : h |--> sum of prediction errors on last 20 "similar" days
                        errorOnLastNdays = function(h, kernel, simtype)
index 787dd2b..69e69dc 100644 (file)
@@ -9,11 +9,6 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
        inherit = Forecaster,
 
        public = list(
-               predictSerie = function(data, today, memory, horizon, ...)
-               {
-                       # This method predict shape + level at the same time, all in next call
-                       self$predictShape(data, today, memory, horizon, ...)
-               },
                predictShape = function(data, today, memory, horizon, ...)
                {
                        # (re)initialize computed parameters
@@ -35,12 +30,8 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                                        fdays, today, horizon, list(...)$h_window, kernel, simtype, TRUE) )
                        }
 
-                       # Indices of similar days for cross-validation; TODO: 45 = magic number
-                       sdays = getSimilarDaysIndices(today, data, limit=45, same_season=FALSE)
-
-                       cv_days = intersect(fdays,sdays)
-                       # Limit to 20 most recent matching days (TODO: 20 == magic number)
-                       cv_days = sort(cv_days,decreasing=TRUE)[1:min(20,length(cv_days))]
+                       # Indices of similar days for cross-validation; TODO: 20 = magic number
+                       cv_days = getSimilarDaysIndices(today, data, limit=20, same_season=FALSE, days_in=fdays)
 
                        # Function to optimize h : h |--> sum of prediction errors on last 20 "similar" days
                        errorOnLastNdays = function(h, kernel, simtype)
@@ -56,7 +47,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                                        {
                                                nb_jours = nb_jours + 1
                                                error = error +
-                                                       mean((data$getSerie(cv_days[i]+1)[1:horizon] - prediction)^2)
+                                                       mean((data$getCenteredSerie(cv_days[i]+1)[1:horizon] - prediction)^2)
                                        }
                                }
                                return (error / nb_jours)
@@ -95,36 +86,35 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                # Precondition: "today" is full (no NAs)
                .predictShapeAux = function(data, fdays, today, horizon, h, kernel, simtype, final_call)
                {
-                       fdays = fdays[ fdays < today ]
+                       fdays_cut = fdays[ fdays < today ]
                        # TODO: 3 = magic number
-                       if (length(fdays) < 3)
+                       if (length(fdays_cut) < 3)
                                return (NA)
 
-                       # Neighbors: days in "same season"
-                       sdays = getSimilarDaysIndices(today, data, limit=45, same_season=TRUE)
-                       indices = intersect(fdays,sdays)
-                       if (length(indices) <= 1)
+                       # Neighbors: days in "same season"; TODO: 60 == magic number...
+                       fdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE, days_in=fdays_cut)
+                       if (length(fdays) <= 1)
                                return (NA)
                        levelToday = data$getLevel(today)
-                       distances = sapply(indices, function(i) abs(data$getLevel(i)-levelToday))
-                       # 2 and 5 below == magic numbers (determined by Bruno & Michel)
-                       same_pollution = (distances <= 2)
-                       if (sum(same_pollution) == 0)
+                       distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday))
+                       dist_thresh = 1
+                       repeat
                        {
-                               same_pollution = (distances <= 5)
-                               if (sum(same_pollution) == 0)
-                                       return (NA)
+                               same_pollution = (distances <= dist_thresh)
+                               if (sum(same_pollution) >= 2) #will eventually happen
+                                       break
+                               dist_thresh = dist_thresh + 1
                        }
-                       indices = indices[same_pollution]
-                       if (length(indices) == 1)
+                       fdays = fdays[same_pollution]
+                       if (length(fdays) == 1)
                        {
                                if (final_call)
                                {
                                        private$.params$weights <- 1
-                                       private$.params$indices <- indices
+                                       private$.params$indices <- fdays
                                        private$.params$window <- 1
                                }
-                               return ( data$getSerie(indices[1])[1:horizon] ) #what else?!
+                               return ( data$getSerie(fdays[1])[1:horizon] ) #what else?!
                        }
 
                        if (simtype != "exo")
@@ -133,7 +123,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 
                                # Distances from last observed day to days in the past
                                serieToday = data$getSerie(today)
-                               distances2 = sapply(indices, function(i) {
+                               distances2 = sapply(fdays, function(i) {
                                        delta = serieToday - data$getSerie(i)
                                        mean(delta^2)
                                })
@@ -160,21 +150,21 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                        {
                                h_exo = ifelse(simtype=="mix", h[2], h)
 
-                               M = matrix( nrow=1+length(indices), ncol=1+length(data$getExo(today)) )
+                               M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
                                M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
-                               for (i in seq_along(indices))
-                                       M[i+1,] = c( data$getLevel(indices[i]), as.double(data$getExo(indices[i])) )
+                               for (i in seq_along(fdays))
+                                       M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
 
                                sigma = cov(M) #NOTE: robust covariance is way too slow
                                # TODO: 10 == magic number; more robust way == det, or always ginv()
                                sigma_inv =
-                                       if (length(indices) > 10)
+                                       if (length(fdays) > 10)
                                                solve(sigma)
                                        else
                                                MASS::ginv(sigma)
 
                                # Distances from last observed day to days in the past
-                               distances2 = sapply(seq_along(indices), function(i) {
+                               distances2 = sapply(seq_along(fdays), function(i) {
                                        delta = M[1,] - M[i+1,]
                                        delta %*% sigma_inv %*% delta
                                })
@@ -206,14 +196,14 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                                        simils_endo * simils_exo
 
                        prediction = rep(0, horizon)
-                       for (i in seq_along(indices))
-                               prediction = prediction + similarities[i] * data$getSerie(indices[i]+1)[1:horizon]
+                       for (i in seq_along(fdays))
+                               prediction = prediction + similarities[i] * data$getCenteredSerie(fdays[i]+1)[1:horizon]
                        prediction = prediction / sum(similarities, na.rm=TRUE)
 
                        if (final_call)
                        {
                                private$.params$weights <- similarities
-                               private$.params$indices <- indices
+                               private$.params$indices <- fdays
                                private$.params$window <-
                                        if (simtype=="endo")
                                                h_endo
index b3e66e1..3649f58 100644 (file)
@@ -59,9 +59,10 @@ integerIndexToDate = function(index, data)
 #' @param data Reference dataset, object output of \code{getData}
 #' @param limit Maximum number of indices to return
 #' @param same_season Should the indices correspond to day in same season?
+#' @param days_in Optional set of day indices; only days in this set are kept, and only they count towards \code{limit} (NULL: no restriction)
 #'
 #' @export
-getSimilarDaysIndices = function(index, data, limit, same_season)
+getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL)
 {
        index = dateIndexToInteger(index, data)
 
@@ -74,7 +75,7 @@ getSimilarDaysIndices = function(index, data, limit, same_season)
        while (i >= 1 && length(days) < limit)
        {
                dt = as.POSIXlt(data$getTime(i)[1])
-               if (.isSameDay(dt$wday, day_ref))
+               if ((is.null(days_in) || i %in% days_in) && .isSameDay(dt$wday, day_ref))
                {
                        if (!same_season || .isSameSeason(dt$mon+1, month_ref))
                                days = c(days, i)
index f0a06d0..8af4acd 100644 (file)
     "<h2>Introduction</h2>\n",
     "\n",
     "J'ai fait quelques essais dans différentes configurations pour la méthode \"Neighbors\"\n",
-    "(la seule dont on a parlé).<br>Il semble que le mieux soit\n",
+    "(la seule dont on a parlé) et sa variante récente appelée pour l'instant \"Neighbors2\",\n",
+    "avec simtype=\"mix\" : deux types de similarités prises en compte, puis multiplication des poids.\n",
+    "Pour Neighbors on prédit le saut (par la moyenne pondérée des sauts passés), et pour Neighbors2\n",
+    "on n'effectue aucun raccordement (prévision directe).\n",
     "\n",
-    " * simtype=\"exo\" ou \"mix\" : similarités exogènes avec/sans endogènes (fenêtre optimisée par VC)\n",
-    " * same_season=FALSE : les indices pour la validation croisée ne tiennent pas compte des saisons\n",
-    " * mix_strategy=\"mult\" : on multiplie les poids (au lieu d'en éteindre)\n",
-    "\n",
-    "J'ai systématiquement comparé à une approche naïve : la moyennes des lendemains des jours\n",
-    "\"similaires\" dans tout le passé ; à chaque fois sans prédiction du saut (sauf pour Neighbors :\n",
-    "prédiction basée sur les poids calculés).\n",
+    "J'ai systématiquement comparé à une approche naïve : la moyenne des lendemains des jours\n",
+    "\"similaires\" dans tout le passé, ainsi qu'à la persistence -- reproduisant le jour courant ou\n",
+    "allant chercher le futur similaire une semaine avant.\n",
     "\n",
     "Ensuite j'affiche les erreurs, quelques courbes prévues/mesurées, quelques filaments puis les\n",
     "histogrammes de quelques poids. Concernant les graphes de filaments, la moitié gauche du graphe\n",
    "source": [
     "library(talweg)\n",
     "\n",
+    "P = 7 #instant de prévision\n",
+    "H = 17 #horizon (en heures)\n",
+    "\n",
     "ts_data = read.csv(system.file(\"extdata\",\"pm10_mesures_H_loc_report.csv\",package=\"talweg\"))\n",
     "exo_data = read.csv(system.file(\"extdata\",\"meteo_extra_noNAs.csv\",package=\"talweg\"))\n",
-    "# Predict from P+1 to P+H included\n",
-    "H = 17\n",
-    "data = getData(ts_data, exo_data, input_tz = \"GMT\", working_tz=\"GMT\", predict_at=7)\n",
+    "# NOTE: 'GMT' because DST gaps are filled and multiple values merged in above dataset.\n",
+    "# Prediction from P+1 to P+H included.\n",
+    "data = getData(ts_data, exo_data, input_tz = \"GMT\", working_tz=\"GMT\", predict_at=P)\n",
     "\n",
     "indices_ch = seq(as.Date(\"2015-01-18\"),as.Date(\"2015-01-24\"),\"days\")\n",
     "indices_ep = seq(as.Date(\"2015-03-15\"),as.Date(\"2015-03-21\"),\"days\")\n",
-    "indices_np = seq(as.Date(\"2015-04-26\"),as.Date(\"2015-05-02\"),\"days\")"
+    "indices_np = seq(as.Date(\"2015-04-26\"),as.Date(\"2015-05-02\"),\"days\")\n"
    ]
   },
   {
@@ -59,6 +61,8 @@
     "editable": true
    },
    "source": [
+    "\n",
+    "\n",
     "<h2 style=\"color:blue;font-size:2em\">Pollution par chauffage</h2>"
    ]
   },
    "outputs": [],
    "source": [
     "reload(\"../pkg\")\n",
-    "#p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n",
-    "#p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
-    "p4 = computeForecast(data, indices_ch, \"Neighbors\", \"Neighbors\", horizon=H, simtype=\"mix\")\n",
-    "#p4 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"exo\")\n",
-    "#p5 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "#p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "getSimilarDaysIndices(1000,10,TRUE,data)"
+    "p_nn = computeForecast(data, indices_ch, \"Neighbors\", \"Neighbors\", horizon=H)\n",
+    "p_nn2 = computeForecast(data, indices_ch, \"Neighbors2\", \"Neighbors\", horizon=H)\n",
+    "p_az = computeForecast(data, indices_ch, \"Average\", \"Zero\", horizon=H)\n",
+    "p_pz = computeForecast(data, indices_ch, \"Persistence\", \"Zero\", horizon=H, same_day=TRUE)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
-   },
-   "outputs": [],
-   "source": [
-    "as.POSIXlt(data$getTime(1000)[1])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "deletable": true,
-    "editable": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
-    "#e1 = computeError(data, p1, H)\n",
-    "#e2 = computeError(data, p2, H)\n",
-    "e3 = computeError(data, p3, H)\n",
-    "e4 = computeError(data, p4, H)\n",
-    "#e5 = computeError(data, p5, H)\n",
-    "#e6 = computeError(data, p6, H)\n",
-    "plotError(list(e3,e4), cols=c(1,2))"
+    "p_nn2$getParams(5)$weights"
    ]
   },
   {
    },
    "outputs": [],
    "source": [
-    "\tfirst_day = 1\n",
-    "params=p3$getParams(3)\n",
-    "\tfilter = (params$indices >= first_day)\n",
-    "\tindices = params$indices[filter]\n",
-    "\tweights = params$weights[filter]\n",
-    "\n",
-    "\n",
-    "\tgaps = sapply(indices, function(i) {\n",
-    "\t\tdata$getSerie(i+1)[1] - tail(data$getSerie(i), 1)\n",
-    "\t})\n",
-    "\tscal_product = weights * gaps\n",
-    "\tnorm_fact = sum( weights[!is.na(scal_product)] )\n",
-    "\tsum(scal_product, na.rm=TRUE) / norm_fact\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "hist(weights)"
+    "e_nn = computeError(data, p_nn, H)\n",
+    "e_nn2 = computeError(data, p_nn2, H)\n",
+    "e_az = computeError(data, p_az, H)\n",
+    "e_pz = computeError(data, p_pz, H)\n",
+    "options(repr.plot.width=9, repr.plot.height=7)\n",
+    "plotError(list(e_nn, e_pz, e_az, e_nn2), cols=c(1,2,colors()[258], 4))\n",
+    "\n",
+    "# Noir: Neighbors, bleu: Neighbors2, vert: moyenne, rouge: persistence\n",
+    "\n",
+    "i_np = which.min(e_nn$abs$indices)\n",
+    "i_p = which.max(e_nn$abs$indices)"
    ]
   },
   {
     "options(repr.plot.width=9, repr.plot.height=4)\n",
     "par(mfrow=c(1,2))\n",
     "\n",
-    "plotPredReal(data, p3, 3); title(paste(\"PredReal nn exo day\",3))\n",
-    "plotPredReal(data, p3, 5); title(paste(\"PredReal nn exo day\",5))\n",
+    "plotPredReal(data, p_nn, i_np); title(paste(\"PredReal nn day\",i_np))\n",
+    "plotPredReal(data, p_nn2, i_p); title(paste(\"PredReal nn day\",i_p))\n",
+    "\n",
+    "plotPredReal(data, p_nn2, i_np); title(paste(\"PredReal nn2 day\",i_np))\n",
+    "plotPredReal(data, p_nn2, i_p); title(paste(\"PredReal nn2 day\",i_p))\n",
     "\n",
-    "plotPredReal(data, p4, 3); title(paste(\"PredReal nn mix day\",3))\n",
-    "plotPredReal(data, p4, 5); title(paste(\"PredReal nn mix day\",5))\n",
+    "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n",
+    "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n",
     "\n",
     "# Bleu: prévue, noir: réalisée"
    ]
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste(\"Filaments nn exo day\",i_np))\n",
-    "f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste(\"Filaments nn exo day\",i_p))\n",
+    "f_np = computeFilaments(data, p_nn, i_np, plot=TRUE); title(paste(\"Filaments nn day\",i_np))\n",
+    "f_p = computeFilaments(data, p_nn, i_p, plot=TRUE); title(paste(\"Filaments nn day\",i_p))\n",
     "\n",
-    "f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste(\"Filaments nn mix day\",i_np))\n",
-    "f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste(\"Filaments nn mix day\",i_p))"
+    "f_np2 = computeFilaments(data, p_nn2, i_np, plot=TRUE); title(paste(\"Filaments nn2 day\",i_np))\n",
+    "f_p2 = computeFilaments(data, p_nn2, i_p, plot=TRUE); title(paste(\"Filaments nn2 day\",i_p))"
    ]
   },
   {
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotFilamentsBox(data, f_np_exo); title(paste(\"FilBox nn exo day\",i_np))\n",
-    "plotFilamentsBox(data, f_p_exo); title(paste(\"FilBox nn exo day\",i_p))\n",
+    "plotFilamentsBox(data, f_np); title(paste(\"FilBox nn day\",i_np))\n",
+    "plotFilamentsBox(data, f_p); title(paste(\"FilBox nn day\",i_p))\n",
     "\n",
-    "plotFilamentsBox(data, f_np_mix); title(paste(\"FilBox nn mix day\",i_np))\n",
-    "plotFilamentsBox(data, f_p_mix); title(paste(\"FilBox nn mix day\",i_p))"
+    "plotFilamentsBox(data, f_np2); title(paste(\"FilBox nn2 day\",i_np))\n",
+    "plotFilamentsBox(data, f_p2); title(paste(\"FilBox nn2 day\",i_p))"
    ]
   },
   {
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotRelVar(data, f_np_exo); title(paste(\"StdDev nn exo day\",i_np))\n",
-    "plotRelVar(data, f_p_exo); title(paste(\"StdDev nn exo day\",i_p))\n",
+    "plotRelVar(data, f_np); title(paste(\"StdDev nn day\",i_np))\n",
+    "plotRelVar(data, f_p); title(paste(\"StdDev nn day\",i_p))\n",
     "\n",
-    "plotRelVar(data, f_np_mix); title(paste(\"StdDev nn mix day\",i_np))\n",
-    "plotRelVar(data, f_p_mix); title(paste(\"StdDev nn mix day\",i_p))\n",
+    "plotRelVar(data, f_np2); title(paste(\"StdDev nn2 day\",i_np))\n",
+    "plotRelVar(data, f_p2); title(paste(\"StdDev nn2 day\",i_p))\n",
     "\n",
     "# Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir"
    ]
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotSimils(p_nn_exo, i_np); title(paste(\"Weights nn exo day\",i_np))\n",
-    "plotSimils(p_nn_exo, i_p); title(paste(\"Weights nn exo day\",i_p))\n",
+    "plotSimils(p_nn, i_np); title(paste(\"Weights nn day\",i_np))\n",
+    "plotSimils(p_nn, i_p); title(paste(\"Weights nn day\",i_p))\n",
     "\n",
-    "plotSimils(p_nn_mix, i_np); title(paste(\"Weights nn mix day\",i_np))\n",
-    "plotSimils(p_nn_mix, i_p); title(paste(\"Weights nn mix day\",i_p))\n",
+    "plotSimils(p_nn2, i_np); title(paste(\"Weights nn2 day\",i_np))\n",
+    "plotSimils(p_nn2, i_p); title(paste(\"Weights nn2 day\",i_p))\n",
     "\n",
     "# - pollué à gauche, + pollué à droite"
    ]
    },
    "outputs": [],
    "source": [
-    "# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite\n",
-    "p_nn_exo$getParams(i_np)$window\n",
-    "p_nn_exo$getParams(i_p)$window\n",
+    "# Fenêtres sélectionnées dans ]0,7] / nn à gauche, nn2 à droite\n",
+    "p_nn$getParams(i_np)$window\n",
+    "p_nn$getParams(i_p)$window\n",
     "\n",
-    "p_nn_mix$getParams(i_np)$window\n",
-    "p_nn_mix$getParams(i_p)$window"
+    "p_nn2$getParams(i_np)$window\n",
+    "p_nn2$getParams(i_p)$window"
    ]
   },
   {
    },
    "outputs": [],
    "source": [
-    "p_nn_exo = computeForecast(data, indices_ep, \"Neighbors\", \"Neighbors\",\n",
-    "\thorizon=3, simtype=\"exo\")\n",
-    "p_nn_mix = computeForecast(data, indices_ep, \"Neighbors\", \"Neighbors\",\n",
-    "\thorizon=3, simtype=\"mix\")\n",
-    "p_az = computeForecast(data, indices_ep, \"Average\", \"Zero\",\n",
-    "\thorizon=3)\n",
-    "p_pz = computeForecast(data, indices_ep, \"Persistence\", \"Zero\",\n",
-    "\thorizon=3, same_day=TRUE)"
+    "p_nn = computeForecast(data, indices_ep, \"Neighbors\", \"Neighbors\", horizon=H)\n",
+    "p_nn2 = computeForecast(data, indices_ep, \"Neighbors2\", \"Zero\", horizon=H)\n",
+    "p_az = computeForecast(data, indices_ep, \"Average\", \"Zero\", horizon=H)\n",
+    "p_pz = computeForecast(data, indices_ep, \"Persistence\", \"Zero\", horizon=H, same_day=TRUE)"
    ]
   },
   {
    },
    "outputs": [],
    "source": [
-    "e_nn_exo = computeError(data, p_nn_exo, 3)\n",
-    "e_nn_mix = computeError(data, p_nn_mix, 3)\n",
-    "e_az = computeError(data, p_az, 3)\n",
-    "e_pz = computeError(data, p_pz, 3)\n",
+    "e_nn = computeError(data, p_nn, H)\n",
+    "e_nn2 = computeError(data, p_nn2, H)\n",
+    "e_az = computeError(data, p_az, H)\n",
+    "e_pz = computeError(data, p_pz, H)\n",
     "options(repr.plot.width=9, repr.plot.height=7)\n",
-    "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n",
+    "plotError(list(e_nn, e_pz, e_az, e_nn2), cols=c(1,2,colors()[258], 4))\n",
     "\n",
-    "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n",
+    "# Noir: Neighbors, bleu: Neighbors2, vert: moyenne, rouge: persistence\n",
     "\n",
-    "i_np = which.min(e_nn_exo$abs$indices)\n",
-    "i_p = which.max(e_nn_exo$abs$indices)"
+    "i_np = which.min(e_nn$abs$indices)\n",
+    "i_p = which.max(e_nn$abs$indices)"
    ]
   },
   {
     "options(repr.plot.width=9, repr.plot.height=4)\n",
     "par(mfrow=c(1,2))\n",
     "\n",
-    "plotPredReal(data, p_nn_exo, i_np); title(paste(\"PredReal nn exo day\",i_np))\n",
-    "plotPredReal(data, p_nn_exo, i_p); title(paste(\"PredReal nn exo day\",i_p))\n",
+    "plotPredReal(data, p_nn, i_np); title(paste(\"PredReal nn day\",i_np))\n",
+    "plotPredReal(data, p_nn2, i_p); title(paste(\"PredReal nn day\",i_p))\n",
     "\n",
-    "plotPredReal(data, p_nn_mix, i_np); title(paste(\"PredReal nn mix day\",i_np))\n",
-    "plotPredReal(data, p_nn_mix, i_p); title(paste(\"PredReal nn mix day\",i_p))\n",
+    "plotPredReal(data, p_nn2, i_np); title(paste(\"PredReal nn2 day\",i_np))\n",
+    "plotPredReal(data, p_nn2, i_p); title(paste(\"PredReal nn2 day\",i_p))\n",
     "\n",
     "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n",
     "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n",
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste(\"Filaments nn exo day\",i_np))\n",
-    "f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste(\"Filaments nn exo day\",i_p))\n",
+    "f_np = computeFilaments(data, p_nn, i_np, plot=TRUE); title(paste(\"Filaments nn day\",i_np))\n",
+    "f_p = computeFilaments(data, p_nn, i_p, plot=TRUE); title(paste(\"Filaments nn day\",i_p))\n",
     "\n",
-    "f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste(\"Filaments nn mix day\",i_np))\n",
-    "f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste(\"Filaments nn mix day\",i_p))"
+    "f_np2 = computeFilaments(data, p_nn2, i_np, plot=TRUE); title(paste(\"Filaments nn2 day\",i_np))\n",
+    "f_p2 = computeFilaments(data, p_nn2, i_p, plot=TRUE); title(paste(\"Filaments nn2 day\",i_p))"
    ]
   },
   {
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotFilamentsBox(data, f_np_exo); title(paste(\"FilBox nn exo day\",i_np))\n",
-    "plotFilamentsBox(data, f_p_exo); title(paste(\"FilBox nn exo day\",i_p))\n",
+    "plotFilamentsBox(data, f_np); title(paste(\"FilBox nn day\",i_np))\n",
+    "plotFilamentsBox(data, f_p); title(paste(\"FilBox nn day\",i_p))\n",
     "\n",
-    "plotFilamentsBox(data, f_np_mix); title(paste(\"FilBox nn mix day\",i_np))\n",
-    "plotFilamentsBox(data, f_p_mix); title(paste(\"FilBox nn mix day\",i_p))"
+    "plotFilamentsBox(data, f_np2); title(paste(\"FilBox nn2 day\",i_np))\n",
+    "plotFilamentsBox(data, f_p2); title(paste(\"FilBox nn2 day\",i_p))"
    ]
   },
   {
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotRelVar(data, f_np_exo); title(paste(\"StdDev nn exo day\",i_np))\n",
-    "plotRelVar(data, f_p_exo); title(paste(\"StdDev nn exo day\",i_p))\n",
+    "plotRelVar(data, f_np); title(paste(\"StdDev nn day\",i_np))\n",
+    "plotRelVar(data, f_p); title(paste(\"StdDev nn day\",i_p))\n",
     "\n",
-    "plotRelVar(data, f_np_mix); title(paste(\"StdDev nn mix day\",i_np))\n",
-    "plotRelVar(data, f_p_mix); title(paste(\"StdDev nn mix day\",i_p))\n",
+    "plotRelVar(data, f_np2); title(paste(\"StdDev nn2 day\",i_np))\n",
+    "plotRelVar(data, f_p2); title(paste(\"StdDev nn2 day\",i_p))\n",
     "\n",
     "# Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir"
    ]
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotSimils(p_nn_exo, i_np); title(paste(\"Weights nn exo day\",i_np))\n",
-    "plotSimils(p_nn_exo, i_p); title(paste(\"Weights nn exo day\",i_p))\n",
+    "plotSimils(p_nn, i_np); title(paste(\"Weights nn day\",i_np))\n",
+    "plotSimils(p_nn, i_p); title(paste(\"Weights nn day\",i_p))\n",
     "\n",
-    "plotSimils(p_nn_mix, i_np); title(paste(\"Weights nn mix day\",i_np))\n",
-    "plotSimils(p_nn_mix, i_p); title(paste(\"Weights nn mix day\",i_p))\n",
+    "plotSimils(p_nn2, i_np); title(paste(\"Weights nn2 day\",i_np))\n",
+    "plotSimils(p_nn2, i_p); title(paste(\"Weights nn2 day\",i_p))\n",
     "\n",
     "# - pollué à gauche, + pollué à droite"
    ]
    },
    "outputs": [],
    "source": [
-    "# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite\n",
-    "p_nn_exo$getParams(i_np)$window\n",
-    "p_nn_exo$getParams(i_p)$window\n",
+    "# Fenêtres sélectionnées dans ]0,7] / nn à gauche, nn2 à droite\n",
+    "p_nn$getParams(i_np)$window\n",
+    "p_nn$getParams(i_p)$window\n",
     "\n",
-    "p_nn_mix$getParams(i_np)$window\n",
-    "p_nn_mix$getParams(i_p)$window"
+    "p_nn2$getParams(i_np)$window\n",
+    "p_nn2$getParams(i_p)$window"
    ]
   },
   {
    },
    "outputs": [],
    "source": [
-    "p_nn_exo = computeForecast(data, indices_np, \"Neighbors\", \"Neighbors\",\n",
-    "\thorizon=3, simtype=\"exo\")\n",
-    "p_nn_mix = computeForecast(data, indices_np, \"Neighbors\", \"Neighbors\",\n",
-    "\thorizon=3, simtype=\"mix\")\n",
-    "p_az = computeForecast(data, indices_np, \"Average\", \"Zero\",\n",
-    "\thorizon=3)\n",
-    "p_pz = computeForecast(data, indices_np, \"Persistence\", \"Zero\",\n",
-    "\thorizon=3, same_day=FALSE)"
+    "p_nn = computeForecast(data, indices_np, \"Neighbors\", \"Neighbors\", horizon=H)\n",
+    "p_nn2 = computeForecast(data, indices_np, \"Neighbors2\", \"Zero\", horizon=H)\n",
+    "p_az = computeForecast(data, indices_np, \"Average\", \"Zero\", horizon=H)\n",
+    "p_pz = computeForecast(data, indices_np, \"Persistence\", \"Zero\", horizon=H, same_day=FALSE)"
    ]
   },
   {
    },
    "outputs": [],
    "source": [
-    "e_nn_exo = computeError(data, p_nn_exo, 3)\n",
-    "e_nn_mix = computeError(data, p_nn_mix, 3)\n",
-    "e_az = computeError(data, p_az, 3)\n",
-    "e_pz = computeError(data, p_pz, 3)\n",
+    "e_nn = computeError(data, p_nn, H)\n",
+    "e_nn2 = computeError(data, p_nn2, H)\n",
+    "e_az = computeError(data, p_az, H)\n",
+    "e_pz = computeError(data, p_pz, H)\n",
     "options(repr.plot.width=9, repr.plot.height=7)\n",
-    "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n",
+    "plotError(list(e_nn, e_pz, e_az, e_nn2), cols=c(1,2,colors()[258], 4))\n",
     "\n",
-    "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n",
+    "# Noir: Neighbors, bleu: Neighbors2, vert: moyenne, rouge: persistence\n",
     "\n",
-    "i_np = which.min(e_nn_exo$abs$indices)\n",
-    "i_p = which.max(e_nn_exo$abs$indices)"
+    "i_np = which.min(e_nn$abs$indices)\n",
+    "i_p = which.max(e_nn$abs$indices)"
    ]
   },
   {
     "options(repr.plot.width=9, repr.plot.height=4)\n",
     "par(mfrow=c(1,2))\n",
     "\n",
-    "plotPredReal(data, p_nn_exo, i_np); title(paste(\"PredReal nn exo day\",i_np))\n",
-    "plotPredReal(data, p_nn_exo, i_p); title(paste(\"PredReal nn exo day\",i_p))\n",
+    "plotPredReal(data, p_nn, i_np); title(paste(\"PredReal nn day\",i_np))\n",
+    "plotPredReal(data, p_nn2, i_p); title(paste(\"PredReal nn day\",i_p))\n",
     "\n",
-    "plotPredReal(data, p_nn_mix, i_np); title(paste(\"PredReal nn mix day\",i_np))\n",
-    "plotPredReal(data, p_nn_mix, i_p); title(paste(\"PredReal nn mix day\",i_p))\n",
+    "plotPredReal(data, p_nn2, i_np); title(paste(\"PredReal nn2 day\",i_np))\n",
+    "plotPredReal(data, p_nn2, i_p); title(paste(\"PredReal nn2 day\",i_p))\n",
     "\n",
     "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n",
     "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n",
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste(\"Filaments nn exo day\",i_np))\n",
-    "f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste(\"Filaments nn exo day\",i_p))\n",
+    "f_np = computeFilaments(data, p_nn, i_np, plot=TRUE); title(paste(\"Filaments nn day\",i_np))\n",
+    "f_p = computeFilaments(data, p_nn, i_p, plot=TRUE); title(paste(\"Filaments nn day\",i_p))\n",
     "\n",
-    "f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste(\"Filaments nn mix day\",i_np))\n",
-    "f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste(\"Filaments nn mix day\",i_p))"
+    "f_np2 = computeFilaments(data, p_nn2, i_np, plot=TRUE); title(paste(\"Filaments nn2 day\",i_np))\n",
+    "f_p2 = computeFilaments(data, p_nn2, i_p, plot=TRUE); title(paste(\"Filaments nn2 day\",i_p))"
    ]
   },
   {
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotFilamentsBox(data, f_np_exo); title(paste(\"FilBox nn exo day\",i_np))\n",
-    "plotFilamentsBox(data, f_p_exo); title(paste(\"FilBox nn exo day\",i_p))\n",
+    "plotFilamentsBox(data, f_np); title(paste(\"FilBox nn day\",i_np))\n",
+    "plotFilamentsBox(data, f_p); title(paste(\"FilBox nn day\",i_p))\n",
     "\n",
-    "plotFilamentsBox(data, f_np_mix); title(paste(\"FilBox nn mix day\",i_np))\n",
-    "plotFilamentsBox(data, f_p_mix); title(paste(\"FilBox nn mix day\",i_p))"
+    "plotFilamentsBox(data, f_np2); title(paste(\"FilBox nn2 day\",i_np))\n",
+    "plotFilamentsBox(data, f_p2); title(paste(\"FilBox nn2 day\",i_p))"
    ]
   },
   {
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotRelVar(data, f_np_exo); title(paste(\"StdDev nn exo day\",i_np))\n",
-    "plotRelVar(data, f_p_exo); title(paste(\"StdDev nn exo day\",i_p))\n",
+    "plotRelVar(data, f_np); title(paste(\"StdDev nn day\",i_np))\n",
+    "plotRelVar(data, f_p); title(paste(\"StdDev nn day\",i_p))\n",
     "\n",
-    "plotRelVar(data, f_np_mix); title(paste(\"StdDev nn mix day\",i_np))\n",
-    "plotRelVar(data, f_p_mix); title(paste(\"StdDev nn mix day\",i_p))\n",
+    "plotRelVar(data, f_np2); title(paste(\"StdDev nn2 day\",i_np))\n",
+    "plotRelVar(data, f_p2); title(paste(\"StdDev nn2 day\",i_p))\n",
     "\n",
     "# Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir"
    ]
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotSimils(p_nn_exo, i_np); title(paste(\"Weights nn exo day\",i_np))\n",
-    "plotSimils(p_nn_exo, i_p); title(paste(\"Weights nn exo day\",i_p))\n",
+    "plotSimils(p_nn, i_np); title(paste(\"Weights nn day\",i_np))\n",
+    "plotSimils(p_nn, i_p); title(paste(\"Weights nn day\",i_p))\n",
     "\n",
-    "plotSimils(p_nn_mix, i_np); title(paste(\"Weights nn mix day\",i_np))\n",
-    "plotSimils(p_nn_mix, i_p); title(paste(\"Weights nn mix day\",i_p))\n",
+    "plotSimils(p_nn2, i_np); title(paste(\"Weights nn2 day\",i_np))\n",
+    "plotSimils(p_nn2, i_p); title(paste(\"Weights nn2 day\",i_p))\n",
     "\n",
     "# - pollué à gauche, + pollué à droite"
    ]
    },
    "outputs": [],
    "source": [
-    "# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite\n",
-    "p_nn_exo$getParams(i_np)$window\n",
-    "p_nn_exo$getParams(i_p)$window\n",
-    "\n",
-    "p_nn_mix$getParams(i_np)$window\n",
-    "p_nn_mix$getParams(i_p)$window"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "deletable": true,
-    "editable": true
-   },
-   "source": [
-    "\n",
-    "\n",
-    "<h2>Bilan</h2>\n",
-    "\n",
-    "Problème difficile : on ne fait guère mieux qu'une naïve moyenne des lendemains des jours\n",
-    "similaires dans le passé, ce qui n'est pas loin de prédire une série constante égale à la\n",
-    "dernière valeur observée (méthode \"zéro\"). La persistence donne parfois de bons résultats\n",
-    "mais est trop instable (sensibilité à l'argument <code>same_day</code>).\n",
+    "# Fenêtres sélectionnées dans ]0,7] / nn à gauche, nn2 à droite\n",
+    "p_nn$getParams(i_np)$window\n",
+    "p_nn$getParams(i_p)$window\n",
     "\n",
-    "Comment améliorer la méthode ?"
+    "p_nn2$getParams(i_np)$window\n",
+    "p_nn2$getParams(i_p)$window"
    ]
   }
  ],