From ee8b1b4e3c13f8dcf13a2c8da6a3bef1520c8252 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Wed, 29 Mar 2017 01:52:01 +0200
Subject: [PATCH] fix methods, update report generation

---
 pkg/DESCRIPTION            |   3 +-
 pkg/R/F_Neighbors.R        |  11 ++--
 pkg/R/F_Neighbors2.R       |  37 ++++++-------
 pkg/R/J_Neighbors.R        |   2 +-
 pkg/R/computeForecast.R    |  35 ++++++-------
 pkg/R/utils.R              |  69 +++++++++++++++----------
 reports/ipynb_generator.py |   6 +--
 reports/report.gj          | 103 ++++++++++++++++---------------------
 reports/report.ipynb       |  67 +++++++++++++++++-------
 9 files changed, 181 insertions(+), 152 deletions(-)

diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION
index 8d3c4c3..c90ce4f 100644
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -15,6 +15,7 @@ Imports:
     R6,
     methods
 Suggests:
+    parallel,
     devtools,
     roxygen2,
     testthat,
@@ -22,7 +23,7 @@ Suggests:
 LazyData: yes
 URL: http://git.auder.net/?p=talweg.git
 License: MIT + file LICENSE
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
 Collate:
     'A_NAMESPACE.R'
     'Data.R'
diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R
index 5b2c899..d889a34 100644
--- a/pkg/R/F_Neighbors.R
+++ b/pkg/R/F_Neighbors.R
@@ -31,7 +31,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 			}
 
 			# Indices of similar days for cross-validation; TODO: 45 = magic number
-			sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
+			sdays = getSimilarDaysIndices(today, data, limit=45, same_season=FALSE)
 
 			cv_days = intersect(fdays,sdays)
 			# Limit to 20 most recent matching days (TODO: 20 == magic number)
@@ -134,7 +134,12 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 					M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
 
 				sigma = cov(M) #NOTE: robust covariance is way too slow
-				sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
+				# TODO: 10 == magic number; more robust way == det, or always ginv()
+				sigma_inv =
+					if (length(fdays) > 10)
+						solve(sigma)
+					else
+						MASS::ginv(sigma)
 
 				# Distances from last observed day to days in the past
 				distances2 = sapply(seq_along(fdays), function(i) {
@@ -143,7 +148,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				})
 
 				sd_dist = sd(distances2)
-				if (sd_dist < .Machine$double.eps)
+				if (sd_dist < .25 * sqrt(.Machine$double.eps))
 				{
 #					warning("All computed distances are very close: stdev too small")
 					sd_dist = 1 #mostly for tests... FIXME:
diff --git a/pkg/R/F_Neighbors2.R b/pkg/R/F_Neighbors2.R
index 83ef453..787dd2b 100644
--- a/pkg/R/F_Neighbors2.R
+++ b/pkg/R/F_Neighbors2.R
@@ -9,15 +9,11 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 	inherit = Forecaster,
 
 	public = list(
-#		predictSerie = function(data, today, memory, horizon, ...)
-#		{
-#			# Parameters (potentially) computed during shape prediction stage
-#			predicted_shape = self$predictShape(data, today, memory, horizon, ...)
-##			predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...)
-#			# Predicted shape is aligned it on the end of current day + jump
-##			predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta
-#			predicted_shape
-#		},
+		predictSerie = function(data, today, memory, horizon, ...)
+		{
+			# This method predicts shape + level at the same time, all in the next call
+			self$predictShape(data, today, memory, horizon, ...)
+		},
 		predictShape = function(data, today, memory, horizon, ...)
 		{
 			# (re)initialize computed parameters
@@ -40,7 +36,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 			}
 
 			# Indices of similar days for cross-validation; TODO: 45 = magic number
-			sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
+			sdays = getSimilarDaysIndices(today, data, limit=45, same_season=FALSE)
 
 			cv_days = intersect(fdays,sdays)
 			# Limit to 20 most recent matching days (TODO: 20 == magic number)
@@ -101,24 +97,25 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 		{
 			fdays = fdays[ fdays < today ]
 			# TODO: 3 = magic number
-			if (length(fdays) < 1)
+			if (length(fdays) < 3)
 				return (NA)
 
 			# Neighbors: days in "same season"
-			sdays = getSimilarDaysIndices(today, limit=45, same_season=TRUE, data)
+			sdays = getSimilarDaysIndices(today, data, limit=45, same_season=TRUE)
 			indices = intersect(fdays,sdays)
+			if (length(indices) <= 1)
+				return (NA)
 			levelToday = data$getLevel(today)
 			distances = sapply(indices, function(i) abs(data$getLevel(i)-levelToday))
+			# 2 and 5 below == magic numbers (determined by Bruno & Michel)
 			same_pollution = (distances <= 2)
-			if (sum(same_pollution) < 1) #TODO: 3 == magic number
+			if (sum(same_pollution) == 0)
 			{
 				same_pollution = (distances <= 5)
-				if (sum(same_pollution) < 1)
+				if (sum(same_pollution) == 0)
 					return (NA)
 			}
 			indices = indices[same_pollution]
-
-			#TODO: we shouldn't need that block
 			if (length(indices) == 1)
 			{
 				if (final_call)
@@ -169,8 +166,12 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 					M[i+1,] = c( data$getLevel(indices[i]), as.double(data$getExo(indices[i])) )
 
 				sigma = cov(M) #NOTE: robust covariance is way too slow
-#				sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
-				sigma_inv = MASS::ginv(sigma)
+				# TODO: 10 == magic number; more robust way == det, or always ginv()
+				sigma_inv =
+					if (length(indices) > 10)
+						solve(sigma)
+					else
+						MASS::ginv(sigma)
 
 				# Distances from last observed day to days in the past
 				distances2 = sapply(seq_along(indices), function(i) {
diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R
index 3c9bc30..7ca0003 100644
--- a/pkg/R/J_Neighbors.R
+++ b/pkg/R/J_Neighbors.R
@@ -6,7 +6,7 @@
 getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...)
 {
 	first_day = max(1, today-memory)
-	filter = params$indices >= first_day
+	filter = (params$indices >= first_day)
 	indices = params$indices[filter]
 	weights = params$weights[filter]
 
diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R
index bd19b8d..d635560 100644
--- a/pkg/R/computeForecast.R
+++ b/pkg/R/computeForecast.R
@@ -19,6 +19,7 @@
 #' }
 #' @param memory Data depth (in days) to be used for prediction
 #' @param horizon Number of time steps to predict
+#' @param ncores Number of cores for parallel execution (1 to disable)
 #' @param ... Additional parameters for the forecasting models
 #'
 #' @return An object of class Forecast
@@ -39,7 +40,7 @@
 #' }}
 #' @export
 computeForecast = function(data, indices, forecaster, pjump,
-	memory=Inf, horizon=data$getStdHorizon(), ...)
+	memory=Inf, horizon=data$getStdHorizon(), ncores=3, ...)
 {
 	# (basic) Arguments sanity checks
 	horizon = as.integer(horizon)[1]
@@ -56,39 +57,33 @@ computeForecast = function(data, indices, forecaster, pjump,
 	forecaster = forecaster_class_name$new( #.pjump =
 		getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg"))
 
-#oo = forecaster$predictSerie(data, integer_indices[1], memory, horizon, ...)
-#browser()
-
-	parll=TRUE #FALSE
-	if (parll)
+	if (ncores > 1 && requireNamespace("parallel",quietly=TRUE))
 	{
-		library(parallel)
-		ppp <- parallel::mclapply(seq_along(integer_indices), function(i) {
+		p <- parallel::mclapply(seq_along(integer_indices), function(i) {
 			list(
 				"forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
 				"params"= forecaster$getParameters(),
 				"index" = integer_indices[i] )
-			}, mc.cores=3)
+			}, mc.cores=ncores)
 	}
 	else
 	{
-		ppp <- lapply(seq_along(integer_indices), function(i) {
+		p <- lapply(seq_along(integer_indices), function(i) {
 			list(
 				"forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
 				"params"= forecaster$getParameters(),
 				"index" = integer_indices[i] )
 			})
 	}
-#browser()
-
-for (i in seq_along(integer_indices))
-{
-	pred$append(
-			new_serie = ppp[[i]]$forecast,
-			new_params = ppp[[i]]$params,
-			new_index_in_data = ppp[[i]]$index
-	)
-}
 
+	# TODO: find a way to fill pred in //...
+	for (i in seq_along(integer_indices))
+	{
+		pred$append(
+			new_serie = p[[i]]$forecast,
+			new_params = p[[i]]$params,
+			new_index_in_data = p[[i]]$index
+		)
+	}
 	pred
 }
diff --git a/pkg/R/utils.R b/pkg/R/utils.R
index 20f396b..b3e66e1 100644
--- a/pkg/R/utils.R
+++ b/pkg/R/utils.R
@@ -56,52 +56,65 @@ integerIndexToDate = function(index, data)
 #' Find similar days indices in the past.
 #'
 #' @param index Day index (numeric or date)
+#' @param data Reference dataset, object output of \code{getData}
 #' @param limit Maximum number of indices to return
 #' @param same_season Should the indices correspond to day in same season?
-#' @param data Dataset is required for a search in same season
 #'
 #' @export
-getSimilarDaysIndices = function(index, limit, same_season, data=NULL)
+getSimilarDaysIndices = function(index, data, limit, same_season)
 {
-	index = dateIndexToInteger(index)
+	index = dateIndexToInteger(index, data)
 
-	#TODO: mardi similaire à lundi mercredi jeudi aussi ...etc ==> "isSimilarDay()..."
-	if (!same_season)
-	{
-		#take all similar days in recent past
-		nb_days = min( (index-1) %/% 7, limit)
-		return ( rep(index,nb_days) - 7*seq_len(nb_days) )
-	}
-
-	#Look for similar days in similar season
-	nb_days = min( (index-1) %/% 7, limit)
-	i = index - 7
+	# Look for similar days (optionally in same season)
+	i = index - 1
 	days = c()
-	month_ref = as.POSIXlt(data$getTime(index)[1])$mon + 1
+	dt_ref = as.POSIXlt(data$getTime(index)[1]) #first date-time of current day
+	day_ref = dt_ref$wday #1=monday, ..., 6=saturday, 0=sunday
+	month_ref = as.POSIXlt(data$getTime(index)[1])$mon+1 #month in 1...12
 	while (i >= 1 && length(days) < limit)
 	{
-		if (isSameSeason(as.POSIXlt(data$getTime(i)[1])$mon + 1, month_ref))
-			days = c(days, i)
-		i = i-7
+		dt = as.POSIXlt(data$getTime(i)[1])
+		if (.isSameDay(dt$wday, day_ref))
+		{
+			if (!same_season || .isSameSeason(dt$mon+1, month_ref))
+				days = c(days, i)
+		}
+		i = i - 1
 	}
 	return ( days )
 }
 
-#TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP
-isSameSeason = function(month, month_ref)
+# isSameSeason
+#
+# Check if two months fall in the same "season" (defined by estimated pollution rate)
+#
+# @param month month index to test
+# @param month_ref month to compare to
+#
+.isSameSeason = function(month, month_ref)
 {
-	if (month_ref %in% c(11,12,1,2))
+	if (month_ref %in% c(11,12,1,2)) #~= mid-polluted
 		return (month %in% c(11,12,1,2))
-	if (month_ref %in% c(3,4,9,10))
+	if (month_ref %in% c(3,4,9,10)) #~= high-polluted
 		return (month %in% c(3,4,9,10))
-	return (month %in% c(5,6,7,8))
+	return (month %in% c(5,6,7,8)) #~= non polluted
 }
 
-#TODO:
-#distinction lun-jeudi, puis ven, sam, dim
-#isSameDay = function(day, day_ref)
-#{
-#	if (day_ref == 
+# isSameDay
+#
+# Check if two weekdays are "similar": Monday..Thursday (wday 1..4) are
+# interchangeable; Friday (5), Saturday (6), Sunday (0) only match themselves.
+#
+# @param day day index to test (POSIXlt $wday encoding: 0=sunday, 1=monday...)
+# @param day_ref day index to compare to (same encoding)
+.isSameDay = function(day, day_ref)
+{
+	if (day_ref == 0)
+		return (day==0)
+	if (day_ref <= 4)
+		return (day >= 1 & day <= 4) #lower bound: sunday (0) is not a Mon-Thu day
+	return (day == day_ref)
+}
 
 #' getNoNA2
 #'
diff --git a/reports/ipynb_generator.py b/reports/ipynb_generator.py
index ce546ad..456fc22 100755
--- a/reports/ipynb_generator.py
+++ b/reports/ipynb_generator.py
@@ -123,15 +123,15 @@ def driver():
         inputfile = sys.argv[1]
         with open(inputfile, 'r') as f:
             text = f.read()
-        outputfile = '-' if len(sys.argv) <= 2 else sys.argv[2]
+        # Assuming file extension .gj (generate Jupyter); TODO: less strict
+        outputfile = inputfile[:-3]+'.ipynb' if (len(sys.argv)<=2 or sys.argv[2]=='-') \
+            else sys.argv[2]
     except (IndexError, IOError) as e:
         print('Usage: %s inputfile [outputfile|- [Mako args]]' % (sys.argv[0]))
         print(e)
         sys.exit(1)
     cells = read(text, argv=sys.argv[3:])
     filestr = write(cells)
-    # Assuming file extension .gj (generate Jupyter); TODO: less strict
-    outputfile = inputfile[:-3]+'.ipynb' if outputfile == '-' else outputfile
     with open(outputfile, 'w') as f:
         f.write(filestr)
 
diff --git a/reports/report.gj b/reports/report.gj
index aee6ad4..657b1d7 100644
--- a/reports/report.gj
+++ b/reports/report.gj
@@ -2,15 +2,14 @@
 <h2>Introduction</h2>
 
 J'ai fait quelques essais dans différentes configurations pour la méthode "Neighbors"
-(la seule dont on a parlé) et sa variante récente appelée pour l'instant "Neighbors2".<br>
+(la seule dont on a parlé) et sa variante récente appelée pour l'instant "Neighbors2",
+avec simtype="mix" : deux types de similarités prises en compte, puis multiplication des poids.
+Pour Neighbors on prédit le saut (par la moyenne pondérée des sauts passés), et pour Neighbors2
+on n'effectue aucun raccordement (prévision directe).
 
- * simtype="exo", "endo" ou "mix" : type de similarités (fenêtre optimisée par VC)
- * same_season=FALSE : les indices pour la validation croisée ne tiennent pas compte des saisons
- * mix_strategy="mult" : on multiplie les poids (au lieu d'en éteindre)
-
-J'ai systématiquement comparé à une approche naïve : la moyennes des lendemains des jours
-"similaires" dans tout le passé ; à chaque fois sans prédiction du saut (sauf pour Neighbors :
-prédiction basée sur les poids calculés).
+J'ai systématiquement comparé à une approche naïve : la moyenne des lendemains des jours
+"similaires" dans tout le passé, ainsi qu'à la persistance -- reproduisant le jour courant ou
+allant chercher le futur similaire une semaine avant.
 
 Ensuite j'affiche les erreurs, quelques courbes prévues/mesurées, quelques filaments puis les
 histogrammes de quelques poids. Concernant les graphes de filaments, la moitié gauche du graphe
@@ -29,8 +28,9 @@ H = ${H} #horizon (en heures)
 
 ts_data = read.csv(system.file("extdata","pm10_mesures_H_loc_report.csv",package="talweg"))
 exo_data = read.csv(system.file("extdata","meteo_extra_noNAs.csv",package="talweg"))
-data = getData(ts_data, exo_data, input_tz = "Europe/Paris", working_tz="Europe/Paris",
-	predict_at=P) #predict from P+1 to P+H included
+# NOTE: 'GMT' because DST gaps are filled and multiple values merged in above dataset.
+# Prediction from P+1 to P+H included.
+data = getData(ts_data, exo_data, input_tz = "GMT", working_tz="GMT", predict_at=P)
 
 indices_ch = seq(as.Date("2015-01-18"),as.Date("2015-01-24"),"days")
 indices_ep = seq(as.Date("2015-03-15"),as.Date("2015-03-21"),"days")
@@ -40,35 +40,31 @@ indices_np = seq(as.Date("2015-04-26"),as.Date("2015-05-02"),"days")
 -----
 <h2 style="color:blue;font-size:2em">${list_titles[i]}</h2>
 -----r
-p_nn_exo = computeForecast(data, ${list_indices[i]}, "Neighbors", "Neighbors",
-	horizon=H, simtype="exo")
-p_nn_mix = computeForecast(data, ${list_indices[i]}, "Neighbors", "Neighbors",
-	horizon=H, simtype="mix")
-p_az = computeForecast(data, ${list_indices[i]}, "Average", "Zero",
-	horizon=H)
-p_pz = computeForecast(data, ${list_indices[i]}, "Persistence", "Zero",
-	horizon=${H}, same_day=${'TRUE' if loop.index < 2 else 'FALSE'})
+p_nn = computeForecast(data, ${list_indices[i]}, "Neighbors", "Neighbors", horizon=H)
+p_nn2 = computeForecast(data, ${list_indices[i]}, "Neighbors2", "Zero", horizon=H)
+p_az = computeForecast(data, ${list_indices[i]}, "Average", "Zero", horizon=H)
+p_pz = computeForecast(data, ${list_indices[i]}, "Persistence", "Zero", horizon=H, same_day=${'TRUE' if loop.index < 2 else 'FALSE'})
 -----r
-e_nn_exo = computeError(data, p_nn_exo, H)
-e_nn_mix = computeError(data, p_nn_mix, H)
+e_nn = computeError(data, p_nn, H)
+e_nn2 = computeError(data, p_nn2, H)
 e_az = computeError(data, p_az, H)
 e_pz = computeError(data, p_pz, H)
 options(repr.plot.width=9, repr.plot.height=7)
-plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))
+plotError(list(e_nn, e_pz, e_az, e_nn2), cols=c(1,2,colors()[258], 4))
 
-# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence
+# Noir: Neighbors, bleu: Neighbors2, vert: moyenne, rouge: persistance
 
-i_np = which.min(e_nn_exo$abs$indices)
-i_p = which.max(e_nn_exo$abs$indices)
+i_np = which.min(e_nn$abs$indices)
+i_p = which.max(e_nn$abs$indices)
 -----r
 options(repr.plot.width=9, repr.plot.height=4)
 par(mfrow=c(1,2))
 
-plotPredReal(data, p_nn_exo, i_np); title(paste("PredReal nn exo day",i_np))
-plotPredReal(data, p_nn_exo, i_p); title(paste("PredReal nn exo day",i_p))
+plotPredReal(data, p_nn, i_np); title(paste("PredReal nn day",i_np))
+plotPredReal(data, p_nn, i_p); title(paste("PredReal nn day",i_p))
 
-plotPredReal(data, p_nn_mix, i_np); title(paste("PredReal nn mix day",i_np))
-plotPredReal(data, p_nn_mix, i_p); title(paste("PredReal nn mix day",i_p))
+plotPredReal(data, p_nn2, i_np); title(paste("PredReal nn2 day",i_np))
+plotPredReal(data, p_nn2, i_p); title(paste("PredReal nn2 day",i_p))
 
 plotPredReal(data, p_az, i_np); title(paste("PredReal az day",i_np))
 plotPredReal(data, p_az, i_p); title(paste("PredReal az day",i_p))
@@ -76,50 +72,41 @@ plotPredReal(data, p_az, i_p); title(paste("PredReal az day",i_p))
 # Bleu: prévue, noir: réalisée
 -----r
 par(mfrow=c(1,2))
-f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste("Filaments nn exo day",i_np))
-f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste("Filaments nn exo day",i_p))
+f_np = computeFilaments(data, p_nn, i_np, plot=TRUE); title(paste("Filaments nn day",i_np))
+f_p = computeFilaments(data, p_nn, i_p, plot=TRUE); title(paste("Filaments nn day",i_p))
 
-f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste("Filaments nn mix day",i_np))
-f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste("Filaments nn mix day",i_p))
+f_np2 = computeFilaments(data, p_nn2, i_np, plot=TRUE); title(paste("Filaments nn2 day",i_np))
+f_p2 = computeFilaments(data, p_nn2, i_p, plot=TRUE); title(paste("Filaments nn2 day",i_p))
 -----r
 par(mfrow=c(1,2))
-plotFilamentsBox(data, f_np_exo); title(paste("FilBox nn exo day",i_np))
-plotFilamentsBox(data, f_p_exo); title(paste("FilBox nn exo day",i_p))
+plotFilamentsBox(data, f_np); title(paste("FilBox nn day",i_np))
+plotFilamentsBox(data, f_p); title(paste("FilBox nn day",i_p))
 
-plotFilamentsBox(data, f_np_mix); title(paste("FilBox nn mix day",i_np))
-plotFilamentsBox(data, f_p_mix); title(paste("FilBox nn mix day",i_p))
+plotFilamentsBox(data, f_np2); title(paste("FilBox nn2 day",i_np))
+plotFilamentsBox(data, f_p2); title(paste("FilBox nn2 day",i_p))
 -----r
 par(mfrow=c(1,2))
-plotRelVar(data, f_np_exo); title(paste("StdDev nn exo day",i_np))
-plotRelVar(data, f_p_exo); title(paste("StdDev nn exo day",i_p))
+plotRelVar(data, f_np); title(paste("StdDev nn day",i_np))
+plotRelVar(data, f_p); title(paste("StdDev nn day",i_p))
 
-plotRelVar(data, f_np_mix); title(paste("StdDev nn mix day",i_np))
-plotRelVar(data, f_p_mix); title(paste("StdDev nn mix day",i_p))
+plotRelVar(data, f_np2); title(paste("StdDev nn2 day",i_np))
+plotRelVar(data, f_p2); title(paste("StdDev nn2 day",i_p))
 
 # Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir
 -----r
 par(mfrow=c(1,2))
-plotSimils(p_nn_exo, i_np); title(paste("Weights nn exo day",i_np))
-plotSimils(p_nn_exo, i_p); title(paste("Weights nn exo day",i_p))
+plotSimils(p_nn, i_np); title(paste("Weights nn day",i_np))
+plotSimils(p_nn, i_p); title(paste("Weights nn day",i_p))
 
-plotSimils(p_nn_mix, i_np); title(paste("Weights nn mix day",i_np))
-plotSimils(p_nn_mix, i_p); title(paste("Weights nn mix day",i_p))
+plotSimils(p_nn2, i_np); title(paste("Weights nn2 day",i_np))
+plotSimils(p_nn2, i_p); title(paste("Weights nn2 day",i_p))
 
 # - pollué à gauche, + pollué à droite
 -----r
-# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite
-p_nn_exo$getParams(i_np)$window
-p_nn_exo$getParams(i_p)$window
+# Fenêtres sélectionnées dans ]0,7] / nn à gauche, nn2 à droite
+p_nn$getParams(i_np)$window
+p_nn$getParams(i_p)$window
 
-p_nn_mix$getParams(i_np)$window
-p_nn_mix$getParams(i_p)$window
+p_nn2$getParams(i_np)$window
+p_nn2$getParams(i_p)$window
 % endfor
------
-<h2>Bilan</h2>
-
-Problème difficile : on ne fait guère mieux qu'une naïve moyenne des lendemains des jours
-similaires dans le passé, ce qui n'est pas loin de prédire une série constante égale à la
-dernière valeur observée (méthode "zéro"). La persistence donne parfois de bons résultats
-mais est trop instable (sensibilité à l'argument <code>same_day</code>).
-
-Comment améliorer la méthode ?
diff --git a/reports/report.ipynb b/reports/report.ipynb
index 3dac8ec..f0a06d0 100644
--- a/reports/report.ipynb
+++ b/reports/report.ipynb
@@ -75,17 +75,20 @@
     "reload(\"../pkg\")\n",
     "#p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n",
     "#p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "#p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
-    "p4 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"exo\")\n",
-    "p5 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")"
+    "p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
+    "p4 = computeForecast(data, indices_ch, \"Neighbors\", \"Neighbors\", horizon=H, simtype=\"mix\")\n",
+    "#p4 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"exo\")\n",
+    "#p5 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"endo\")\n",
+    "#p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -96,7 +99,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -107,17 +112,19 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
-    "e1 = computeError(data, p1, H)\n",
-    "e2 = computeError(data, p2, H)\n",
+    "#e1 = computeError(data, p1, H)\n",
+    "#e2 = computeError(data, p2, H)\n",
     "e3 = computeError(data, p3, H)\n",
     "e4 = computeError(data, p4, H)\n",
-    "e5 = computeError(data, p5, H)\n",
-    "e6 = computeError(data, p6, H)\n",
-    "plotError(list(e1,e2,e3,e4,e5,e6), cols=c(1,2,colors()[258], 4,5,6))"
+    "#e5 = computeError(data, p5, H)\n",
+    "#e6 = computeError(data, p6, H)\n",
+    "plotError(list(e3,e4), cols=c(1,2))"
    ]
   },
   {
@@ -130,7 +137,30 @@
    },
    "outputs": [],
    "source": [
-    "plotError(list(e4,e1,e2,e3, e5,e6), cols=c(1,2,3,4,5,6))"
+    "\tfirst_day = 1\n",
+    "params=p3$getParams(3)\n",
+    "\tfilter = (params$indices >= first_day)\n",
+    "\tindices = params$indices[filter]\n",
+    "\tweights = params$weights[filter]\n",
+    "\n",
+    "\n",
+    "\tgaps = sapply(indices, function(i) {\n",
+    "\t\tdata$getSerie(i+1)[1] - tail(data$getSerie(i), 1)\n",
+    "\t})\n",
+    "\tscal_product = weights * gaps\n",
+    "\tnorm_fact = sum( weights[!is.na(scal_product)] )\n",
+    "\tsum(scal_product, na.rm=TRUE) / norm_fact\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "hist(weights)"
    ]
   },
   {
@@ -146,14 +176,11 @@
     "options(repr.plot.width=9, repr.plot.height=4)\n",
     "par(mfrow=c(1,2))\n",
     "\n",
-    "plotPredReal(data, p_nn_exo, i_np); title(paste(\"PredReal nn exo day\",i_np))\n",
-    "plotPredReal(data, p_nn_exo, i_p); title(paste(\"PredReal nn exo day\",i_p))\n",
+    "plotPredReal(data, p3, 3); title(paste(\"PredReal nn exo day\",3))\n",
+    "plotPredReal(data, p3, 5); title(paste(\"PredReal nn exo day\",5))\n",
     "\n",
-    "plotPredReal(data, p_nn_mix, i_np); title(paste(\"PredReal nn mix day\",i_np))\n",
-    "plotPredReal(data, p_nn_mix, i_p); title(paste(\"PredReal nn mix day\",i_p))\n",
-    "\n",
-    "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n",
-    "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n",
+    "plotPredReal(data, p4, 3); title(paste(\"PredReal nn mix day\",3))\n",
+    "plotPredReal(data, p4, 5); title(paste(\"PredReal nn mix day\",5))\n",
     "\n",
     "# Bleu: prévue, noir: réalisée"
    ]
-- 
2.44.0