From 3d69ff21e577fc7bb082257280661b64536c20e8 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Mon, 13 Feb 2017 12:20:27 +0100
Subject: [PATCH] Initial commit

---
 .gitattributes                                |   5 +
 .gitfat                                       |   2 +
 .gitignore                                    |  11 +
 DESCRIPTION                                   |  36 ++
 LICENSE                                       |  22 +
 NOTES                                         |  29 +
 R/D_Neighbors.R                               |  17 +
 R/D_Persistence.R                             |   9 +
 R/D_Zero.R                                    |   9 +
 R/Data.R                                      |  78 +++
 R/Forecast.R                                  |  57 ++
 R/S_Average.R                                 |  27 +
 R/S_Neighbors.R                               | 233 ++++++++
 R/S_Persistence.R                             |  25 +
 R/ShapeForecaster.R                           |  37 ++
 R/getData.R                                   | 105 ++++
 R/getError.R                                  |  47 ++
 R/getForecast.R                               |  78 +++
 R/plot.R                                      | 166 ++++++
 R/utils.R                                     |  73 +++
 README.md                                     |  13 +
 data/README                                   |  13 +
 data/data.tar.xz                              |   1 +
 data/scripts/augment_meteo.R                  |  48 ++
 data/scripts/fill_NAs.R                       |  46 ++
 man/talweg-package.Rd                         |  42 ++
 reports/Reunion_28juin2016.docx               |   1 +
 .../AA_Benjamin-rapport.tex                   | 108 ++++
 reports/rapport_intermediaire/PageDeGarde.tex |  38 ++
 .../RapportIntermediaire.pdf                  |   1 +
 reports/report_02-02-2017.Rnw                 | 158 ++++++
 reports/report_02-02-2017.ipynb               | 508 ++++++++++++++++++
 reports/report_13-01-2017.rnw                 | 186 +++++++
 33 files changed, 2229 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 .gitfat
 create mode 100644 .gitignore
 create mode 100644 DESCRIPTION
 create mode 100644 LICENSE
 create mode 100644 NOTES
 create mode 100644 R/D_Neighbors.R
 create mode 100644 R/D_Persistence.R
 create mode 100644 R/D_Zero.R
 create mode 100644 R/Data.R
 create mode 100644 R/Forecast.R
 create mode 100644 R/S_Average.R
 create mode 100644 R/S_Neighbors.R
 create mode 100644 R/S_Persistence.R
 create mode 100644 R/ShapeForecaster.R
 create mode 100644 R/getData.R
 create mode 100644 R/getError.R
 create mode 100644 R/getForecast.R
 create mode 100644 R/plot.R
 create mode 100644 R/utils.R
 create mode 100644 README.md
 create mode 100644 data/README
 create mode 100644 data/data.tar.xz
 create mode 100644 data/scripts/augment_meteo.R
 create mode 100644 data/scripts/fill_NAs.R
 create mode 100644 man/talweg-package.Rd
 create mode 100644 reports/Reunion_28juin2016.docx
 create mode 100644 reports/rapport_intermediaire/AA_Benjamin-rapport.tex
 create mode 100644 reports/rapport_intermediaire/PageDeGarde.tex
 create mode 100644 reports/rapport_intermediaire/RapportIntermediaire.pdf
 create mode 100644 reports/report_02-02-2017.Rnw
 create mode 100644 reports/report_02-02-2017.ipynb
 create mode 100644 reports/report_13-01-2017.rnw

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..027fa27
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,5 @@
+*.pdf filter=fat
+*.zip filter=fat
+*.tar.xz filter=fat
+*.docx filter=fat
+*.ipynb filter=nbstripout
diff --git a/.gitfat b/.gitfat
new file mode 100644
index 0000000..fbdebc2
--- /dev/null
+++ b/.gitfat
@@ -0,0 +1,2 @@
+[rsync]
+remote = gitfat@auder.net:~/files/rnormand
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7811b7f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+#NOTE: knitr-generated files end up in an ignored folder under reports (?!)
+.RData
+.Rhistory
+.ipynb_checkpoints/
+*.swp
+*~
+/data/*.csv
+/man/*
+!/man/*-package.Rd
+*.tar.gz
+NAMESPACE
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..82f3e84
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,36 @@
+Package: talweg
+Title: talweg : Time-series sAmpLes forecasted With ExoGenous variables
+Version: 0.1-0
+Description: Forecast a curve sampled within the day (seconds, minutes, hours...),
+    using past measured curves, the history of some exogenous variable measurements
+    and the exogenous predictions for tomorrow. The main method is getForecast().
+Authors: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
+    Jean-Michel Poggi <Jean-Michel.Poggi@parisdescartes.fr> [ctb],
+    Bruno Portier <Bruno.Portier@insa-rouen.fr> [ctb]
+Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
+Depends:
+    R (>= 3.0)
+Suggests:
+    roxygen2,
+    testthat,
+    rmarkdown,
+    rainbow
+LazyData: yes
+URL: http://git.auder.net/?p=talweg.git
+License: MIT + file LICENSE
+RoxygenNote: 5.0.1
+Collate:
+    'D_Neighbors.R'
+    'D_Persistence.R'
+    'D_Zero.R'
+    'Data.R'
+    'Forecast.R'
+    'ShapeForecaster.R'
+    'S_Average.R'
+    'S_Neighbors.R'
+    'S_Persistence.R'
+    'getData.R'
+    'getError.R'
+    'getForecast.R'
+    'plot.R'
+    'utils.R'
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..e4b50f1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+Copyright (c) 2016-2017, Benjamin AUDER
+              2016-2017, Jean-Michel Poggi
+              2016-2017, Bruno Portier
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/NOTES b/NOTES
new file mode 100644
index 0000000..14a3132
--- /dev/null
+++ b/NOTES
@@ -0,0 +1,29 @@
+NOTE: (Michel)
+The ARPEGE forecast for day D is available from 4:10 UTC, i.e. 6:10 in summer and 5:10 in winter.
+ --> so forecasts are usable at 7:00 and 13:00, but not at 0:00
+Thus at 7:00 or 13:00 we use the current day's forecasts (to be completed),
+otherwise (0:00) the measurements of the last day (the day before)
+
+predicting one hour != averaging 4 quarter-hour predictions
+several sensors, averaged for the ATMO index
+spatial average over the sensors of a city?
+rule for computing an hourly average: >= 75% of the data
+
+Typical dates (Michel):
+19/01: heating pollution
+23/02: not polluted
+16/03: spreading (agricultural) pollution
+
+#TODO:
+#index the time axis with real dates
+#==> cheat by looking at tomorrow, then look for similar consecutive pairs in the
+#past; if none is similar: we will get it wrong!
+#--> return a confidence index?!
+#text file listing the top 20 closest days (e.g. Sat 20 January 2010...)
+#function indices <---> dates
+systematically horizon == 12h like plumelabs?
+Consider curves as ts objects (R package)? (Probably useless)
+On exogenous data we must predict the daily average instead of the curve --------> analyze (?!)
+
+--> javascript visualisation: later
+--> soft predict: do not discard series with NAs
diff --git a/R/D_Neighbors.R b/R/D_Neighbors.R
new file mode 100644
index 0000000..f7bb5a8
--- /dev/null
+++ b/R/D_Neighbors.R
@@ -0,0 +1,17 @@
+#' Obtain delta forecast by the Neighbors method
+#'
+#' @inheritParams getForecast
+#' @inheritParams getZeroDeltaForecast
+getNeighborsDeltaForecast = function(data, today, memory, horizon, shape_params, ...)
+{
+	if (any(is.na(shape_params$weights) | is.na(shape_params$indices)))
+		return (NA)
+
+	gaps = sapply(shape_params$indices, function(i) {
+		data$getSerie(i+1)[1] - tail(data$getSerie(i), 1)
+	})
+
+	scal_product = shape_params$weights * gaps
+	norm_fact = sum( shape_params$weights[!is.na(scal_product)] )
+	sum(scal_product, na.rm=TRUE) / norm_fact
+}
diff --git a/R/D_Persistence.R b/R/D_Persistence.R
new file mode 100644
index 0000000..2f7935a
--- /dev/null
+++ b/R/D_Persistence.R
@@ -0,0 +1,9 @@
+#' Obtain delta forecast by the Persistence method
+#'
+#' @inheritParams getForecast
+#' @inheritParams getZeroDeltaForecast
+getPersistenceDeltaForecast = function(data, today, memory, horizon, shape_params, ...)
+{
+	# Return level of last similar "tomorrow" (thus -6) computed on "horizon"
+	data$getSerie(today-6)[1] - tail(data$getSerie(today-7), 1)
+}
diff --git a/R/D_Zero.R b/R/D_Zero.R
new file mode 100644
index 0000000..e9e5f6d
--- /dev/null
+++ b/R/D_Zero.R
@@ -0,0 +1,9 @@
+#' Just predict zero deltas (for reference)
+#'
+#' @inheritParams getForecast
+#' @param today Index of the current day (predict tomorrow)
+#' @param shape_params Optional parameters returned by the shape forecaster
+getZeroDeltaForecast = function(data, today, memory, horizon, shape_params, ...)
+{
+	0
+}
diff --git a/R/Data.R b/R/Data.R
new file mode 100644
index 0000000..311453b
--- /dev/null
+++ b/R/Data.R
@@ -0,0 +1,78 @@
+#' @title Data
+#'
+#' @description Data encapsulation
+#'
+#' @field data List of
+#' \itemize{
+#'   \item time: vector of times
+#'   \item serie: centered series
+#'   \item level: corresponding levels
+#'   \item exo_hat: predicted exogenous variables
+#'   \item exo_Dm1: List of measured exogenous variables at day minus 1
+#' }
+#'
+#' @exportClass Data
+#' @export Data
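+#'
+#' @examples
+#' \dontrun{
+#' #Minimal usage sketch (illustrative values only, not from the real dataset):
+#' data = new("Data")
+#' data$append(new_time=1:24, new_serie=rnorm(24), new_level=25.,
+#'   new_exo_hat=data.frame(Temp=12.), new_exo_Dm1=data.frame(Temp=10.))
+#' data$getSize()   #1
+#' data$getSerie(1) #centered serie + level
+#' }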
+Data = setRefClass(
+	Class = "Data",
+
+	fields = list(
+		data = "list"
+	),
+
+	methods = list(
+		initialize = function(...)
+		{
+			"Initialize empty Data object"
+
+			callSuper(...)
+		},
+		getSize = function()
+		{
+			length(data)
+		},
+		append = function(new_time, new_serie, new_level, new_exo_hat, new_exo_Dm1)
+		{
+			"Append a new cell (time, serie, level, exo_hat, exo_Dm1)"
+
+			data[[length(data)+1]] <<- list("time"=new_time,"serie"=new_serie,"level"=new_level,
+				"exo_hat"=new_exo_hat,"exo_Dm1"=new_exo_Dm1)
+		},
+		getTime = function(index)
+		{
+			"Get time values at specified index"
+
+			data[[index]]$time
+		},
+		getCenteredSerie = function(index)
+		{
+			"Get serie values (centered) at specified index"
+
+			data[[index]]$serie
+		},
+		getLevel = function(index)
+		{
+			"Get level for the serie at specified index"
+
+			data[[index]]$level
+		},
+		getSerie = function(index)
+		{
+			"Get serie values (centered+level) at specified index"
+
+			data[[index]]$serie + data[[index]]$level
+		},
+		getExoHat = function(index)
+		{
+			"Get exogenous predictions at specified index"
+
+			data[[index]]$exo_hat
+		},
+		getExoDm1 = function(index)
+		{
+			"Get exogenous measures the day before specified index"
+
+			data[[index]]$exo_Dm1
+		}
+	)
+)
diff --git a/R/Forecast.R b/R/Forecast.R
new file mode 100644
index 0000000..1a3505b
--- /dev/null
+++ b/R/Forecast.R
@@ -0,0 +1,57 @@
+#' @title Forecast
+#'
+#' @description Forecast encapsulation
+#'
+#' @field pred List with
+#' \itemize{
+#'   \item serie: forecasted serie
+#'   \item params: corresponding list of parameters (weights, neighbors...)
+#'   \item index: corresponding index in data object
+#' }
+#'
+#' @exportClass Forecast
+#' @export Forecast
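+#'
+#' @examples
+#' \dontrun{
+#' #Sketch of typical read access, assuming 'pred' was returned by getForecast():
+#' pred$getSize()
+#' pred$getSerie(1)       #first forecasted serie
+#' pred$getParams(1)      #parameters used to build it (weights, window, ...)
+#' pred$getIndexInData(1) #index of the corresponding day in the Data object
+#' }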
+Forecast = setRefClass(
+	Class = "Forecast",
+
+	fields = list(
+		pred = "list"
+	),
+
+	methods = list(
+		initialize = function(...)
+		{
+			"Initialize empty Forecast object"
+
+			callSuper(...)
+		},
+		append = function(new_serie, new_params, new_index)
+		{
+			"Append a new triple (serie, params, index)"
+
+			pred[[length(pred)+1]] <<- list("serie"=new_serie, "params"=new_params, "index"=new_index)
+		},
+		getSize = function()
+		{
+			length(pred)
+		},
+		getSerie = function(index)
+		{
+			"Get serie values at specified index"
+
+			pred[[index]]$serie
+		},
+		getParams = function(index)
+		{
+			"Get params at specified index"
+
+			pred[[index]]$params
+		},
+		getIndexInData = function(index)
+		{
+			"Get (day)index at specified index"
+
+			pred[[index]]$index
+		}
+	)
+)
diff --git a/R/S_Average.R b/R/S_Average.R
new file mode 100644
index 0000000..9694693
--- /dev/null
+++ b/R/S_Average.R
@@ -0,0 +1,27 @@
+#' @include ShapeForecaster.R
+#'
+#' @title Average Shape Forecaster
+#'
+#' @description Return the (pointwise) average of all the centered day curves in the past.
+#'   Inherits \code{\link{ShapeForecaster}}
+AverageShapeForecaster = setRefClass(
+	Class = "AverageShapeForecaster",
+	contains = "ShapeForecaster",
+
+	methods = list(
+		initialize = function(...)
+		{
+			callSuper(...)
+		},
+		predict = function(today, memory, horizon, ...)
+		{
+			avg = rep(0., horizon)
+			nb_days = 0
+			for (i in max(today-memory,1):today)
+			{
+				if (!any(is.na(data$getSerie(i))))
+				{
+					avg = avg + data$getCenteredSerie(i)[1:horizon]
+					nb_days = nb_days + 1
+				}
+			}
+			#Divide by the number of curves actually summed, to return a true pointwise average
+			avg / nb_days
+		}
+	)
+)
diff --git a/R/S_Neighbors.R b/R/S_Neighbors.R
new file mode 100644
index 0000000..c44bd9b
--- /dev/null
+++ b/R/S_Neighbors.R
@@ -0,0 +1,233 @@
+#' @include ShapeForecaster.R
+#'
+#' @title Neighbors Shape Forecaster
+#'
+#' @description Predict tomorrow as a weighted combination of "futures of the past" days.
+#'   Inherits \code{\link{ShapeForecaster}}
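+#'
+#' @examples
+#' \dontrun{
+#' #Minimal sketch, assuming 'data' was returned by getData():
+#' nsf = new("NeighborsShapeForecaster", data=data)
+#' shape = nsf$predict(today=2200, memory=Inf, horizon=12, simtype="exo")
+#' nsf$getParameters()$weights #similarities used in the weighted combination
+#' }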
+NeighborsShapeForecaster = setRefClass(
+	Class = "NeighborsShapeForecaster",
+	contains = "ShapeForecaster",
+
+	methods = list(
+		initialize = function(...)
+		{
+			callSuper(...)
+		},
+		predict = function(today, memory, horizon, ...)
+		{
+			# (re)initialize computed parameters
+			params <<- list("weights"=NA, "indices"=NA, "window"=NA)
+
+			first_day = max(today - memory, 1)
+			# The first day is generally not complete:
+			if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2)))
+				first_day = 2
+
+			# Predict only on non-NAs days (TODO:)
+			if (any(is.na(data$getSerie(today))))
+				return (NA)
+
+			# Determine indices of no-NAs days followed by no-NAs tomorrows
+			fdays_indices = c()
+			for (i in first_day:(today-1))
+			{
+				if ( !any(is.na(data$getSerie(i)) | is.na(data$getSerie(i+1))) )
+					fdays_indices = c(fdays_indices, i)
+			}
+
+			#GET OPTIONAL PARAMS
+			# Similarity computed with exogenous variables ? endogenous ? both ? ("exo","endo","mix")
+			simtype = ifelse(hasArg("simtype"), list(...)$simtype, "exo")
+			simthresh = ifelse(hasArg("simthresh"), list(...)$simthresh, 0.)
+			kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss")
+			mix_strategy = ifelse(hasArg("mix_strategy"), list(...)$mix_strategy, "neighb") #or "mult"
+			same_season = ifelse(hasArg("same_season"), list(...)$same_season, TRUE)
+			if (hasArg(h_window))
+				return (.predictAux(fdays_indices, today, horizon, list(...)$h_window, kernel, simtype,
+					simthresh, mix_strategy, FALSE))
+			#END GET
+
+			# Indices for cross-validation; TODO: 45 = magic number
+			indices = getSimilarDaysIndices(today, limit=45, same_season=same_season)
+			#indices = (end_index-45):(end_index-1)
+
+			# Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
+			errorOnLastNdays = function(h, kernel, simtype)
+			{
+				error = 0
+				nb_jours = 0
+				for (i in indices)
+				{
+					# NOTE: predict only on non-NAs days followed by non-NAs (TODO:)
+					if (!any(is.na(data$getSerie(i)) | is.na(data$getSerie(i+1))))
+					{
+						nb_jours = nb_jours + 1
+						# mix_strategy is never used here (simtype != "mix"), therefore left blank
+						prediction = .predictAux(fdays_indices, i, horizon, h, kernel, simtype, simthresh,
+							"", FALSE)
+						if (!is.na(prediction[1]))
+							error = error + mean((data$getCenteredSerie(i+1)[1:horizon] - prediction)^2)
+					}
+				}
+				return (error / nb_jours)
+			}
+
+			h_best_exo = 1.
+			if (simtype != "endo" && !(simtype=="mix" && mix_strategy=="neighb"))
+			{
+				h_best_exo = optimize(errorOnLastNdays, interval=c(0,10), kernel=kernel,
+					simtype="exo")$minimum
+			}
+			if (simtype != "exo")
+			{
+				h_best_endo = optimize(errorOnLastNdays, interval=c(0,10), kernel=kernel,
+					simtype="endo")$minimum
+			}
+
+			if (simtype == "endo")
+			{
+				return (.predictAux(fdays_indices, today, horizon, h_best_endo, kernel, "endo",
+					simthresh, "", TRUE))
+			}
+			if (simtype == "exo")
+			{
+				return (.predictAux(fdays_indices, today, horizon, h_best_exo, kernel, "exo",
+					simthresh, "", TRUE))
+			}
+			if (simtype == "mix")
+			{
+				return (.predictAux(fdays_indices, today, horizon, c(h_best_endo,h_best_exo), kernel,
+					"mix", simthresh, mix_strategy, TRUE))
+			}
+		},
+		# Precondition: "today" is full (no NAs)
+		.predictAux = function(fdays_indices, today, horizon, h, kernel, simtype, simthresh,
+			mix_strategy, final_call)
+		{
+			dat = data$data #HACK: faster this way...
+
+			fdays_indices = fdays_indices[fdays_indices < today]
+			# TODO: 3 = magic number
+			if (length(fdays_indices) < 3)
+				return (NA)
+
+			if (simtype != "exo")
+			{
+				h_endo = ifelse(simtype=="mix", h[1], h)
+
+				# Distances from last observed day to days in the past
+				distances2 = rep(NA, length(fdays_indices))
+				for (i in seq_along(fdays_indices))
+				{
+					delta = dat[[today]]$serie - dat[[ fdays_indices[i] ]]$serie
+					# Require no NA values to compute the distance (a relaxed ">= 50% non-NA" rule is commented out)
+					if (sum(is.na(delta)) <= 0) #length(delta)/2)
+						distances2[i] = mean(delta^2) #, na.rm=TRUE)
+				}
+
+				sd_dist = sd(distances2)
+				simils_endo =
+					if (kernel=="Gauss") {
+						exp(-distances2/(sd_dist*h_endo^2))
+					} else { #Epanechnikov
+						u = 1 - distances2/(sd_dist*h_endo^2)
+						u[abs(u)>1] = 0.
+						u
+					}
+			}
+
+			if (simtype != "endo")
+			{
+				h_exo = ifelse(simtype=="mix", h[2], h)
+
+				# TODO: [rnormand] if predict_at == 0h then we should use measures from day minus 1
+				M = matrix( nrow=1+length(fdays_indices), ncol=1+length(dat[[today]]$exo_hat) )
+				M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo_hat) )
+				for (i in seq_along(fdays_indices))
+				{
+					M[i+1,] = c( dat[[ fdays_indices[i] ]]$level,
+						as.double(dat[[ fdays_indices[i] ]]$exo_hat) )
+				}
+
+				sigma = cov(M) #NOTE: robust covariance is way too slow
+				sigma_inv = qr.solve(sigma)
+
+				# Distances from last observed day to days in the past
+				distances2 = rep(NA, nrow(M)-1)
+				for (i in 2:nrow(M))
+				{
+					delta = M[1,] - M[i,]
+					distances2[i-1] = delta %*% sigma_inv %*% delta
+				}
+
+				sd_dist = sd(distances2)
+				simils_exo =
+					if (kernel=="Gauss") {
+						exp(-distances2/(sd_dist*h_exo^2))
+					} else { #Epanechnikov
+						u = 1 - distances2/(sd_dist*h_exo^2)
+						u[abs(u)>1] = 0.
+						u
+					}
+			}
+
+			if (simtype=="mix")
+			{
+				if (mix_strategy == "neighb")
+				{
+					#Only the (60) most similar days according to exogenous variables are taken into account
+					#TODO: 60 = magic number
+					keep_indices =
+						sort(simils_exo, decreasing=TRUE, index.return=TRUE)$ix[1:(min(60,length(simils_exo)))]
+					simils_endo[-keep_indices] = 0.
+				} else #mix_strategy == "mult"
+				{
+					simils_endo = simils_endo * simils_exo
+				}
+			}
+
+			similarities =
+				if (simtype != "exo") {
+					simils_endo
+				} else {
+					simils_exo
+				}
+
+			if (simthresh > 0.)
+			{
+				max_sim = max(similarities)
+				# Set to 0 all similarities s where s / max_sim < simthresh, but keep at least 60
+				# Sort decreasing so that the largest similarities come first
+				ordering = sort(similarities / max_sim, decreasing=TRUE, index.return=TRUE)
+				if (ordering$x[60] < simthresh)
+				{
+					similarities[ ordering$ix[ -(1:60) ] ] = 0.
+				} else
+				{
+					limit = 61
+					while (limit < length(similarities) && ordering$x[limit] >= simthresh)
+						limit = limit + 1
+					similarities[ ordering$ix[ -(1:limit) ] ] = 0.
+				}
+				}
+			}
+
+			prediction = rep(0, horizon)
+			for (i in seq_along(fdays_indices))
+				prediction = prediction + similarities[i] * dat[[ fdays_indices[i]+1 ]]$serie[1:horizon]
+
+			prediction = prediction / sum(similarities, na.rm=TRUE)
+			if (final_call)
+			{
+				params$weights <<- similarities
+				params$indices <<- fdays_indices
+				params$window <<-
+					if (simtype=="endo") {
+						h_endo
+					} else if (simtype=="exo") {
+						h_exo
+					} else {
+						c(h_endo,h_exo)
+					}
+			}
+			return (prediction)
+		}
+	)
+)
diff --git a/R/S_Persistence.R b/R/S_Persistence.R
new file mode 100644
index 0000000..017402f
--- /dev/null
+++ b/R/S_Persistence.R
@@ -0,0 +1,25 @@
+#' @include ShapeForecaster.R
+#'
+#' @title Persistence Shape Forecaster
+#'
+#' @description Return the centered curve of the last (similar) day (it may contain a lot of NAs,
+#'   but we cannot do better). Inherits \code{\link{ShapeForecaster}}
+PersistenceShapeForecaster = setRefClass(
+	Class = "PersistenceShapeForecaster",
+	contains = "ShapeForecaster",
+
+	methods = list(
+		initialize = function(...)
+		{
+			callSuper(...)
+		},
+		predict = function(today, memory, horizon, ...)
+		{
+			#return the centered curve of the last (similar) day (may contain many NAs, but we cannot do better)
+			last_similar_serie = data$getCenteredSerie(today-6)[1:horizon]
+			if (any(is.na(last_similar_serie))) #TODO:
+				return (NA)
+			last_similar_serie
+		}
+	)
+)
diff --git a/R/ShapeForecaster.R b/R/ShapeForecaster.R
new file mode 100644
index 0000000..0b448d7
--- /dev/null
+++ b/R/ShapeForecaster.R
@@ -0,0 +1,37 @@
+#' @title Shape Forecaster
+#'
+#' @description Generic class to represent a shape forecaster
+#'
+#' @field params List of computed parameters, potentially useful for some DeltaForecasters
+#' @field data Dataset, object of class Data
+ShapeForecaster = setRefClass(
+	Class = "ShapeForecaster",
+
+	fields = list(
+		params = "list",
+		data = "Data"
+	),
+
+	methods = list(
+		initialize = function(...)
+		{
+			"Initialize (generic) ShapeForecaster object"
+
+			callSuper(...)
+			if (!hasArg(data))
+				stop("ShapeForecaster must be initialized with a Data object")
+			params <<- list()
+		},
+		predict = function(today, memory, horizon, ...)
+		{
+			"Obtain a new forecasted time series (+ side-effect: compute parameters)"
+
+			#empty default implementation: to implement in inherited classes
+		},
+		getParameters = function()
+		{
+			params
+		}
+	)
+)
+
diff --git a/R/getData.R b/R/getData.R
new file mode 100644
index 0000000..c2e6842
--- /dev/null
+++ b/R/getData.R
@@ -0,0 +1,105 @@
+#' @title Acquire data in a clean format
+#'
+#' @description Take as input data frames and/or files containing raw data, plus timezones, and
+#'   output a Data object, roughly corresponding to a list where each cell contains all values
+#'   for one day (see \code{?Data}). Current limitation: series (in working_tz) must start
+#'   right after midnight (to keep in sync with exogenous vars)
+#'
+#' @param ts_data Time-series, as a data frame (DB style: 2 columns, first is date/time,
+#'   second is value) or a CSV file
+#' @param exo_data Exogenous variables, as a data frame or a CSV file; the first column contains
+#'   dates, the next block contains measurements for the day, and the final block contains
+#'   exogenous forecasts
+#' @param input_tz Timezone in the input files ("GMT" or e.g. "Europe/Paris")
+#' @param date_format How date/time are stored (e.g. year/month/day hour:minutes;
+#'   see \code{strptime})
+#' @param working_tz Timezone to work with ("GMT" or e.g. "Europe/Paris")
+#' @param predict_at When does the prediction take place? ab[:cd][:ef] where a,b,c,d,e,f
+#'   are digits in [0-9] and define an hour[:minute[:second]]; the time must be present in the file
+#'
+#' @return An object of class Data
+#'
+#' @export
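+#'
+#' @examples
+#' \dontrun{
+#' #Sketch using the file names documented in data/README (files not shipped in the package):
+#' data = getData(ts_data="data/pm10_mesures_H_loc.csv", exo_data="data/meteo_extra_noNAs.csv",
+#'   input_tz="Europe/Paris", working_tz="Europe/Paris", predict_at="07")
+#' data$getSize()
+#' data$getSerie(3) #third day, centered serie + level
+#' }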
+getData = function(ts_data, exo_data,
+	input_tz="GMT", date_format="%d/%m/%Y %H:%M", working_tz="GMT", predict_at="00")
+{
+	# Sanity checks (not full, but sufficient at this stage)
+	if (!is.character(input_tz) || !is.character(working_tz))
+		stop("Bad timezone (see ?timezone)")
+	input_tz = input_tz[1]
+	working_tz = working_tz[1]
+	if (!is.data.frame(ts_data) && !is.character(ts_data))
+		stop("Bad time-series input (data frame or CSV file)")
+	if (is.character(ts_data))
+		ts_data = ts_data[1]
+	pattern_index_in_predict_at = grep("^[0-9]{2}(:[0-9]{2}){0,2}$", predict_at)
+	if (!is.character(predict_at) || length(pattern_index_in_predict_at) == 0)
+		stop("Bad predict_at ( ^[0-9]{2}(:[0-9]{2}){0,2}$ )")
+	predict_at = predict_at[ pattern_index_in_predict_at[1] ]
+	if (!is.character(date_format))
+		stop("Bad date_format (character)")
+	date_format = date_format[1]
+
+	ts_df =
+		if (is.character(ts_data)) {
+			read.csv(ts_data)
+		} else {
+			ts_data
+		}
+	exo_df =
+		if (is.character(exo_data)) {
+			read.csv(exo_data)
+		} else {
+			exo_data
+		}
+	# Convert to the desired timezone (usually "GMT" or "Europe/Paris")
+	formatted_dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz=input_tz)
+	ts_df[,1] = format(as.POSIXct(formatted_dates_POSIXlt), tz=working_tz, usetz=TRUE)
+
+	if (nchar(predict_at) == 2)
+		predict_at = paste(predict_at,":00",sep="")
+	if (nchar(predict_at) == 5)
+		predict_at = paste(predict_at,":00",sep="")
+
+	line = 1 #index in PM10 file (24 lines for 1 cell)
+	nb_lines = nrow(ts_df)
+	nb_exos = ( ncol(exo_df) - 1 ) / 2
+	data = list() #new("Data")
+	i = 1 #index of a cell in data
+	while (line <= nb_lines)
+	{
+		time = c()
+		serie = c()
+		repeat {
+		{
+			time = c(time, ts_df[line,1])
+			serie = c(serie, ts_df[line,2])
+			line = line + 1
+		};
+		if (line >= nb_lines + 1
+			# NOTE: always second part of date/time, because it has been formatted
+			|| strsplit(as.character(ts_df[line-1,1])," ")[[1]][2] == predict_at)
+		{
+			break
+		}}
+
+		# NOTE: if predict_at does not cut days at midnight,
+		# for the exogenous to be synchronized they need to be shifted
+		if (predict_at != "00:00:00")
+		{
+			exo_hat = as.data.frame(exo_df[max(1,i-1),(1+nb_exos+1):(1+2*nb_exos)])
+			exo_Dm1 = if (i>=3) as.data.frame(exo_df[i-1,2:(1+nb_exos)]) else NA
+		} else
+		{
+			exo_hat = as.data.frame(exo_df[i,(1+nb_exos+1):(1+2*nb_exos)])
+			exo_Dm1 = if (i>=2) as.data.frame(exo_df[i-1,2:(1+nb_exos)]) else NA
+		}
+		i = i + 1
+		#center data
+		level = mean(serie, na.rm=TRUE)
+		centered_serie = serie - level
+#		data$append(time, centered_serie, level, exo_hat, exo_Dm1) #TODO: why is this slow?
+		data[[length(data)+1]] = list("time"=time, "serie"=centered_serie, "level"=level,
+			"exo_hat"=exo_hat, "exo_Dm1"=exo_Dm1)
+	}
+	new("Data",data=data)
+}
diff --git a/R/getError.R b/R/getError.R
new file mode 100644
index 0000000..61e7910
--- /dev/null
+++ b/R/getError.R
@@ -0,0 +1,47 @@
+#' @title Get error
+#'
+#' @description Obtain the errors between forecast and data
+#'
+#' @param data Dataset, object of class \code{Data} output of \code{getData}
+#' @param forecast Forecast object, class \code{Forecast} output of \code{getForecast}
+#' @param horizon Horizon where to compute the error (<= horizon used in \code{getForecast})
+#'
+#' @return A list (abs,MAPE) of lists (day,indices)
+#'
+#' @export
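+#'
+#' @examples
+#' \dontrun{
+#' #Follow-up sketch, assuming 'data' and 'pred' come from getData() / getForecast():
+#' err = getError(data, pred, horizon=12)
+#' mean(err$MAPE$indices, na.rm=TRUE) #average MAPE over the forecasted days
+#' plot(err$abs$day, type="l")        #mean absolute error per time step
+#' }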
+getError = function(data, forecast, horizon)
+{
+	L = forecast$getSize()
+	mape_day = rep(0, horizon)
+	abs_day = rep(0, horizon)
+	mape_indices = rep(NA, L)
+	abs_indices = rep(NA, L)
+
+	nb_no_NA_data = 0
+	for (i in seq_len(L))
+	{
+		index = forecast$getIndexInData(i)
+		serie = data$getSerie(index+1)[1:horizon]
+		pred = forecast$getSerie(i)[1:horizon]
+		if (!any(is.na(serie)) && !any(is.na(pred)))
+		{
+			nb_no_NA_data = nb_no_NA_data + 1
+			mape_increment = abs(serie - pred) / serie
+			mape_increment[is.nan(mape_increment)] = 0. # 0 / 0
+			mape_increment[!is.finite(mape_increment)] = 1. # >0 / 0
+			mape_day = mape_day + mape_increment
+			abs_increment = abs(serie - pred)
+			abs_day = abs_day + abs_increment
+			mape_indices[i] = mean(mape_increment)
+			abs_indices[i] = mean(abs_increment)
+		}
+	}
+
+	list(
+		"abs" = list(
+			"day" = abs_day / nb_no_NA_data,
+			"indices" = abs_indices),
+		"MAPE" = list(
+			"day" = mape_day / nb_no_NA_data,
+			"indices" = mape_indices) )
+}
diff --git a/R/getForecast.R b/R/getForecast.R
new file mode 100644
index 0000000..c3248a9
--- /dev/null
+++ b/R/getForecast.R
@@ -0,0 +1,78 @@
+#' @title get Forecast
+#'
+#' @description Predict time-series curves for the selected day indices (lines in data).
+#'   Run the forecasting task described by \code{delta_forecaster_name} and
+#'   \code{shape_forecaster_name} on data obtained with \code{getData}
+#'
+#' @param data Dataset, object of type \code{Data} output of \code{getData}
+#' @param indices Days indices where to forecast (the day after)
+#' @param memory Data depth (in days) to be used for prediction
+#' @param horizon Number of time steps to predict
+#' @param shape_forecaster_name Name of the shape forecaster
+#' \itemize{
+#'   \item Persistence: use the values of the last (similar, next) day
+#'   \item Neighbors: use PM10 from the k closest neighbors' tomorrows
+#'   \item Average: global average of all the (similar) "tomorrows of the past"
+#' }
+#' @param delta_forecaster_name Name of the delta forecaster
+#' \itemize{
+#'   \item Persistence: use the last (similar) day values
+#'   \item Neighbors: re-use the weights optimized in the corresponding shape forecaster
+#'   \item Zero: just output 0 (no adjustment)
+#' }
+#' @param ... Additional parameters for the forecasting models
+#'
+#' @return An object of class Forecast
+#'
+#' @examples
+#' data = getData(ts_data="data/pm10_mesures_H_loc.csv", exo_data="data/meteo_extra_noNAs.csv",
+#'   input_tz = "Europe/Paris", working_tz="Europe/Paris", predict_at="07")
+#' pred = getForecast(data, 2200:2230, Inf, 12, "Persistence", "Persistence")
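+#' #The forecasts can then be scored with getError() (short follow-up sketch):
+#' err = getError(data, pred, 12)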
+#' \dontrun{#Sketch for real-time mode:
+#' data = new("Data", ...)
+#' repeat {
+#'   data$append(some_new_data)
+#'   pred = getForecast(data, ...)
+#'   #do_something_with_pred
+#' }}
+#' @export
+getForecast = function(data, indices, memory, horizon,
+	shape_forecaster_name, delta_forecaster_name, ...)
+{
+	horizon = as.integer(horizon)[1]
+	if (horizon<=0 || horizon>length(data$getCenteredSerie(2)))
+		stop("Horizon too short or too long")
+	# Convert potential date indices first, then check the resulting integer range
+	indices = sapply(indices, dateIndexToInteger, data)
+	if (any(indices<=0 | indices>data$getSize()))
+		stop("Indices out of range")
+
+	#NOTE: some asymmetry here...
+	shape_forecaster = new(paste(shape_forecaster_name,"ShapeForecaster",sep=""), data=data)
+	#A little bit strange, but match.fun() and get() fail
+	delta_forecaster = getFromNamespace(
+		paste("get",delta_forecaster_name,"DeltaForecast",sep=""), "talweg")
+
+	pred = list()
+	for (today in indices)
+	{
+		#NOTE: To estimate timing...
+#		print(paste("Predict until index",today))
+
+		#shape always predicted first (on centered series, no scaling taken into account),
+		#with side-effect: optimize some parameters (h, weights, ...)
+		predicted_shape = shape_forecaster$predict(today, memory, horizon, ...)
+		#then, delta prediction can re-use some variables optimized previously (like neighbors infos)
+		predicted_delta = delta_forecaster(data, today, memory, horizon,
+			shape_forecaster$getParameters(), ...)
+
+		#TODO: this way is faster than a call to append(); why ?
+		pred[[length(pred)+1]] = list(
+			# Predict the shape, align it on the end of the current day, and add the predicted jump
+			serie = predicted_shape + tail( data$getSerie(today), 1 ) - predicted_shape[1] +
+				predicted_delta,
+			params = shape_forecaster$getParameters(),
+			index = today )
+	}
+	new("Forecast",pred=pred)
+}
diff --git a/R/plot.R b/R/plot.R
new file mode 100644
index 0000000..e5d4753
--- /dev/null
+++ b/R/plot.R
@@ -0,0 +1,166 @@
+#' @title plot measured / predicted
+#'
+#' @description Plot measured curve (in black) and predicted curve (in red)
+#'
+#' @param data Object returned by \code{getData}
+#' @param pred Object as returned by \code{getForecast}
+#' @param index Index in forecasts
+#'
+#' @export
+plotPredReal <- function(data, pred, index)
+{
+	horizon = length(pred$getSerie(1))
+	par(mar=c(4.7,5,1,1), cex.axis=2, cex.lab=2, lwd=2)
+	measure = data$getSerie(pred$getIndexInData(index)+1)[1:horizon]
+	yrange = range( pred$getSerie(index), measure )
+	plot(measure, type="l", ylim=yrange, lwd=3)
+	par(new=TRUE)
+	plot(pred$getSerie(index), type="l", col=2, ylim=yrange, lwd=3)
+}
+
+#' @title Plot filaments
+#'
+#' @description Plot the most similar days in the past together with their "tomorrows"; the smaller the distance, the darker the curve
+#'
+#' @param data Object as returned by \code{getData}
+#' @param index Index in data
+#' @param limit Number of neighbors to consider
+#'
+#' @export
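+#'
+#' @examples
+#' \dontrun{
+#' #Sketch, assuming 'data' was returned by getData(): filaments around day 2200
+#' plotFilaments(data, index=2200, limit=60)
+#' }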
+plotFilaments <- function(data, index, limit=60)
+{
+	index = dateIndexToInteger(index, data)
+	ref_serie = data$getCenteredSerie(index)
+	if (any(is.na(ref_serie)))
+		stop("plotFilaments requires a serie without NAs")
+	L = length(ref_serie)
+	first_day = ifelse(length(data$getCenteredSerie(1)) < L, 2, 1)
+	distances = sapply(first_day:(index-1), function(i) {
+		sqrt( sum( (ref_serie - data$getCenteredSerie(i))^2 ) / L )
+	})
+	# HACK to suppress NA effect while keeping indexation
+	distances[is.na(distances)] = max(distances,na.rm=TRUE) + 1
+	indices = sort(distances, index.return=TRUE)$ix[1:min(limit,index-first_day)]
+	yrange = range( ref_serie, sapply( indices, function(i) {
+		# indices refer to positions in first_day:(index-1); map back to day numbers
+		day = i + first_day - 1
+		serie = c(data$getCenteredSerie(day), data$getCenteredSerie(day+1))
+		if (!all(is.na(serie)))
+			return ( range(serie, na.rm=TRUE) )
+		return (0)
+	}) )
+	grays = gray.colors(20, 0.1, 0.9) #TODO: 20 == magic number
+	colors = c(
+		grays[ floor( 20.5 * distances[indices] / (1+max(distances[indices])) ) ], "#FF0000")
+	par(mar=c(4.7,5,1,1), cex.axis=2, cex.lab=2, lwd=2)
+	for (i in seq_len(length(indices)+1))
+	{
+		ind = ifelse(i<=length(indices), indices[i] + first_day - 1, index)
+		plot(c(data$getCenteredSerie(ind),data$getCenteredSerie(ind+1)),
+			ylim=yrange, type="l", col=colors[i],
+			xlab=ifelse(i==1,"Temps (en heures)",""), ylab=ifelse(i==1,"PM10 centré",""))
+		if (i <= length(indices))
+			par(new=TRUE)
+	}
+}
+
+#' @title Plot similarities
+#'
+#' @description Plot histogram of similarities (weights)
+#'
+#' @param pred Object as returned by \code{getForecast}
+#' @param index Index in forecasts (not in data)
+#'
+#' @export
+plotSimils <- function(pred, index)
+{
+	weights = pred$getParams(index)$weights
+	if (is.null(weights))
+		stop("plotSimils only works on 'Neighbors' forecasts")
+	par(mar=c(4.7,5,1,1))
+	hist(pred$getParams(index)$weights, nclass=20, xlab="Weight", ylab="Frequency")
+}
+
+#' @title Plot error
+#'
+#' @description Draw error graphs, potentially from several runs of \code{getForecast}
+#'
+#' @param err Error as returned by \code{getError}
+#'
+#' @seealso \code{\link{plotPredReal}}, \code{\link{plotFilaments}}, \code{\link{plotSimils}}
+#'   \code{\link{plotFbox}}
+#'
+#' @export
+plotError <- function(err)
+{
+	par(mfrow=c(2,2), mar=c(4.7,5,1,1), cex.axis=2, cex.lab=2, lwd=2)
+	L = length(err)
+	yrange = range( sapply(1:L, function(index) ( err[[index]]$abs$day ) ), na.rm=TRUE )
+	for (i in seq_len(L))
+	{
+		plot(err[[i]]$abs$day, type="l", xlab=ifelse(i==1,"Temps (heures)",""),
+			ylab=ifelse(i==1,"Moyenne |y - y_hat|",""), ylim=yrange, col=i)
+		if (i < L)
+			par(new=TRUE)
+	}
+	yrange = range( sapply(1:L, function(index) ( err[[index]]$abs$indices ) ), na.rm=TRUE )
+	for (i in seq_len(L))
+	{
+		plot(err[[i]]$abs$indices, type="l", xlab=ifelse(i==1,"Temps (jours)",""),
+			ylab=ifelse(i==1,"Moyenne |y - y_hat|",""), ylim=yrange, col=i)
+		if (i < L)
+			par(new=TRUE)
+	}
+	yrange = range( sapply(1:L, function(index) ( err[[index]]$MAPE$day ) ), na.rm=TRUE )
+	for (i in seq_len(L))
+	{
+		plot(err[[i]]$MAPE$day, type="l", xlab=ifelse(i==1,"Temps (heures)",""),
+			ylab=ifelse(i==1,"MAPE moyen",""), ylim=yrange, col=i)
+		if (i < L)
+			par(new=TRUE)
+	}
+	yrange = range( sapply(1:L, function(index) ( err[[index]]$MAPE$indices ) ), na.rm=TRUE )
+	for (i in seq_len(L))
+	{
+		plot(err[[i]]$MAPE$indices, type="l", xlab=ifelse(i==1,"Temps (jours)",""),
+			ylab=ifelse(i==1,"MAPE moyen",""), ylim=yrange, col=i)
+		if (i < L)
+			par(new=TRUE)
+	}
+}
+
+#' @title Functional boxplot
+#'
+#' @description Draw the functional boxplot on the left, and bivariate plot on the right
+#'
+#' @param data Object returned by \code{getData}
+#' @param filter Optional filter: returns TRUE on the indices to process
+#'
+#' @export
+plotFbox <- function(data, filter=function(index) (TRUE))
+{
+	if (!requireNamespace("rainbow", quietly=TRUE))
+		stop("Functional boxplot requires the rainbow package")
+
+	start_index = 1
+	end_index = data$getSize()
+	if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2)))
+	{
+		# Shifted start (7am, or 1pm, or...)
+		start_index = 2
+		end_index = data$getSize() - 1
+	}
+
+	series_matrix = sapply(start_index:end_index, function(index) {
+		as.matrix(data$getSerie(index))
+	})
+	# Remove NAs. + filter TODO: merge with previous step: only one pass required...
+	nas_indices = seq_len(ncol(series_matrix))[ sapply( 1:ncol(series_matrix),
+		function(index) ( !filter(index) || any(is.na(series_matrix[,index])) ) ) ]
+	if (length(nas_indices) > 0)
+		series_matrix = series_matrix[,-nas_indices]
+
+	series_fds = rainbow::fds(seq_len(nrow(series_matrix)), series_matrix)
+	par(mfrow=c(1,2), mar=c(4.7,5,1,1), cex.axis=2, cex.lab=2)
+	rainbow::fboxplot(series_fds, "functional", "hdr", xlab="Temps (heures)", ylab="PM10",
+		plotlegend=FALSE, lwd=2)
+	rainbow::fboxplot(series_fds, "bivariate", "hdr", plotlegend=FALSE)
+}
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..879a7d7
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,73 @@
+#' @title dateIndexToInteger
+#'
+#' @description Transform a (potential) date index into an integer (relative to data)
+#'
+#' @param index Date (or integer) index
+#' @param data Object of class \code{Data}
+#'
+#' @export
+dateIndexToInteger = function(index, data)
+{
+	if (is.numeric(index))
+		index = as.integer(index)
+	#Integer indices are returned as-is; character indices are translated date --> integer below
+	if (is.integer(index))
+		return (index[1])
+	if (is.character(index))
+	{
+		dt = tryCatch(as.POSIXct(index[1]),
+			error = function(e) stop("Unrecognized index format"))
+		#TODO: tz arg to difftime ?
+		integerIndex <- as.integer( difftime(dt, data$getTime(1)) ) + 1
+		if (integerIndex > 0 && integerIndex <= data$getSize())
+			return (integerIndex)
+		stop("Date outside data range")
+	}
+	stop("Unrecognized index format")
+}
+
+#' @title getSimilarDaysIndices
+#'
+#' @description Find similar days indices in the past
+#'
+#' @param index Day index (numeric or date)
+#' @param limit Maximum number of indices to return
+#' @param same_season Should the indices correspond to days in the same season?
+#'
+#' @export
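+#'
+#' @examples
+#' \dontrun{
+#' #Sketch: up to 45 recent "same week day" indices before (integer) index 2200
+#' getSimilarDaysIndices(2200, limit=45, same_season=FALSE)
+#' }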
+getSimilarDaysIndices = function(index, limit, same_season)
+{
+	index = dateIndexToInteger(index)
+
+	#TODO: Tuesday should also be considered similar to Monday, Wednesday, Thursday, etc.
+	if (!same_season)
+	{
+		#take all similar days in recent past
+		nb_days = min( (index-1) %/% 7, limit)
+		return ( rep(index,nb_days) - 7*seq_len(nb_days) )
+	}
+
+	#Look for similar days in similar season (+/- 30 days)
+	days = c()
+	i = index
+	while (i >= 1 && length(days) < limit)
+	{
+		if (i < index)
+		{
+			days = c(days, i)
+			#look in the "future of the past"
+			for (j in 1:4)
+				days = c(days, i+7*j)
+		}
+		#...and in the "past of the past"
+		for (j in 1:4)
+		{
+			if (i - 7*j >= 1)
+				days = c(days, i-7*j)
+		}
+		# TODO: exact computation instead of -364
+		# 364 = closest multiple of 7 to 365; this drifts over the years, but there are few years, so OK
+		i = i - 364
+	}
+
+	return ( days[1:min(limit,length(days))] )
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7847664
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# Predict PM10 as functions of time
+
+Joint work with [Jean-Michel Poggi](http://www.math.u-psud.fr/~poggi/) and [Bruno Portier](http://lmi2.insa-rouen.fr/~bportier/)
+
+---
+
+TODO: ...
+
+---
+
+TODO: ...
+
+The final report can be found at [this location](http://www.airnormand.fr/Publications/Publications-telechargeables/Rapports-d-etudes)
diff --git a/data/README b/data/README
new file mode 100644
index 0000000..7275886
--- /dev/null
+++ b/data/README
@@ -0,0 +1,13 @@
+Data (compressed) in data.tar.xz
+================================
+
+pm10_mesures_[Q]H[_loc].csv: one measurement per line (a few ranges of NAs),
+from 10 December 2008 to 31 July 2016, in local time or GMT.
+
+meteo[_extra[_noNA]].csv: the initial weather data (measurements + predictions).
+Same dates; noNA: with NAs replaced by averages.
++ three computed variables,
+ * Season: 1 from April to August, 0 from September to March
+ * Pollution: -1 if undetermined (only NAs), 0 if <30, 1 if between 30 and 50, 2 if >50
+ * Week: 0 if weekend, 1 if weekday
+(Roughly speaking, I tried to apply "0 = less polluted, 1 = more polluted")
diff --git a/data/data.tar.xz b/data/data.tar.xz
new file mode 100644
index 0000000..6e5e696
--- /dev/null
+++ b/data/data.tar.xz
@@ -0,0 +1 @@
+#$# git-fat fb2d21849524e743ce1b3e5589efb40d1182f468               564468
diff --git a/data/scripts/augment_meteo.R b/data/scripts/augment_meteo.R
new file mode 100644
index 0000000..19efe99
--- /dev/null
+++ b/data/scripts/augment_meteo.R
@@ -0,0 +1,48 @@
+meteo_df = read.csv("meteo.csv")
+
+meteo_df$Season = 0
+meteo_df$Week = 0
+meteo_df$Pollution = -1
+
+#Need to load and aggregate PM10 by days
+pm10_df = read.csv("pm10_mesures.csv")
+
+line_number = 1 #line number in pm10 file
+for (i in 1:(nrow(meteo_df)))
+{
+	pm10s = c()
+	repeat {
+	{
+		pm10s = c(pm10s, pm10_df[line_number,2])
+		line_number = line_number + 1
+	};
+	if (line_number >= nrow(pm10_df)+1
+		|| strsplit(as.character(pm10_df[line_number,1])," ")[[1]][2] == '0:15')
+	{
+		break
+	}}
+	pm10_level = mean(pm10s, na.rm=TRUE)
+	#Fill Pollution column: -1 if no info, 0 to 2 for pollution level
+	if (!is.nan(pm10_level))
+	{
+		if (pm10_level < 30)
+			meteo_df$Pollution[i] = 0
+		else if (pm10_level <= 50)
+			meteo_df$Pollution[i] = 1
+		else #pm10 > 50
+			meteo_df$Pollution[i] = 2
+	}
+
+	#Also fill season + days of week variables
+	meteo_df$Season[i] = ifelse(
+		strsplit(as.character(meteo_df$Date[i]),'/')[[1]][1] %in% c("4","5","6","7","8"),
+		1, 0)
+	current_datetime = strptime(as.character(meteo_df$Date[i]), "%m/%d/%Y", tz="GMT")
+	meteo_df$Week[i] = ifelse(current_datetime$wday %in% c(6,0), 0, 1)
+}
+
+#see also:
+#http://stackoverflow.com/questions/8214303/conditional-replacement-of-values-in-a-data-frame
+
+#Finally write new data
+write.csv(meteo_df, file="meteo_extra.csv", row.names=FALSE)
diff --git a/data/scripts/fill_NAs.R b/data/scripts/fill_NAs.R
new file mode 100644
index 0000000..72af4cb
--- /dev/null
+++ b/data/scripts/fill_NAs.R
@@ -0,0 +1,46 @@
+fill_NAs = function(file)
+{
+	dat_with_date = read.csv(file)
+	dat = dat_with_date[,-1] #get rid of date column
+
+	line = 1
+	n = nrow(dat)
+	p = ncol(dat)
+	last_noNA_indices = rep(0, p)
+	while (line <= n)
+	{
+#		print(line)
+		for (i in 1:p)
+		{
+#			if (is.numeric(dat[line,i]))
+			if (!is.na(dat[line,i]))
+			{
+				if (last_noNA_indices[i] < line-1)
+				{
+					#do some completion
+					meanVal = ifelse(last_noNA_indices[i]>0,
+						0.5*(dat[last_noNA_indices[i],i]+dat[line,i]),
+						dat[line,i])
+
+					for (j in (last_noNA_indices[i]+1):(line-1))
+						dat[j,i] = meanVal
+				}
+				last_noNA_indices[i] = line
+			}
+		}
+		line = line + 1
+	}
+
+	#complete until end if needed
+	for (i in 1:p)
+	{
+		if (last_noNA_indices[i] < n)
+		{
+			for (j in (last_noNA_indices[i]+1):n)
+				dat[j,i] = dat[last_noNA_indices[i],i]
+		}
+	}
+
+	dat_with_date[,2:(p+1)] = dat
+	write.csv(dat_with_date, "NONA.csv", row.names=FALSE)
+}
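+
+#Usage sketch (the file name below is an assumption, following data/README):
+#fill_NAs("meteo_extra.csv") #writes the completed table to NONA.csv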
diff --git a/man/talweg-package.Rd b/man/talweg-package.Rd
new file mode 100644
index 0000000..c0a7515
--- /dev/null
+++ b/man/talweg-package.Rd
@@ -0,0 +1,42 @@
+\name{talweg-package}
+\alias{talweg-package}
+\alias{talweg}
+\docType{package}
+
+\title{
+	\packageTitle{talweg}
+}
+
+\description{
+	\packageDescription{talweg}
+}
+
+\details{
+	The package devtools should be useful at the development stage, since we rely on testthat for
+	unit tests and roxygen2 for documentation. knitr is used to generate the package vignette.
+	Concerning the other suggested packages:
+	\itemize{
+		\item{TODO...;}
+		\item{TODO...;}
+	}
+
+	The three main functions are getData(), getForecast() and getError():
+	\itemize{
+		\item{TODO...;}
+		\item{TODO...;}
+	}
+}
+
+\author{
+	\packageAuthor{talweg}
+
+	Maintainer: \packageMaintainer{talweg}
+}
+
+%\references{
+%	TODO: Literature or other references for background information
+%}
+
+%\examples{
+%	TODO: simple examples of the most important functions
+%}
diff --git a/reports/Reunion_28juin2016.docx b/reports/Reunion_28juin2016.docx
new file mode 100644
index 0000000..e7d07ad
--- /dev/null
+++ b/reports/Reunion_28juin2016.docx
@@ -0,0 +1 @@
+#$# git-fat c45d30e178d516079466abb2b1d39ff10f1f0ade                96701
diff --git a/reports/rapport_intermediaire/AA_Benjamin-rapport.tex b/reports/rapport_intermediaire/AA_Benjamin-rapport.tex
new file mode 100644
index 0000000..b285814
--- /dev/null
+++ b/reports/rapport_intermediaire/AA_Benjamin-rapport.tex
@@ -0,0 +1,108 @@
+\documentclass[a4paper,12pt]{article}
+
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+
+\renewcommand*\familydefault{\sfdefault} 
+
+%\marginparwidth 0pt
+%\oddsidemargin 0pt
+%\evensidemargin 0pt
+%\marginparsep 0pt
+%\topmargin 0pt
+%\textwidth 16cm
+%\textheight 23cm
+%\parindent 5mm
+
+%%%%%%% FROM KNITR
+
+\usepackage[pdftex]{graphicx}
+\usepackage[]{color}
+%% maxwidth is the original width if it is less than linewidth
+%% otherwise use linewidth (to make sure the graphics do not exceed the margin)
+\makeatletter
+\def\maxwidth{ %
+  \ifdim\Gin@nat@width>\linewidth
+    \linewidth
+  \else
+    \Gin@nat@width
+  \fi
+}
+\makeatother
+
+\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
+\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
+\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
+\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
+\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
+\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
+\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
+\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
+\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
+\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
+
+\usepackage{framed}
+\makeatletter
+\newenvironment{kframe}{%
+ \def\at@end@of@kframe{}%
+ \ifinner\ifhmode%
+  \def\at@end@of@kframe{\end{minipage}}%
+  \begin{minipage}{\columnwidth}%
+ \fi\fi%
+ \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
+ \colorbox{shadecolor}{##1}\hskip-\fboxsep
+     % There is no \\@totalrightmargin, so:
+     \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
+ \MakeFramed {\advance\hsize-\width
+   \@totalleftmargin\z@ \linewidth\hsize
+   \@setminipage}}%
+ {\par\unskip\endMakeFramed%
+ \at@end@of@kframe}
+\makeatother
+
+\definecolor{shadecolor}{rgb}{.97, .97, .97}
+\definecolor{messagecolor}{rgb}{0, 0, 0}
+\definecolor{warningcolor}{rgb}{1, 0, 1}
+\definecolor{errorcolor}{rgb}{1, 0, 0}
+\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
+
+\usepackage{alltt}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+
+\renewcommand*\familydefault{\sfdefault}
+
+\marginparwidth 0pt
+\oddsidemargin 0pt
+\evensidemargin 0pt
+\marginparsep 0pt
+\topmargin 0pt
+\textwidth 16cm
+\textheight 23cm
+\parindent 5mm
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+
+%%%%%%%% END FROM KNITR
+
+%%%%%%%%% page de garde
+
+%\usepackage[a4paper,left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
+\usepackage[frenchb]{babel}
+%\usepackage[pdftex]{graphicx}
+
+\usepackage{Sweave}
+% \usepackage[dvips]{graphicx}
+\usepackage{float}
+%\usepackage{multido}
+
+\setlength{\parindent}{0cm}
+\setlength{\parskip}{1ex plus 0.5ex minus 0.2ex}
+%\newcommand{\hsp}{\hspace{20pt}}
+\newcommand{\HRule}{\rule{\linewidth}{0.5mm}}
+
+%%%%%%% end page de garde
+
+\include{PageDeGarde}
+\include{R_Package}
+
+\end{document}
diff --git a/reports/rapport_intermediaire/PageDeGarde.tex b/reports/rapport_intermediaire/PageDeGarde.tex
new file mode 100644
index 0000000..f6aa8f0
--- /dev/null
+++ b/reports/rapport_intermediaire/PageDeGarde.tex
@@ -0,0 +1,38 @@
+\begin{document}
+
+\begin{titlepage}
+  \begin{sffamily}
+  \begin{center}
+
+    % Upper part of the page. The '~' is needed because \\
+    % only works if a paragraph has started.
+    \includegraphics[width=3cm]{FIGURES/logo_upsud.png}\hspace{2cm}
+    \includegraphics[width=4cm]{FIGURES/logo_parisdescartes.png}~\\[0.5cm]
+    \includegraphics[width=3cm]{FIGURES/logo_insarouen.jpg}\hspace{2cm}
+    \includegraphics[width=4cm]{FIGURES/logo_airnormand.jpg}~\\[2cm]
+
+    \textsc{\Large Rapport intermédiaire 15/12/2016}\\[2cm]
+
+    % Title
+    \HRule \\[2cm]
+    { \LARGE \bfseries PM10 : prédiction de courbes journalières\\[2cm] }
+		{\normalsize
+    % Author and supervisor
+    \begin{minipage}{0.4\textwidth}
+      \begin{flushleft} \large
+        Benjamin AUDER\\
+        Jean-Michel POGGI\\
+				Bruno PORTIER\\
+      \end{flushleft}
+    \end{minipage}
+    \begin{minipage}{0.4\textwidth}
+      \begin{flushright} \large
+       	Université Paris-Sud\\
+        Université Paris 5 \& Paris-Sud\\
+				INSA Rouen Normandie\\
+      \end{flushright}
+    \end{minipage}
+	}
+  \end{center}
+  \end{sffamily}
+\end{titlepage}
diff --git a/reports/rapport_intermediaire/RapportIntermediaire.pdf b/reports/rapport_intermediaire/RapportIntermediaire.pdf
new file mode 100644
index 0000000..e4cad43
--- /dev/null
+++ b/reports/rapport_intermediaire/RapportIntermediaire.pdf
@@ -0,0 +1 @@
+#$# git-fat cdf287cbbb06e773d911a86fed964ba258f27814              5614008
diff --git a/reports/report_02-02-2017.Rnw b/reports/report_02-02-2017.Rnw
new file mode 100644
index 0000000..bba8896
--- /dev/null
+++ b/reports/report_02-02-2017.Rnw
@@ -0,0 +1,158 @@
+\documentclass[a4paper,12pt]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+
+\renewcommand*\familydefault{\sfdefault}
+
+\marginparwidth 0pt
+\oddsidemargin 0pt
+\evensidemargin 0pt
+\marginparsep 0pt
+\topmargin 0pt
+\textwidth 16cm
+\textheight 23cm
+\parindent 5mm
+
+\begin{document}
+
+\section{R package "ppmfun"}
+
+The package $-$ Predict PM10 with FUNctional methods $-$ contains the code needed to (re)run
+the numerical experiments described in this document. The main function \emph{predictPM10}
+is divided into three parts, described successively in the next three paragraphs.\\
+
+<<setup, out.width='7cm', out.height='7cm'>>=
+#Load the library (after compilation, "R CMD INSTALL ppmfun/")
+library(ppmfun)
+@
+
+Note: based on our latest experiments, we consider that
+\begin{itemize}
+  \item the bandwidth obtained by optim() is left untouched;
+  \item we drop the method consisting in predicting shape and level in a completely
+    disconnected way: the two must be linked.
+\end{itemize}
+
+\subsection{Data acquisition}
+
+Given the heterogeneous nature of the data used $-$ functional for the PM10,
+vector-valued for the exogenous variables $-$, they are organized as a list
+\emph{data}, whose $i^{th}$ cell corresponds to the data available on day $i$ at
+the chosen prediction time $H$ (1:00, 8:00 or 14:00): that is, the PM10 values from
+$H-24h$ to $H-1h$, together with the weather variables predicted for the period from 1:00 to 0:00 of
+the current day (unless we predict at 0:00: in that case we take the values measured the day before).\\
+
+Example:\\
+<<data>>=
+#The first argument gives the desired time zone; "GMT" or "local"
+#for French time, or any other time zone.
+data = getData("local", "7h")
+@
+
+\subsection{Prediction}
+
+Two kinds of forecasts of the next $24h$ block must be distinguished:
+\begin{itemize}
+  \item forecast of the (centered) shape;
+  \item forecast of the jump from the end of one series to the beginning of the next.
+\end{itemize}
+
+\noindent One must therefore specify both a shape forecasting method ("Persistence" and
+"Neighbors" implemented) and a jump prediction method ("Zero", "Persistence" or
+"Neighbors"). The neighbors method is detailed below.\\
+
+\begin{enumerate}
+  \item \textbf{Data preparation}: computation of the 24h levels, windowing if requested
+    ("memory" parameter).
+  \item \textbf{Optimization of the scale parameters}: via the \emph{optim()} function,
+    minimizing the sum of the last 45 daily L2 errors.
+  \item \textbf{Final prediction}: once the scale parameter $h$ (or parameters, if "simtype" is
+    "mix") is determined, the similarities are evaluated on the exogenous and/or endogenous
+    variables, as $s(i,j) = \exp\left(-\frac{\mathrm{dist}^2(i,j)}{h^2}\right)$.
+    The formula given earlier in the report is then applied (a small illustrative chunk follows).
+\end{enumerate}
+
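+As a minimal illustration of this similarity formula (the squared distances and the bandwidth
+$h$ below are made up for the example; in the package they are computed and optimized):
+
+<<simil_sketch>>=
+#Hypothetical squared distances between the current day and 5 past days
+dist2 = c(0.3, 1.2, 0.05, 2.5, 0.8)
+h = 1. #bandwidth (fixed here only for illustration)
+similarities = exp(-dist2 / h^2)
+#Normalized weights used in the final weighted combination
+round(similarities / sum(similarities), 3)
+@
+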
+\subsection{Error computation}
+
+For each time step to forecast up to midnight of the current day, we compute the mean error
+over all similar past time steps (on the predicted range). Three
+types of errors are considered:
+\begin{itemize}
+  \item the "abs" error, equal to the mean absolute difference between measurement and prediction;
+  \item the "MAPE" error, equal to the absolute error normalized by the measurement;
+  \item the "RMSE" error, equal to the square root of the mean squared error.
+\end{itemize}
+
+\subsection{Numerical experiments}
+
+%, fig.show='hold'>>=
+<<xp1, out.width='18cm', out.height='6cm'>>=
+p_endo = predictPM10(data, 2200, 2230, 0,0, "Neighbors", "Neighbors", simtype="endo")
+p_exo = predictPM10(data, 2200, 2230, 0,0, "Neighbors", "Neighbors", simtype="exo")
+p_mix = predictPM10(data, 2200, 2230, 0,0, "Neighbors", "Neighbors", simtype="mix")
+p = list(p_endo, p_exo, p_mix)
+yrange_MAPE = range(p_mix$errors$MAPE, p_endo$errors$MAPE, p_exo$errors$MAPE)
+yrange_abs = range(p_mix$errors$abs, p_endo$errors$abs, p_exo$errors$abs)
+yrange_RMSE = range(p_mix$errors$RMSE, p_endo$errors$RMSE, p_exo$errors$RMSE)
+ranges = list(yrange_MAPE, yrange_abs, yrange_RMSE)
+par(mfrow=c(1,3))
+titles = paste("Erreur",c("MAPE","abs","RMSE"))
+for (i in 1:3) #error type (MAPE,abs,RMSE)
+{
+  for (j in 1:3) #model (endo,exo,mix)
+  {
+    plot(p[[j]]$errors[[i]], type="l", col=j, main=titles[i], xlab="Temps",
+      ylab="Erreur", ylim=ranges[[i]])
+    if (j < 3)
+      par(new=TRUE)
+  }
+}
+
+#Taking into account only the similarities on the exogenous variables seems
+#to lead to the lowest error.
+@
+
+<<xp2, out.width='18cm', out.height='6cm'>>=
+p_nn = predictPM10(data, 2200, 2230, 0, 0, "Neighbors", "Neighbors", sameSeaon=TRUE)
+p_np = predictPM10(data, 2200, 2230, 0, 0, "Neighbors", "Persistence", sameSeaon=TRUE)
+p_nz = predictPM10(data, 2200, 2230, 0, 0, "Neighbors", "Zero", sameSeaon=TRUE)
+p_pp = predictPM10(data, 2200, 2230, 0, 0, "Persistence", "Persistence")
+p_pz = predictPM10(data, 2200, 2230, 0, 0, "Persistence", "Zero")
+p = list(p_nn, p_np, p_nz, p_pp, p_pz)
+yrange_MAPE = range(p_nn$errors$MAPE, p_nz$errors$MAPE, p_np$errors$MAPE, p_pp$errors$MAPE, p_pz$errors$MAPE)
+yrange_abs = range(p_nn$errors$abs, p_nz$errors$abs, p_np$errors$abs, p_pp$errors$abs, p_pz$errors$abs)
+yrange_RMSE = range(p_nn$errors$RMSE, p_nz$errors$RMSE, p_np$errors$RMSE, p_pp$errors$RMSE, p_pz$errors$RMSE)
+ranges = list(yrange_MAPE, yrange_abs, yrange_RMSE)
+par(mfrow=c(1,3))
+for (i in 1:3) #error type (MAPE,abs,RMSE)
+{
+  for (j in 1:5) #model (nn,np,nz,pp,pz)
+  {
+    plot(p[[j]]$errors[[i]], type="l", col=j, main=titles[i], xlab="Temps",
+      ylab="Erreur", ylim=ranges[[i]])
+    if (j<5)
+      par(new=TRUE)
+  }
+}
+
+#Best results: nn and nz (np is worse)
+@
+
+%%TODO: analysis over the three periods mentioned by Michel; simtype=="exo" by default
+%16/03/2015
+%p_nn_epandage = predictPM10(data, 2200, 2200, 0, 0, "Neighbors", "Neighbors", sameSeaon=FALSE)
+%19/01/2015
+%p_nn_chauffage = predictPM10(data, 2200, 2200, 0, 0, "Neighbors", "Neighbors", sameSeaon=FALSE)
+%23/02/2015
+%p_nn_nonpollue = predictPM10(data, 2200, 2200, 0, 0, "Neighbors", "Neighbors", sameSeaon=FALSE)
+
+\subsection{Next steps}
+
+The day type is not taken into account in the neighbor search; doing so would sharply
+reduce the number of retained similarities, but could significantly improve the
+forecasts. \textcolor{blue}{OK: it is now taken into account}\\
+
+\noindent It would also be useful to have several prediction methods available, for instance
+in order to aggregate them using methods similar to those of the previous contract.
+\textcolor{blue}{OK: we are starting to have a few}
+
+\end{document}
diff --git a/reports/report_02-02-2017.ipynb b/reports/report_02-02-2017.ipynb
new file mode 100644
index 0000000..4f49077
--- /dev/null
+++ b/reports/report_02-02-2017.ipynb
@@ -0,0 +1,508 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Package R \"vortex\"\n",
+    "\n",
+    "using Vectorial exOgenous variables to foRecast Time-sErieX.\n",
+    "\n",
+    "Ce package permet de prévoir des courbes de PM10 (par exemple), en se basant sur l'historique des valeurs mais aussi des variables exogènes (par exemple la météo).\n",
+    "\n",
+    "Fonctions principales :\n",
+    "\n",
+    " * <code>getData</code> : charge un jeu de données en mémoire\n",
+    " * <code>getForecast</code> : prédit les lendemains aux indices demandés\n",
+    "\n",
+    "Diverses méthodes permettent ensuite d'analyser les performances : <code>getError</code>, <code>plotXYZ</code> : voir la section \"see also\" dans <code>?plotError</code>."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "#Chargement de la librairie (après compilation, \"R CMD INSTALL ppmfun/\")\n",
+    "library(vortex)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note : sur la base de nos dernières expériences, on considère que \n",
+    "\n",
+    " * on ne touche pas à la fenêtre obtenue par la fonction <code>optimize</code> ;\n",
+    " * on oublie la méthode consistant à prédire forme et niveau de manière complètement déconnectée : il faut relier les deux.\n",
+    "\n",
+    "### Acquisition des données\n",
+    "\n",
+    "Compte-tenu de la nature hétérogène des données utilisées $-$ fonctionnelles pour les PM10, vectorielles pour les variables exogènes $-$, celles-ci sont encapsulées (comme des listes) dans un objet de type *Data*. En interne, la $i^{eme}$ cellule correspondant aux données disponibles au $i^{eme}$ jour à l'heure $H$ de prédiction choisie (1h00, 8h00 ou 14h00) : c'est-à-dire les valeurs des PM10 de $H-24h$ à $H-1h$, ainsi que les variables météo prédites pour la période de 1h à 0h du jour courant (sauf si on prédit à 0h : on prend alors les valeurs mesurées de la veille).\n",
+    "\n",
+    "Méthodes d'un objet de classe \"Data\" : elles prennent comme argument \"index\", qui est un index entier ; mais une fonction de conversion existe : <code>dateIndexToInteger</code>.\n",
+    "\n",
+    " * <code>getTime</code> : suite des date+heure\n",
+    " * <code>getCenteredSerie</code> : série centrée\n",
+    " * <code>getLevel</code> : niveau\n",
+    " * <code>getSerie</code> : série *non* centrée\n",
+    " * <code>getExoHat</code> : variables exogènes prévues\n",
+    " * <code>getExoDm1</code> : variables exogènes mesurées la veille\n",
+    "\n",
+    "Exemple :"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in eval(expr, envir, enclos): could not find function \"getData\"\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in eval(expr, envir, enclos): could not find function \"getData\"\nTraceback:\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Voir ?getData pour les arguments\n",
+    "data = getData(ts_data=\"data/pm10_mesures_H_loc.csv\", exo_data=\"data/meteo_extra_noNAs.csv\",\n",
+    "  input_tz = \"Europe/Paris\", working_tz=\"Europe/Paris\", predict_at=\"07\")\n",
+    "data$getLevel(10) #niveau du jour 10\n",
+    "data$getExoHat(17) #météo prévue pour le jour 18"
+   ]
+  },
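+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The remaining accessors can be inspected in the same way. The short sketch below assumes the <code>data</code> object built above and an arbitrary index (10); it is only an illustration, not output produced for this report."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Minimal sketch of the other accessors; `data` and the index 10 are assumed\n",
+    "head(data$getTime(10))           # date+time stamps of day 10\n",
+    "head(data$getCenteredSerie(10))  # centered PM10 series of day 10\n",
+    "head(data$getSerie(10))          # non-centered series of day 10\n",
+    "data$getExoDm1(10)               # exogenous variables measured the day before"
+   ]
+  },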
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Prédiction\n",
+    "\n",
+    "Deux types de prévisions du prochain bloc de $24h$ sont à distinguer :\n",
+    "\n",
+    " * prévision de la forme (centrée) ;\n",
+    " * prévision du saut d'une fin de série au début de la suivante.\n",
+    "\n",
+    "Il faut ainsi préciser à la fois une méthode de prévision de forme (\"Average\", \"Persistence\" et \"Neighbors\" sont implémentées), et une méthode de prédiction de saut (\"Zero\", \"Persistence\" ou \"Neighbors\"). On détaille surtout la méthode à voisins ci-après, les autres étant des approches naïves que l'on peut considérer comme des références à améliorer.\n",
+    "\n",
+    " 1. **Préparation des données** : fenêtrage si demandé (paramètre \"memory\"), recherche des paires de jours consécutifs sans valeurs manquantes.\n",
+    " 2. **Optimisation des paramètres d'échelle** : via la fonction <code>optimize</code> minimisant la somme des 45 dernières erreurs jounalières RMSE, sur des jours similaires.\n",
+    " 3. **Prédiction finale** : une fois le (ou les, si \"simtype\" vaut \"mix\") paramètre d'échelle $h$ déterminé, les similarités sont évaluées sur les variables exogènes et/ou endogènes, sous la forme $s(i,j) = \\mbox{exp}\\left(-\\frac{\\mbox{dist}^2(i,j)}{h^2}\\right)$. La formule indiquée plus haut dans le rapport est alors appliquée.\n",
+    "\n",
+    "Détail technique : la sortie de la méthode <code>getForecast</code> est un objet de type Forecast, encapsulant les séries prévues ainsi que tous les paramètres optimisés par la méthode \"Neighbors\". Fonctions disponibles (argument \"index\" comme pour les fonctions sur Data) :\n",
+    "\n",
+    " * <code>getSerie</code> : série prévue (sans les information de temps)\n",
+    " * <code>getParams</code> : liste des paramètres (poids, fenêtre, ...)\n",
+    " * <code>getIndexInData</code> : indice du jour où s'effectue la prévision relativement au jeu de données\n",
+    "\n",
+    "### Calcul des erreurs\n",
+    "\n",
+    "Pour chacun des instants à prévoir jusqu'à minuit du jour courant (ou avant : argument *horizon*), on calcule l'erreur moyenne sur tous les instants similaires du passé. Deux types d'erreurs sont considérées :\n",
+    "\n",
+    " * l'erreur \"abs\" égale à la valeur absolue moyenne entre la mesure et la prédiction ;\n",
+    " * l'erreur \"MAPE\" égale à l'erreur absolue normalisée par la mesure.\n",
+    "\n",
+    "### Expériences numériques"
+   ]
+  },
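+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a reading aid, the sketch below shows how a Forecast object could be inspected; it assumes an object <code>f</code> returned by <code>getForecast</code> (see <code>?getForecast</code>) and an arbitrary index (5). Both are assumptions for illustration, not results from this notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Sketch only: `f` is assumed to be a Forecast object returned by getForecast\n",
+    "head(f$getSerie(5))   # predicted series for that day (no time information)\n",
+    "f$getParams(5)        # optimized parameters (weights, window, ...)\n",
+    "f$getIndexInData(5)   # index of the forecast day relative to the dataset"
+   ]
+  },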
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "options(repr.plot.width=9, repr.plot.height=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_endo = predictPM10(data, 2200, 2230, 0,0, \"Neighbors\", \"Neighbors\", simtype=\"endo\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_exo = predictPM10(data, 2200, 2230, 0,0, \"Neighbors\", \"Neighbors\", simtype=\"exo\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_mix = predictPM10(data, 2200, 2230, 0,0, \"Neighbors\", \"Neighbors\", simtype=\"mix\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p = list(p_endo, p_exo, p_mix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "yrange_MAPE = range(p_mix$errors$MAPE, p_endo$errors$MAPE, p_exo$errors$MAPE)\n",
+    "yrange_abs = range(p_mix$errors$abs, p_endo$errors$abs, p_exo$errors$abs)\n",
+    "yrange_RMSE = range(p_mix$errors$RMSE, p_endo$errors$RMSE, p_exo$errors$RMSE)\n",
+    "ranges = list(yrange_MAPE,yrange_abs,yrange_RMSE)\n",
+    "\n",
+    "par(mfrow=c(1,3))\n",
+    "titles = paste(\"Erreur\",c(\"MAPE\",\"abs\",\"RMSE\"))\n",
+    "for (i in 1:3) #error type (MAPE,abs,RMSE)\n",
+    "{\n",
+    "  for (j in 1:3) #model (mix,endo,exo)\n",
+    "  {\n",
+    "    xlab = if (j>1) \"\" else \"Temps\"\n",
+    "    ylab = if (j>1) \"\" else \"Erreur\"\n",
+    "    main = if (j>1) \"\" else titles[i]\n",
+    "    plot(p[[j]]$errors[[i]], type=\"l\", col=j, main=main, xlab=xlab, ylab=ylab, ylim=ranges[[i]])\n",
+    "    if (j<3)\n",
+    "        par(new=TRUE)\n",
+    "  }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ne tenir compte que des similarités sur les variables exogènes semble conduire à l'erreur la plus faible."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_nn = predictPM10(data, 2200, 2230, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_np = predictPM10(data, 2200, 2230, 0, 0, \"Neighbors\", \"Persistence\", sameSeaon=TRUE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_nz = predictPM10(data, 2200, 2230, 0, 0, \"Neighbors\", \"Zero\", sameSeaon=TRUE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_pp = predictPM10(data, 2200, 2230, 0, 0, \"Persistence\", \"Persistence\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p_pz = predictPM10(data, 2200, 2230, 0, 0, \"Persistence\", \"Zero\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p = list(p_nn, p_np, p_nz, p_pp, p_pz)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "yrange_MAPE = range(p_nn$errors$MAPE, p_nz$errors$MAPE, p_np$errors$MAPE, p_pp$errors$MAPE, p_pz$errors$MAPE)\n",
+    "yrange_abs = range(p_nn$errors$abs, p_nz$errors$abs, p_np$errors$abs, p_pp$errors$abs, p_pz$errors$abs)\n",
+    "yrange_RMSE = range(p_nn$errors$RMSE, p_nz$errors$RMSE, p_np$errors$RMSE, p_pp$errors$RMSE, p_pz$errors$RMSE)\n",
+    "ranges = list(yrange_MAPE,yrange_abs,yrange_RMSE)\n",
+    "\n",
+    "par(mfrow=c(1,3))\n",
+    "titles = paste(\"Erreur\",c(\"MAPE\",\"abs\",\"RMSE\"))\n",
+    "for (i in 1:3) #error type (MAPE,abs,RMSE)\n",
+    "{\n",
+    "  for (j in 1:5) #model (nn,np,nz,pp,pz)\n",
+    "  {\n",
+    "    xlab = if (j>1) \"\" else \"Temps\"\n",
+    "    ylab = if (j>1) \"\" else \"Erreur\"\n",
+    "    main = if (j>1) \"\" else titles[i]\n",
+    "    plot(p[[j]]$errors[[i]], type=\"l\", col=j, main=titles[i], xlab=xlab, ylab=ylab, ylim=ranges[[i]])\n",
+    "    if (j<5)\n",
+    "      par(new=TRUE)\n",
+    "  }\n",
+    "}\n",
+    "        \n",
+    "\n",
+    "p = list(p_nn_epandage, p_nn_nonpollue, p_nn_chauffage)\n",
+    "forecasts_2 = lapply(1:length(data), function(index) ( if (is.na(p[[2]]$forecasts[[index]][1])) rep(NA,24) else p[[2]]$forecasts[[index]]$pred ) )\n",
+    "e1 = getErrors(data, forecasts_1, 17)\n",
+    " \n",
+    "e = list(e1,e2,e3)\n",
+    "yrange_MAPE = range(e1$MAPE, e2$MAPE, e3$MAPE)\n",
+    "yrange_abs = range(e1$abs, e2$abs, e3$abs)\n",
+    "yrange_RMSE = range(e1$RMSE, e2$RMSE, e3$RMSE)\n",
+    "ranges = list(yrange_MAPE,yrange_abs,yrange_RMSE)\n",
+    "\n",
+    "par(mfrow=c(1,3))\n",
+    "titles = paste(\"Erreur\",c(\"MAPE\",\"abs\",\"RMSE\"))\n",
+    "for (i in 1:3) #error type (MAPE,abs,RMSE)\n",
+    "{\n",
+    "  for (j in 1:3) #model (nn,np,nz,pp,pz)\n",
+    "  {\n",
+    "    xlab = if (j>1) \"\" else \"Temps\"\n",
+    "    ylab = if (j>1) \"\" else \"Erreur\"\n",
+    "    main = if (j>1) \"\" else titles[i]\n",
+    "    plot(e[[j]][[i]], type=\"l\", col=j, main=titles[i], xlab=xlab, ylab=ylab, ylim=ranges[[i]])\n",
+    "    if (j<3)\n",
+    "      par(new=TRUE)\n",
+    "  }\n",
+    "}\n",
+    "\n",
+    "par(mfrow=c(1,2))\n",
+    "#p[[i]]$forecasts[[index]]\n",
+    "#futurs des blocs du passé pour le jour 2290 ::\n",
+    "futurs = lapply(1:length(p[[1]]$forecasts[[2290]]$indices),\n",
+    "       function(index) ( data[[ p[[1]]$forecasts[[2290]]$indices[index]+1 ]]$pm10 ) )\n",
+    "#vrai futur (en rouge), vrai jour (en noir)\n",
+    "r_min = min( sapply( 1:length(futurs), function(index) ( min(futurs[[index]] ) ) ) )\n",
+    "r_max = max( sapply( 1:length(futurs), function(index) ( max(futurs[[index]] ) ) ) )\n",
+    "for (i in 1:length(futurs))\n",
+    "{\n",
+    "    plot(futurs[[i]], col=1, ylim=c(r_min,r_max), type=\"l\")\n",
+    "    if (i<length(futurs))\n",
+    "        par(new=TRUE)\n",
+    "}\n",
+    "\n",
+    "plot(data[[2290]]$pm10, ylim=c(r_min, r_max), col=1, type=\"l\")\n",
+    "    par(new=TRUE)\n",
+    "plot(data[[2291]]$pm10, ylim=c(r_min, r_max), col=2, type=\"l\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Meilleurs results: nn et nz (np moins bon)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "#%%TODO: analyse sur les trois périodes indiquées par Michel ; simtype==\"exo\" par defaut\n",
+    "#16/03/2015 2288\n",
+    "p_nn_epandage = predictPM10(data, 2287, 2293, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE, simtype=\"mix\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "options(repr.plot.width=9, repr.plot.height=6)\n",
+    "plot(p_nn_epandage$errors$abs, type=\"l\", col=1, main=\"Erreur absolue\", xlab=\"Temps\",\n",
+    "     ylab=\"Erreur\", ylim=range(p_nn_epandage$errors$abs))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "#19/01/2015 2232\n",
+    "p_nn_chauffage = predictPM10(data, 2231, 2237, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "#23/02/2015 2267\n",
+    "p_nn_nonpollue = predictPM10(data, 2266, 2272, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE, simtype=\"mix\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "plot(p_nn_nonpollue$errors$abs, type=\"l\", col=2, main=\"Erreur absolue\", xlab=\"Temps\",\n",
+    "     ylab=\"Erreur\", ylim=range(p_nn_nonpollue$errors$abs))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "library(ppmfun)\n",
+    "data = getData(\"local\", \"7h\")\n",
+    "p_nn_epandage = predictPM10(data, 2287, 2293, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE, simtype=\"endo\")\n",
+    "p_nn_nonpollue = predictPM10(data, 2266, 2272, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE, simtype=\"endo\")\n",
+    "p_nn_chauffage = predictPM10(data, 2231, 2237, 0, 0, \"Neighbors\", \"Neighbors\", sameSeaon=TRUE, simtype=\"endo\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p = list(p_nn_epandage, p_nn_nonpollue, p_nn_chauffage)\n",
+    "#yrange_MAPE = range(p[[1]]$errors$MAPE, p[[2]]$errors$MAPE, p[[3]]$errors$MAPE)\n",
+    "#yrange_abs = range(p[[1]]$errors$abs, p[[2]]$errors$abs, p[[3]]$errors$abs)\n",
+    "#yrange_RMSE = range(p[[1]]$errors$RMSE, p[[2]]$errors$RMSE, p[[3]]$errors$RMSE)\n",
+    "#ranges = list(yrange_MAPE,yrange_abs,yrange_RMSE)\n",
+    "print(p[[1]]$forecasts[[2290]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "par(mfrow=c(1,3))\n",
+    "titles = paste(\"Erreur\",c(\"MAPE\",\"abs\",\"RMSE\"))\n",
+    "for (i in 1:3) #error type (MAPE,abs,RMSE)\n",
+    "{\n",
+    "  for (j in 1:5) #model (nn,np,nz,pp,pz)\n",
+    "  {\n",
+    "    xlab = if (j>1) \"\" else \"Temps\"\n",
+    "    ylab = if (j>1) \"\" else \"Erreur\"\n",
+    "    main = if (j>1) \"\" else titles[i]\n",
+    "    plot(p[[j]]$errors[[i]], type=\"l\", col=j, main=titles[i], xlab=xlab, ylab=ylab, ylim=ranges[[i]])\n",
+    "    if (j<5)\n",
+    "      par(new=TRUE)\n",
+    "  }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Bilan\n",
+    "\n",
+    "TODO"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "R",
+   "language": "R",
+   "name": "ir"
+  },
+  "language_info": {
+   "codemirror_mode": "r",
+   "file_extension": ".r",
+   "mimetype": "text/x-r-source",
+   "name": "R",
+   "pygments_lexer": "r",
+   "version": "3.3.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/reports/report_13-01-2017.rnw b/reports/report_13-01-2017.rnw
new file mode 100644
index 0000000..c2425af
--- /dev/null
+++ b/reports/report_13-01-2017.rnw
@@ -0,0 +1,186 @@
+\documentclass[a4paper,12pt]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+
+\renewcommand*\familydefault{\sfdefault}
+
+\marginparwidth 0pt
+\oddsidemargin 0pt
+\evensidemargin 0pt
+\marginparsep 0pt
+\topmargin 0pt
+\textwidth 16cm
+\textheight 23cm
+\parindent 5mm
+
+\begin{document}
+
+\section{Package R "ppmfun"}
+
+Le package $-$ Predict PM10 with FUNctional methods $-$ contient le code permettant de (re)lancer
+les expériences numériques décrites dans ce document. La fonction principale \emph{predictPM10}
+se divise en trois parties, décrites successivement au cours des trois paragraphes suivants.\\
+
+<<setup, out.width='7cm', out.height='7cm'>>=
+#Load the library (after compilation, "R CMD INSTALL ppmfun/")
+library(ppmfun)
+
+#Example of the main call (detailed below)
+p_mix = predictPM10("GMT", "7h", 400, 30, "Neighbors", NULL, 0, "direct",
+  simtype="mix")
+
+#Shape of the predicted curves
+yrange = range(p_mix$forecasts[401:430], na.rm=TRUE)
+plot(0,xaxt='n',yaxt='n',bty='n',pch='',ylab='Prédictions PM10',
+  xlab='Temps',ylim=yrange,main="Courbes PM10 prédites")
+for (i in 401:430)
+{
+  if (!any(is.na(p_mix$forecasts[[i]])))
+  {
+    par(new=TRUE)
+    plot(p_mix$forecasts[[i]], type="l", ylim=yrange, xlab="", ylab="")
+  }
+}
+@
+
+The call to \emph{predictPM10()} above translates into:
+\begin{enumerate}
+  \item load the data split according to universal time, in $24h$ segments from $7h15$ to
+    $7h$ the next day;
+  \item start the prediction at day $400$ and end it at day $400+30-1 = 429$;
+  \item use the "Neighbors" method, which puts more weight on the neighbors of the current
+    day's PM10 curve, taking the whole history into account;
+  \item connect the centered forecast continuously to the measurements over the last $24h$ block.
+\end{enumerate}
+
+\subsection{Data acquisition}
+
+Given the heterogeneous nature of the data $-$ functional for the PM10 series,
+vectorial for the exogenous variables $-$, they are organized as a list
+\emph{data}, whose $i^{th}$ cell corresponds to the data available on the $i^{th}$ day at
+the chosen prediction hour $H$ (0h15, 7h15 or 13h15): that is, the PM10 values from
+$H-24h$ to $H-15m$, together with the weather variables measured on the last full day before
+hour $H$, and the weather variables forecast for the period from $0h15$ to $0h$ of the current day.\\
+
+Example:\\
+<<data>>=
+#The first argument gives the desired time zone; "GMT" or "local"
+#for French local time, or any other time zone.
+data = getData("GMT", "7h")
+@
+
+\subsection{Prediction}
+
+Two types of forecasts of the next $24h$ block must be distinguished:
+\begin{itemize}
+  \item forecast of the (centered) shape;
+  \item forecast of the level.
+\end{itemize}
+
+\noindent If the shape forecast is chosen to be connected to the last measured PM10 value, then
+the level does not need to be predicted (hence the \texttt{NULL} argument in the main call).
+Otherwise a method must be specified; only persistence is currently implemented. The "Neighbors"
+shape forecasting method is detailed below (see also the file S\_Neighbors.R).\\
+
+\begin{enumerate}
+  \item \textbf{Data preparation}: computation of the 24h levels, windowing if requested
+    ("memory" parameter).
+  \item \textbf{Scale parameter optimization}: via the \emph{optim()} function,
+    minimizing the sum of the last 45 daily L2 errors (a sketch of this step is given
+    right after this list).
+  \item \textbf{Final prediction}: once the scale parameter $h$ (or parameters, if "simtype"
+    is "mix") has been determined, the similarities are evaluated on the exogenous and/or
+    endogenous variables, in the form $s(i,j) = \mbox{exp}\left(-\frac{\mbox{dist}^2(i,j)}{h^2}\right)$.
+    The formula given earlier in the report is then applied.
+\end{enumerate}
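+
+To make the second step concrete, here is a minimal sketch of the kind of one-parameter
+objective that \emph{optim()} could minimize. A toy surrogate stands in for the daily L2
+error (in the package this error comes from re-forecasting past days); the helper
+\emph{l2\_error\_for\_h} and all numerical values are purely illustrative and not part of
+the package.
+
+<<h-optim-sketch>>=
+#Toy surrogate for the L2 error of the forecast of day `day` when the similarities
+#use the scale parameter h (NOT a package function, illustration only)
+l2_error_for_h = function(h, day)
+  (h - 1.2)^2 + 0.1*sin(day)
+
+#Objective: sum of the last 45 daily L2 errors
+last_days = 1:45
+objective = function(h) sum(sapply(last_days, function(day) l2_error_for_h(h, day)))
+
+#One-dimensional minimization (method="Brent" requires bounds)
+h_opt = optim(par=1, fn=objective, method="Brent", lower=1e-3, upper=10)$par
+h_opt
+@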
+
+Example (forecasting days 401 to 430):\\
+<<forecasts>>=
+forecasts = as.list(rep(NA, length(data)))
+for (i in 400:429)
+{
+  #forecast with data up to index i
+  forecasts[[i+1]] = getForecasts(data[1:i], "Neighbors", NULL, 0,
+    "direct", simtype="mix")
+}
+@
+
+\subsection{Error computation}
+
+For each time step to forecast up to midnight of the current day, the mean error is computed
+over all similar past time steps (over the predicted range, days 401 to 430 in the example). Two
+types of errors are considered:
+\begin{itemize}
+  \item the "L1" error, equal to the absolute difference between measurement and prediction;
+  \item the "MAPE" error, equal to the L1 error normalized by the measurement.
+\end{itemize}
+
+Code:\\
+<<errors, out.width='7cm', out.height='7cm', fig.show='hold'>>=
+e = getErrors(data, forecasts)
+#Absolute errors, point by point, averaged over the 30 days
+plot(e$L1, type="l", xlab="Temps", ylab="Erreur absolue")
+#Relative errors, point by point, averaged over the 30 days
+plot(e$MAPE, type="l", xlab="Temps", ylab="Erreur relative")
+@
+
+\subsection{Other numerical experiments}
+
+<<others1, out.width='7cm', out.height='7cm', fig.show='hold'>>=
+p_endo = predictPM10("GMT", "7h", 400, 30, "Neighbors", NULL, 0,
+  "direct", simtype="endo")
+p_exo = predictPM10("GMT", "7h", 400, 30, "Neighbors", NULL, 0,
+  "direct", simtype="exo")
+yrange_L1 = range(p_mix$errors$L1, p_endo$errors$L1, p_exo$errors$L1)
+plot(p_mix$errors$L1, type="l", main="Erreur L1", xlab="Temps",
+  ylab="Erreur absolue", ylim=yrange_L1) ; par(new=TRUE)
+plot(p_endo$errors$L1, type="l", col=2, xlab="", ylab="",
+  ylim=yrange_L1) ; par(new=TRUE)
+plot(p_exo$errors$L1, type="l", col=3, xlab="", ylab="",
+  ylim=yrange_L1)
+yrange_MAPE =
+  range(p_mix$errors$MAPE, p_endo$errors$MAPE, p_exo$errors$MAPE)
+plot(p_mix$errors$MAPE, type="l", main="Erreur MAPE", xlab="Temps",
+  ylab="Erreur relative", ylim=yrange_MAPE) ; par(new=TRUE)
+plot(p_endo$errors$MAPE, type="l", col=2, xlab="", ylab="",
+  ylim=yrange_MAPE) ; par(new=TRUE)
+plot(p_exo$errors$MAPE, type="l", col=3, xlab="", ylab="",
+  ylim=yrange_MAPE)
+
+#Taking into account only similarities on the exogenous variables seems
+#to lead to the lowest error.
+@
+
+<<others2, out.width='7cm', out.height='7cm', fig.show='hold'>>=
+p_exo_h = predictPM10("GMT", "7h", 400, 30, "Neighbors", NULL, 0,
+  "direct", simtype="exo", h_window=0.25)
+plot(p_exo_h$errors$L1, type="l", main="Erreur L1", xlab="Temps",
+  ylab="Erreur absolue")
+plot(p_exo_h$errors$MAPE, type="l", main="Erreur MAPE", xlab="Temps",
+  ylab="Erreur relative")
+
+#Shrinking the window does not improve the average performance
+#(because the individual data are highly variable).
+@
+
+<<others3, out.width='7cm', out.height='7cm', fig.show='hold'>>=
+p_exo_s = predictPM10("GMT", "7h", 400, 30, "Neighbors", "Persistence",
+  0, "separate", simtype="exo")
+plot(p_exo_s$errors$L1, type="l", main="Erreur L1", xlab="Temps",
+  ylab="Erreur absolue")
+plot(p_exo_s$errors$MAPE, type="l", main="Erreur MAPE", xlab="Temps",
+  ylab="Erreur relative")
+
+#Predicting shape and level separately leads to a larger error;
+#other level forecasting methods should nevertheless be tested.
+@
+
+\subsection{Next steps}
+
+The day type is not taken into account in the neighbor search; doing so would sharply
+reduce the number of retained similarities, but could significantly improve the
+forecasts.\\
+
+\noindent It would also be useful to have several prediction methods available, for instance
+in order to aggregate them using methods similar to those of the previous contract.
+
+\end{document}
-- 
2.44.0