From 2057c793ad9929ed5bef8663ea28b896c84df0fc Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Thu, 6 Apr 2017 18:14:41 +0200 Subject: [PATCH 01/16] on the way back without realtime --- pkg/DESCRIPTION | 6 +-- pkg/R/A_NAMESPACE.R | 4 ++ pkg/R/Data.R | 92 ++++++++++++++++++++--------------------- pkg/R/F_Average.R | 3 +- pkg/R/F_Neighbors.R | 4 +- pkg/R/F_Persistence.R | 6 +-- pkg/R/F_Zero.R | 4 +- pkg/R/J_Neighbors.R | 3 +- pkg/R/J_Persistence.R | 3 +- pkg/R/computeError.R | 3 +- pkg/R/computeForecast.R | 8 ++-- pkg/R/getData.R | 4 +- 12 files changed, 65 insertions(+), 75 deletions(-) diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index b1697df..a897984 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: talweg Title: Time-Series Samples Forecasted With Exogenous Variables Version: 0.1-0 Description: Forecast a curve sampled within the day (seconds, minutes, - hours...), using past measured curves + paste exogenous informations, which + hours...), using past measured curves + past exogenous informations, which could be some aggregated measure on the past curves, the weather... Main starting point: computeForecast(). 
Author: Benjamin Auder [aut,cre], @@ -23,8 +23,8 @@ Suggests: LazyData: yes URL: http://git.auder.net/?p=talweg.git License: MIT + file LICENSE -RoxygenNote: 6.0.1 -Collate: +RoxygenNote: 5.0.1 +Collate: 'A_NAMESPACE.R' 'Data.R' 'Forecaster.R' diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R index 4558e59..bcd5df1 100644 --- a/pkg/R/A_NAMESPACE.R +++ b/pkg/R/A_NAMESPACE.R @@ -1,3 +1,7 @@ +#' @include Data.R +#' @include Forecast.R +#' @include Forecaster.R +#' #' @importFrom grDevices colors gray.colors #' @importFrom graphics abline hist par plot #' @importFrom methods hasArg is diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 551cfaf..53a8d5b 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -5,12 +5,14 @@ #' @docType class #' @importFrom R6 R6Class #' -#' @field .data List of \itemize{ +#' @field .data List of +#' \itemize{ #' \item time: vector of times -#' \item serie: centered series +#' \item centered_serie: centered series #' \item level: corresponding levels #' \item exo: exogenous variables -#' \item exo_hat: predicted exogenous variables} +#' \item exo_hat: predicted exogenous variables +#' } #' #' @section Methods: #' \describe{ @@ -18,24 +20,30 @@ #' Return number of series in dataset.} #' \item{\code{getStdHorizon()}}{ #' Return number of time steps from serie[1] until midnight} -#' \item{\code{appendHat(time, hat_serie, hat_exo)}}{ -#' New estimated data + time.} +#' \item{\code{appendHat(time, exo_hat)}}{ +#' New estimated exogenous variables + time} #' \item{\code{append(serie, exo)}}{ #' New measured data; call *after* \code{appendHat()}} #' \item{\code{getTime(index)}}{ #' Get times at specified index.} -#' \item{\code{getCenteredSerie(index, hat=FALSE)}}{ +#' \item{\code{getCenteredSerie(index)}}{ #' Get (measured or predicted) centered serie at specified index.} -#' \item{\code{getCenteredSeries(indices, hat=FALSE)}}{ +#' \item{\code{getCenteredSeries(indices)}}{ #' Get centered series at specified indices (in columns).} -#' 
\item{\code{getLevel(index, hat=FALSE)}}{ +#' \item{\code{getLevel(index)}}{ #' Get level at specified index.} -#' \item{\code{getSerie(index, hat=FALSE)}}{ +#' \item{\code{getSerie(index)}}{ #' Get serie (centered+level) at specified index.} -#' \item{\code{getSeries(indices, hat=FALSE)}}{ +#' \item{\code{getSeries(indices)}}{ #' Get series at specified indices (in columns).} -#' \item{\code{getExo(index, hat=FALSE)}}{ +#' \item{\code{getExoHat(index)}}{ +#' Get predicted exogenous variables at specified index.} +#' \item{\code{getExo(index)}}{ #' Get exogenous variables at specified index.} +#' \item{\code{removeFirst()}}{ +#' Remove first list element (if truncated).} +#' \item{\code{removeLast()}}{ +#' Remove last list element (if truncated).} #' } #' Data = R6::R6Class("Data", @@ -49,64 +57,54 @@ Data = R6::R6Class("Data", getStdHorizon = function() 24 - as.POSIXlt( private$.data[[1]]$time[1] )$hour + 1 , - appendHat = function(time, hat_serie, hat_exo) - { - hat_level = mean(hat_serie, na.rm=TRUE) - hat_centered_serie = hat_serie - hat_level - private$.data[[length(private$.data)+1]] <- list( - "time"=time, "hat_centered_serie"=hat_centered_serie, - "hat_level"=hat_level, "hat_exo"=hat_exo ) - }, - append = function(serie, exo) + appendHat = function(time, exo_hat) + private$.data[[length(private$.data)+1]] <- list("time"=time,"exo_hat"=exo_hat) + , + append = function(time, serie, exo) { + index <- length(private$.data) level = mean(serie, na.rm=TRUE) centered_serie = serie - level - private$.data[[length(private$.data)]]$centered_serie <- centered_serie, - private$.data[[length(private$.data)]]$level <- level, - private$.data[[length(private$.data)]]$exo <- exo, + private$.data[[index]]$time <- time + private$.data[[index]]$centered_serie <- centered_serie + private$.data[[index]]$level <- level + private$.data[[index]]$exo <- exo }, getTime = function(index) { index = dateIndexToInteger(index, self) private$.data[[index]]$time }, - getCenteredSerie = 
function(index, hat=FALSE) + getCenteredSerie = function(index) { index = dateIndexToInteger(index, self) - if (hat) - private$.data[[index]]$hat_centered_serie - else - private$.data[[index]]$centered_serie + private$.data[[index]]$centered_serie }, - getCenteredSeries = function(indices, hat=FALSE) - sapply(indices, function(i) self$getCenteredSerie(i, hat)) + getCenteredSeries = function(indices) + sapply(indices, function(i) self$getCenteredSerie(i)) , - getLevel = function(index, hat=FALSE) + getLevel = function(index) { index = dateIndexToInteger(index, self) - if (hat) - private$.data[[index]]$hat_level - else - private$.data[[index]]$level + private$.data[[index]]$level }, - getSerie = function(index, hat=FALSE) + getSerie = function(index) { index = dateIndexToInteger(index, self) - if (hat) - private$.data[[index]]$hat_centered_serie + private$.data[[index]]$hat_level - else - private$.data[[index]]$centered_serie + private$.data[[index]]$level + private$.data[[index]]$centered_serie + private$.data[[index]]$level }, - getSeries = function(indices, hat=FALSE) - sapply(indices, function(i) self$getSerie(i, hat)) + getSeries = function(indices) + sapply(indices, function(i) self$getSerie(i)) , - getExo = function(index, hat=FALSE) + getExoHat = function(index) + { + index = dateIndexToInteger(index, self) + private$.data[[index]]$exo_hat + }, + getExo = function(index) { index = dateIndexToInteger(index, self) - if (hat) - private$.data[[index]]$hat_exo - else - private$.data[[index]]$exo + private$.data[[index]]$exo }, removeFirst = function() private$.data <- private$.data[2:length(private$.data)] diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index c28125e..a5e3c3e 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -1,10 +1,9 @@ -#' @include Forecaster.R -#' #' Average Forecaster #' #' Return the (pointwise) average of the all the (similar) centered day curves #' in the past. 
Inherits \code{\link{Forecaster}} #' +#' @export AverageForecaster = R6::R6Class("AverageForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index c9eda05..52c2b35 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -1,5 +1,3 @@ -#' @include Forecaster.R -#' #' Neighbors Forecaster #' #' Predict tomorrow as a weighted combination of "futures of the past" days. @@ -125,7 +123,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", private$.params$indices <- fdays private$.params$window <- 1 } - return ( data$getSerie(fdays[1])[1:horizon] ) #what else?! + return ( data$getSerie(fdays[1])[1:horizon] ) } } else diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index 79cfe3c..ad83449 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -1,5 +1,3 @@ -#' @include Forecaster.R -#' #' Persistence Forecaster #' #' Return the last centered (similar) day curve. @@ -14,14 +12,12 @@ PersistenceForecaster = R6::R6Class("PersistenceForecaster", # Return centered last (similar) day curve, avoiding NAs until memory is run first_day = max(1, today-memory) same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE) - realtime = ifelse(hasArg("realtime"), list(...)$realtime, FALSE) # If 'same_day', get the last known future of similar day: -7 + 1 == -6 index = today - ifelse(same_day,6,0) repeat { { - last_serie = - data$getCenteredSerie(index,hat=(index==today && realtime))[1:horizon] + last_serie = data$getCenteredSerie(index)[1:horizon] index = index - ifelse(same_day,7,1) }; if (!any(is.na(last_serie))) diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index f5d6b89..ac970c4 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -1,5 +1,3 @@ -#' @include Forecaster.R -#' #' Zero Forecaster #' #' Return 0 (and then adjust). Inherits \code{\link{Forecaster}} @@ -9,6 +7,6 @@ ZeroForecaster = R6::R6Class("ZeroForecaster", public = list( predictShape = function(data, today, memory, horizon, ...) 
- rep(0., horizon) + rep(0, horizon) ) ) diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 351fbf9..12fc9d4 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -9,13 +9,12 @@ getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) filter = (params$indices >= first_day) indices = params$indices[filter] weights = params$weights[filter] - realtime = ifelse(hasArg("realtime"), list(...)$realtime, FALSE) if (any(is.na(weights) | is.na(indices))) return (NA) gaps = sapply(indices, function(i) { - data$getSerie(i+1,hat=(realtime && i+1==today))[1] - tail(data$getSerie(i), 1) + head( data$getSerie(i+1), 1) - tail( data$getSerie(i), 1) }) scal_product = weights * gaps norm_fact = sum( weights[!is.na(scal_product)] ) diff --git a/pkg/R/J_Persistence.R b/pkg/R/J_Persistence.R index 4d3abd2..a85a42a 100644 --- a/pkg/R/J_Persistence.R +++ b/pkg/R/J_Persistence.R @@ -8,13 +8,12 @@ getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...) 
#return gap between end of similar day curve and first day of tomorrow (in the past) first_day = max(1, today-memory) same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE) - realtime = ifelse(hasArg("realtime"), list(...)$realtime, FALSE) index = today - ifelse(same_day,7,1) repeat { { last_serie_end = tail( data$getSerie(index), 1) - last_tomorrow_begin = data$getSerie(index+1,hat=(realtime && index+1==today))[1] + last_tomorrow_begin = head( data$getSerie(index+1), 1) index = index - ifelse(same_day,7,1) }; if (!is.na(last_serie_end) && !is.na(last_tomorrow_begin)) diff --git a/pkg/R/computeError.R b/pkg/R/computeError.R index ce05fdb..7da1032 100644 --- a/pkg/R/computeError.R +++ b/pkg/R/computeError.R @@ -4,7 +4,8 @@ #' #' @param data Dataset, object of class \code{Data} output of \code{getData} #' @param pred Forecast object, class \code{Forecast} output of \code{computeForecast} -#' @param horizon Horizon where to compute the error (<= horizon used in \code{computeForecast}) +#' @param horizon Horizon where to compute the error +#' (<= horizon used in \code{computeForecast}) #' #' @return A list (abs,MAPE) of lists (day,indices) #' diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 198f6ec..1e79118 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -2,8 +2,9 @@ #' #' Predict time-series curves for the selected days indices (lines in data). 
#' -#' @param data Dataset, object of type \code{Data} output of \code{getData} -#' @param indices Days indices where to forecast (the day after) +#' @param data Object of type \code{Data}, output of \code{getData()} +#' @param indices Indices where to forecast (the day after); integers relative to the +#' beginning of data, or (convertible to) Date objects #' @param forecaster Name of the main forcaster #' \itemize{ #' \item Persistence : use values of last (similar, next) day @@ -20,8 +21,7 @@ #' @param memory Data depth (in days) to be used for prediction #' @param horizon Number of time steps to predict #' @param ncores Number of cores for parallel execution (1 to disable) -#' @param ... Additional parameters for the forecasting models; -#' In particular, realtime=TRUE to use predictions instead of measurements +#' @param ... Additional parameters for the forecasting models #' #' @return An object of class Forecast #' diff --git a/pkg/R/getData.R b/pkg/R/getData.R index b944dfb..b095d01 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -71,12 +71,10 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% { time = c() serie = c() - hat_serie = c() repeat { { time = c(time, ts_df[line,1]) - hat_serie = c(serie, ts_df[line,3]) serie = c(serie, ts_df[line,2]) line = line + 1 }; @@ -89,7 +87,7 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% hat_exo = as.data.frame( exo_df[i,(1+nb_exos+1):(1+2*nb_exos)] ) exo = as.data.frame( exo_df[i,2:(1+nb_exos)] ) - data$appendHat(time, hat_serie, hat_exo) + data$appendHat(time, hat_exo) data$append(serie, exo) #in realtime, this call comes hours later if (i >= limit) break -- 2.44.0 From e169b5d568110a86282877de4fc44384dc6d6cb0 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Thu, 6 Apr 2017 21:44:30 +0200 Subject: [PATCH 02/16] improve doc of computeForecast for realtime usage --- pkg/DESCRIPTION | 24 +++++++++---------- pkg/R/A_NAMESPACE.R | 12 ++++++++++ 
pkg/R/computeForecast.R | 53 +++++++++++++++++++++++++---------------- 3 files changed, 56 insertions(+), 33 deletions(-) diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION index a897984..3aab84c 100644 --- a/pkg/DESCRIPTION +++ b/pkg/DESCRIPTION @@ -25,19 +25,19 @@ URL: http://git.auder.net/?p=talweg.git License: MIT + file LICENSE RoxygenNote: 5.0.1 Collate: - 'A_NAMESPACE.R' - 'Data.R' - 'Forecaster.R' - 'F_Average.R' - 'F_Neighbors.R' - 'F_Persistence.R' - 'F_Zero.R' - 'Forecast.R' - 'J_Neighbors.R' - 'J_Persistence.R' - 'J_Zero.R' + 'plot.R' 'computeError.R' 'computeForecast.R' + 'J_Zero.R' + 'J_Persistence.R' + 'J_Neighbors.R' + 'F_Zero.R' + 'F_Persistence.R' + 'F_Neighbors.R' + 'F_Average.R' 'getData.R' - 'plot.R' 'utils.R' + 'Forecaster.R' + 'Forecast.R' + 'Data.R' + 'A_NAMESPACE.R' diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R index bcd5df1..1d78d9c 100644 --- a/pkg/R/A_NAMESPACE.R +++ b/pkg/R/A_NAMESPACE.R @@ -1,6 +1,18 @@ #' @include Data.R #' @include Forecast.R #' @include Forecaster.R +#' @include utils.R +#' @include getData.R +#' @include F_Average.R +#' @include F_Neighbors.R +#' @include F_Persistence.R +#' @include F_Zero.R +#' @include J_Neighbors.R +#' @include J_Persistence.R +#' @include J_Zero.R +#' @include computeForecast.R +#' @include computeError.R +#' @include plot.R #' #' @importFrom grDevices colors gray.colors #' @importFrom graphics abline hist par plot diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 1e79118..35372b3 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -2,40 +2,51 @@ #' #' Predict time-series curves for the selected days indices (lines in data). #' -#' @param data Object of type \code{Data}, output of \code{getData()} +#' @param data Object of type \code{Data}, output of \code{getData()}. 
#' @param indices Indices where to forecast (the day after); integers relative to the -#' beginning of data, or (convertible to) Date objects -#' @param forecaster Name of the main forcaster +#' beginning of data, or (convertible to) Date objects. +#' @param forecaster Name of the main forecaster; more details: ?F_ #' \itemize{ -#' \item Persistence : use values of last (similar, next) day -#' \item Neighbors : use values from the k closest neighbors' tomorrows -#' \item Average : global average of all the (similar) "tomorrow of past" +#' \item Persistence : use last (similar, next) day +#' \item Neighbors : weighted tomorrows of similar days +#' \item Average : average tomorrow of all same day-in-week #' \item Zero : just output 0 (benchmarking purpose) #' } -#' @param pjump How to predict the jump at the interface between two days ? +#' @param pjump Function to predict the jump at the interface between two days; +#' more details: ?J_ #' \itemize{ -#' \item Persistence : use last (similar) day values -#' \item Neighbors: re-use the weights optimized in corresponding forecaster +#' \item Persistence : use last (similar, next) day +#' \item Neighbors: re-use the weights from F_Neighbors #' \item Zero: just output 0 (no adjustment) #' } -#' @param memory Data depth (in days) to be used for prediction -#' @param horizon Number of time steps to predict -#' @param ncores Number of cores for parallel execution (1 to disable) -#' @param ... Additional parameters for the forecasting models +#' @param memory Data depth (in days) to be used for prediction. +#' @param horizon Number of time steps to predict. +#' @param ncores Number of cores for parallel execution (1 to disable). +#' @param ... Additional parameters for the forecasting models. 
#' #' @return An object of class Forecast #' #' @examples -#' ts_data = system.file("extdata","pm10_mesures_H_loc.csv",package="talweg") -#' exo_data = system.file("extdata","meteo_extra_noNAs.csv",package="talweg") -#' data = getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", predict_at=7) -#' pred = computeForecast(data, 2200:2230, "Persistence", "Persistence", 500, 12) +#' ts_data <- system.file("extdata","pm10_mesures_H_loc.csv",package="talweg") +#' exo_data <- system.file("extdata","meteo_extra_noNAs.csv",package="talweg") +#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", predict_at=7) +#' pred <- computeForecast(data, 2200:2230, "Persistence", "Zero", +#' memory=500, horizon=12, ncores=1) #' \dontrun{#Sketch for real-time mode: -#' data = new("Data", ...) -#' forecaster = new(..., data=data) +#' data <- Data$new() +#' # Initialize: first day has no predictions attached +#' data$initialize() +#' forecaster <- MyForecaster$new(myJumpPredictFunc) #' repeat { -#' data$append(some_new_data) -#' pred = forecaster$predict(data$getSize(), ...) +#' # During the night between days j and j+1: +#' data$appendExoHat(exogenous_predictions) +#' # In the morning 7am+ or afternoon 1pm+: +#' data$setMeasures( +#' data$getSize()-1, +#' times_from_H+1_yersteday_to_Hnow, +#' PM10_values_of_last_24h, +#' exogenous_measures_for_yersteday) +#' pred <- forecaster$predictSerie(data, data$getSize()-1, ...) 
#' #do_something_with_pred #' }} #' @export -- 2.44.0 From c1be989885d1e402569d55a34aef01b57d6aea1c Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Thu, 6 Apr 2017 23:47:33 +0200 Subject: [PATCH 03/16] save current state --- pkg/R/Data.R | 2 +- pkg/R/computeForecast.R | 8 +++----- pkg/R/getData.R | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 53a8d5b..610c0c4 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -57,7 +57,7 @@ Data = R6::R6Class("Data", getStdHorizon = function() 24 - as.POSIXlt( private$.data[[1]]$time[1] )$hour + 1 , - appendHat = function(time, exo_hat) + append = function(time, exo_hat) private$.data[[length(private$.data)+1]] <- list("time"=time,"exo_hat"=exo_hat) , append = function(time, serie, exo) diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 35372b3..53dbc14 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -38,14 +38,12 @@ #' data$initialize() #' forecaster <- MyForecaster$new(myJumpPredictFunc) #' repeat { -#' # During the night between days j and j+1: -#' data$appendExoHat(exogenous_predictions) #' # In the morning 7am+ or afternoon 1pm+: -#' data$setMeasures( -#' data$getSize()-1, +#' data$append( #' times_from_H+1_yersteday_to_Hnow, #' PM10_values_of_last_24h, -#' exogenous_measures_for_yersteday) +#' exogenous_measures_of_last_24h, +#' exogenous_predictions_for_next_24h) #' pred <- forecaster$predictSerie(data, data$getSize()-1, ...) 
#' #do_something_with_pred #' }} diff --git a/pkg/R/getData.R b/pkg/R/getData.R index b095d01..4e2e3fd 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -85,10 +85,9 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% } } - hat_exo = as.data.frame( exo_df[i,(1+nb_exos+1):(1+2*nb_exos)] ) exo = as.data.frame( exo_df[i,2:(1+nb_exos)] ) - data$appendHat(time, hat_exo) - data$append(serie, exo) #in realtime, this call comes hours later + exo_hat = as.data.frame( exo_df[i,(1+nb_exos+1):(1+2*nb_exos)] ) + data$append(time, serie, exo, exo_hat) if (i >= limit) break i = i + 1 -- 2.44.0 From 4f5204f064d37ba0ec2988ba16a7011d7f0a45cd Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 7 Apr 2017 12:01:01 +0200 Subject: [PATCH 04/16] clarify data acquisition; TODO: improve doc + usage --- pkg/R/Data.R | 16 +++++++--------- pkg/R/computeForecast.R | 2 -- pkg/R/getData.R | 6 +++++- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 610c0c4..39f3837 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -57,18 +57,16 @@ Data = R6::R6Class("Data", getStdHorizon = function() 24 - as.POSIXlt( private$.data[[1]]$time[1] )$hour + 1 , - append = function(time, exo_hat) - private$.data[[length(private$.data)+1]] <- list("time"=time,"exo_hat"=exo_hat) - , - append = function(time, serie, exo) + append = function(time, serie, exo, exo_hat) { - index <- length(private$.data) level = mean(serie, na.rm=TRUE) centered_serie = serie - level - private$.data[[index]]$time <- time - private$.data[[index]]$centered_serie <- centered_serie - private$.data[[index]]$level <- level - private$.data[[index]]$exo <- exo + private$.data[[length(private$.data)+1]] <- list( + "time"=time, #H-24 --> H-1 + "centered_serie"=centered_serie, #at 'time' + "level"=level, #at 'time' + "exo"=exo, #at 'time' (yersteday 0am to last midnight) + "exo_hat"=exo_hat) #today 0am to next midnight }, getTime = function(index) { diff --git 
a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 53dbc14..4bab811 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -34,8 +34,6 @@ #' memory=500, horizon=12, ncores=1) #' \dontrun{#Sketch for real-time mode: #' data <- Data$new() -#' # Initialize: first day has no predictions attached -#' data$initialize() #' forecaster <- MyForecaster$new(myJumpPredictFunc) #' repeat { #' # In the morning 7am+ or afternoon 1pm+: diff --git a/pkg/R/getData.R b/pkg/R/getData.R index 4e2e3fd..268c54a 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -86,7 +86,11 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% } exo = as.data.frame( exo_df[i,2:(1+nb_exos)] ) - exo_hat = as.data.frame( exo_df[i,(1+nb_exos+1):(1+2*nb_exos)] ) + exo_hat = + if (i < nrow(exo_df)) + as.data.frame( exo_df[i+1,(1+nb_exos+1):(1+2*nb_exos)] ) + else + NA #exogenous prediction for next day are useless on last day data$append(time, serie, exo, exo_hat) if (i >= limit) break -- 2.44.0 From c4c329f65e6e842917cdfbabff36fbca6a617d02 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 7 Apr 2017 16:41:12 +0200 Subject: [PATCH 05/16] work on doc --- pkg/R/F_Average.R | 7 ++++--- pkg/R/F_Neighbors.R | 10 ++++++++-- pkg/R/F_Persistence.R | 7 +++++-- pkg/R/F_Zero.R | 5 ++++- pkg/R/computeForecast.R | 4 +++- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index a5e3c3e..d7febd4 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -1,9 +1,10 @@ #' Average Forecaster #' -#' Return the (pointwise) average of the all the (similar) centered day curves -#' in the past. Inherits \code{\link{Forecaster}} +#' Pointwise average of all series of the same (day of week) days in the past. 
+#' +#' @format R6 class, inherits Forecaster +#' @alias F_Average #' -#' @export AverageForecaster = R6::R6Class("AverageForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 52c2b35..7e0cdd2 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -1,7 +1,13 @@ #' Neighbors Forecaster #' -#' Predict tomorrow as a weighted combination of "futures of the past" days. -#' Inherits \code{\link{Forecaster}} +#' Predict next serie as a weighted combination of "futures of the past" days, +#' where days in the past are chosen and weighted according to some similarity measures. +#' See 'details' section. +#' +#' TODO: details. +#' +#' @format R6 class, inherits Forecaster +#' @alias F_Neighbors #' NeighborsForecaster = R6::R6Class("NeighborsForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index ad83449..754df07 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -1,7 +1,10 @@ #' Persistence Forecaster #' -#' Return the last centered (similar) day curve. -#' Inherits \code{\link{Forecaster}} +#' Look for the most recent similar day in the past, and return its corresponding curve. +#' "Similar day" means "same day in the week". +#' +#' @format R6 class, inherits Forecaster +#' @alias F_Persistence #' PersistenceForecaster = R6::R6Class("PersistenceForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index ac970c4..3c46f2c 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -1,6 +1,9 @@ #' Zero Forecaster #' -#' Return 0 (and then adjust). Inherits \code{\link{Forecaster}} +#' Flat prediction of next series of 'horizon' values. 
+#' +#' @format R6 class, inherits Forecaster +#' @alias F_Zero #' ZeroForecaster = R6::R6Class("ZeroForecaster", inherit = Forecaster, diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 4bab811..1d69777 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -1,6 +1,8 @@ #' Compute forecast #' -#' Predict time-series curves for the selected days indices (lines in data). +#' Predict time-series curves for the selected days indices. +#' +#' TODO: details #' #' @param data Object of type \code{Data}, output of \code{getData()}. #' @param indices Indices where to forecast (the day after); integers relative to the -- 2.44.0 From 102bcfda4afbb5cfee885cbee0f55545624168fd Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Sun, 9 Apr 2017 20:15:09 +0200 Subject: [PATCH 06/16] Improve documentation --- pkg/R/A_NAMESPACE.R | 13 ++++++------ pkg/R/Data.R | 47 +++++++++++++++++++++++------------------ pkg/R/F_Average.R | 8 ++++++- pkg/R/F_Neighbors.R | 27 +++++++++++++++++++++-- pkg/R/F_Persistence.R | 8 ++++++- pkg/R/F_Zero.R | 4 +++- pkg/R/Forecast.R | 23 ++++++++++++++------ pkg/R/Forecaster.R | 35 ++++++++++++++++++++++-------- pkg/R/J_Neighbors.R | 9 +++++++- pkg/R/J_Persistence.R | 9 +++++++- pkg/R/J_Zero.R | 6 +++++- pkg/R/computeError.R | 11 ++++++---- pkg/R/computeForecast.R | 10 ++++----- pkg/R/getData.R | 19 +++++++++-------- pkg/R/plot.R | 39 +++++++++++++++++----------------- pkg/R/utils.R | 30 +++++++++++++------------- 16 files changed, 194 insertions(+), 104 deletions(-) diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R index 1d78d9c..466293c 100644 --- a/pkg/R/A_NAMESPACE.R +++ b/pkg/R/A_NAMESPACE.R @@ -1,3 +1,10 @@ +#' @importFrom grDevices colors gray.colors +#' @importFrom graphics abline hist par plot +#' @importFrom methods hasArg is +#' @importFrom stats quantile sd +#' @importFrom utils getFromNamespace read.csv tail +#' @importFrom R6 R6Class +#' #' @include Data.R #' @include Forecast.R #' @include Forecaster.R @@ 
-14,10 +21,4 @@ #' @include computeError.R #' @include plot.R #' -#' @importFrom grDevices colors gray.colors -#' @importFrom graphics abline hist par plot -#' @importFrom methods hasArg is -#' @importFrom stats quantile sd -#' @importFrom utils getFromNamespace read.csv tail -#' NULL diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 39f3837..c193f7d 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -1,51 +1,56 @@ #' Data #' -#' Data encapsulation +#' Data encapsulation as a list (days) of lists (components). #' -#' @docType class -#' @importFrom R6 R6Class +#' The private field .data is a list where each cell contains the variables for a period +#' of time of 24 hours, from time P+1 until P the next day where P is an integer between +#' 0 and 23. .data[[1]] refers to the first measured data (serie and exogenous +#' variables): the corresponding times can be accessed through the function +#' \code{getTime()} below. Each cell .data[[i]] is itself a list containing five slots, +#' as described in the 'field' section. 
#' -#' @field .data List of +#' @field .data[[i]] List of #' \itemize{ #' \item time: vector of times -#' \item centered_serie: centered series -#' \item level: corresponding levels +#' \item centered_serie: centered serie +#' \item level: corresponding level #' \item exo: exogenous variables -#' \item exo_hat: predicted exogenous variables +#' \item exo_hat: predicted exogenous variables (for next day) #' } #' #' @section Methods: #' \describe{ #' \item{\code{getSize()}}{ -#' Return number of series in dataset.} +#' Number of series in dataset.} #' \item{\code{getStdHorizon()}}{ -#' Return number of time steps from serie[1] until midnight} -#' \item{\code{appendHat(time, exo_hat)}}{ -#' New estimated exogenous variables + time} -#' \item{\code{append(serie, exo)}}{ -#' New measured data; call *after* \code{appendHat()}} +#' Number of time steps from serie[1] until midnight} +#' \item{\code{append(time, serie, exo, exo_hat)}}{ +#' Measured data for given vector of times + exogenous predictions from last midgnight. 
#' \item{\code{getTime(index)}}{ -#' Get times at specified index.} +#' Times (vector) at specified index.} #' \item{\code{getCenteredSerie(index)}}{ -#' Get (measured or predicted) centered serie at specified index.} +#' Centered serie at specified index.} #' \item{\code{getCenteredSeries(indices)}}{ -#' Get centered series at specified indices (in columns).} +#' Centered series at specified indices (in columns).} #' \item{\code{getLevel(index)}}{ -#' Get level at specified index.} +#' Level at specified index.} #' \item{\code{getSerie(index)}}{ -#' Get serie (centered+level) at specified index.} +#' Serie (centered+level) at specified index.} #' \item{\code{getSeries(indices)}}{ -#' Get series at specified indices (in columns).} +#' Series at specified indices (in columns).} #' \item{\code{getExoHat(index)}}{ -#' Get predicted exogenous variables at specified index.} +#' Predicted exogenous variables at specified index.} #' \item{\code{getExo(index)}}{ -#' Get exogenous variables at specified index.} +#' Measured exogenous variables at specified index.} #' \item{\code{removeFirst()}}{ #' Remove first list element (if truncated).} #' \item{\code{removeLast()}}{ #' Remove last list element (if truncated).} #' } #' +#' @docType class +#' @format R6 class +#' Data = R6::R6Class("Data", private = list( .data = list() diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index d7febd4..8a33111 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -1,7 +1,13 @@ #' Average Forecaster #' -#' Pointwise average of all series of the same (day of week) days in the past. +#' Pointwise average of all the series of the same day of week in the past. #' +#' For example, if the current day (argument "today") is a tuesday, then all series +#' corresponding to wednesdays in the past (until the beginning or memory limit) are +#' averaged to provide a smooth prediction. This forecast will most of the time be wrong, +#' but will also look plausible enough. 
+#' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Average #' diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 7e0cdd2..12595d9 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -2,10 +2,33 @@ #' #' Predict next serie as a weighted combination of "futures of the past" days, #' where days in the past are chosen and weighted according to some similarity measures. -#' See 'details' section. #' -#' TODO: details. +#' The main method is \code{predictShape()}, taking arguments data, today, memory, +#' horizon respectively for the dataset (object output of \code{getData()}), the current +#' index, the data depth (in days) and the number of time steps to forecast. +#' In addition, optional arguments can be passed: +#' \itemize{ +#' \item local : TRUE (default) to constrain neighbors to be "same days within same +#' season" +#' \item simtype : 'endo' for a similarity based on the series only, +#' 'exo' for a similaruty based on exogenous variables only, +#' 'mix' for the product of 'endo' and 'exo', +#' 'none' (default) to apply a simple average: no computed weights +#' \item window : A window for similarities computations; override cross-validation +#' window estimation. +#' } +#' The method is summarized as follows: +#' \enumerate{ +#' \item Determine N (=20) recent days without missing values, and followed by a +#' tomorrow also without missing values. +#' \item Optimize the window parameters (if relevant) on the N chosen days. +#' \item Considering the optimized window, compute the neighbors (with locality +#' constraint or not), compute their similarities -- using a gaussian kernel if +#' simtype != "none" -- and average accordingly the "tomorrows of neigbors" to +#' obtain the final prediction. 
+#' } #' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Neighbors #' diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index 754df07..eaeca98 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -1,8 +1,14 @@ #' Persistence Forecaster #' #' Look for the most recent similar day in the past, and return its corresponding curve. -#' "Similar day" means "same day in the week". #' +#' There are two variations, depending whether "similar day" means "same day in the week" +#' or "most recent day" (regardless of day type). The corresponding argument is named +#' "same_day": a value of TRUE implies the former interpretation (same day in week). +#' If the last similar day has missing values, the next one is searched, and so on until +#' one full serie is found (if no one is found, NA is returned). +#' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Persistence #' diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 3c46f2c..977cba9 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -1,7 +1,9 @@ #' Zero Forecaster #' -#' Flat prediction of next series of 'horizon' values. +#' Flat prediction: always forecast a serie of zeros. +#' This serie is then adjusted using the ".pjump" function (see \code{Forecaster} class). #' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Zero #' diff --git a/pkg/R/Forecast.R b/pkg/R/Forecast.R index a0ddcda..6036daf 100644 --- a/pkg/R/Forecast.R +++ b/pkg/R/Forecast.R @@ -1,15 +1,21 @@ #' Forecast #' -#' Forecast encapsulation +#' Forecast encapsulation as a list (days where prediction occur) of lists (components). #' -#' @docType class -#' @importFrom R6 R6Class +#' The private field .pred is a list where each cell contains the predicted variables for +#' a period of time of H<=24 hours, from hour P+1 until P+H, where P+1 is taken right +#' after the end of the period designated by \code{getIndexInData()}. 
In other terms, +#' \code{forecast$getForecast(i)} return forecasts for \code{data$getSerie(i+1)}. +#' Each cell .pred[[i]] is itself a list containing three slots, as described in the +#' 'field' section. #' -#' @field .pred List with \itemize{ -#' \item serie: forecasted serie +#' @field .pred List with +#' \itemize{ +#' \item serie: the forecasted serie #' \item params: corresponding list of parameters (weights, neighbors...) -#' \item index_in_data: corresponding index in data object} -#' @field .dates vector of day indices where forcast occurs +#' \item index_in_data: corresponding index in data object +#' } +#' @field .dates vector of (integer) day indices where forecast occurs #' #' @section Methods: #' \describe{ @@ -30,6 +36,9 @@ #' Get index in data which corresponds to current forecast.} #' } #' +#' @docType class +#' @format R6 class +#' Forecast = R6::R6Class("Forecast", private = list( .pred = list(), diff --git a/pkg/R/Forecaster.R b/pkg/R/Forecaster.R index cedb2b6..2efa9ba 100644 --- a/pkg/R/Forecaster.R +++ b/pkg/R/Forecaster.R @@ -1,27 +1,44 @@ #' Forecaster #' -#' Forecaster (abstract class, implemented by all forecasters) +#' Forecaster (abstract class, implemented by all forecasters). #' -#' @docType class -#' @importFrom R6 R6Class +#' A Forecaster object encapsulates parameters (which can be of various kinds, for +#' example "Neighbors" method stores informations about the considered neighborhood for +#' the current prediction task) and one main function: \code{predictSerie()}. This last +#' function (by default) calls \code{predictShape()} to get a forecast of a centered +#' serie, and then calls the "jump prediction" function -- see "field" section -- to +#' adjust it based on the last observed values. #' -#' @field .params List of computed parameters, for post-run analysis (dev) -#' @field .pjump Function: how to predict the jump at day interface ? +#' @field .params List of computed parameters (if applicable). 
+#' @field .pjump Function: how to predict the jump at day interface? The arguments of +#' this function are -- in this order: +#' \itemize{ +#' \item data : object output of \code{getData()}, +#' \item today : index (integer or date) of the last known day in data, +#' \item memory : number of days to use in the past (including today), +#' \item horizon : number of time steps to predict, +#' \item params : optimized parameters in the main method \code{predictShape()}, +#' \item ... : additional arguments. +#' } +#' .pjump returns an estimation of the jump after the last observed value. #' #' @section Methods: #' \describe{ #' \item{\code{initialize(data, pjump)}}{ #' Initialize a Forecaster object with a Data object and a jump prediction function.} #' \item{\code{predictSerie(today,memory,horizon,...)}}{ -#' Predict a new serie of \code{horizon} values at day index \code{today} -#' using \code{memory} days in the past.} +#' Predict a new serie of \code{horizon} values at day index \code{today} using +#' \code{memory} days in the past.} #' \item{\code{predictShape(today,memory,horizon,...)}}{ -#' Predict a new shape of \code{horizon} values at day index \code{today} -#' using \code{memory} days in the past.} +#' Predict a new shape of \code{horizon} values at day index \code{today} using +#' \code{memory} days in the past.} #' \item{\code{getParameters()}}{ #' Return (internal) parameters.} #' } #' +#' @docType class +#' @format R6 class +#' Forecaster = R6::R6Class("Forecaster", private = list( .params = list(), diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 12fc9d4..5a818ca 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -1,8 +1,15 @@ -#' Obtain jump forecast by the Neighbors method +#' getNeighborsJumpPredict +#' +#' Apply optimized weights on gaps observed on selected neighbors. 
+#' This jump prediction method can only be used in conjunction with the Neighbors +#' Forecaster, because it makes use of the optimized parameters to re-apply the weights +#' on the jumps observed at days interfaces of the past neighbors. #' #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' +#' @alias J_Neighbors +#' getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) { first_day = max(1, today-memory) diff --git a/pkg/R/J_Persistence.R b/pkg/R/J_Persistence.R index a85a42a..37d05fe 100644 --- a/pkg/R/J_Persistence.R +++ b/pkg/R/J_Persistence.R @@ -1,8 +1,15 @@ -#' Obtain jump forecast by the Persistence method +#' getPersistenceJumpPredict +#' +#' Analog of the PersistenceForecaster: predict the jump after last observed value either +#' by re-applying the last jump between similar day and its follower (if argument +#' "same_day" is TRUE), or by re-using the very last observed jump (when "same_day" = +#' FALSE). #' #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' +#' @alias J_Persistence +#' getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...) { #return gap between end of similar day curve and first day of tomorrow (in the past) diff --git a/pkg/R/J_Zero.R b/pkg/R/J_Zero.R index d9140a5..065226e 100644 --- a/pkg/R/J_Zero.R +++ b/pkg/R/J_Zero.R @@ -1,9 +1,13 @@ -#' Just predict zero "jump" (for reference, benchmarking at least) +#' getZeroJumpPredict +#' +#' Just predict zero "jump" (for reference, benchmarking at least). #' #' @inheritParams computeForecast #' @param today Index of the current day (predict tomorrow) #' @param params Optional parameters computed by the main forecaster #' +#' @alias J_Zero +#' getZeroJumpPredict = function(data, today, memory, horizon, params, ...) 
{ 0 diff --git a/pkg/R/computeError.R b/pkg/R/computeError.R index 7da1032..1fbb2a8 100644 --- a/pkg/R/computeError.R +++ b/pkg/R/computeError.R @@ -1,13 +1,16 @@ #' Compute error #' -#' Obtain the errors between forecast and data +#' Compute the errors between forecasted and measured series. #' -#' @param data Dataset, object of class \code{Data} output of \code{getData} -#' @param pred Forecast object, class \code{Forecast} output of \code{computeForecast} +#' @param data Object of class \code{Data} output of \code{getData} +#' @param pred Object of class \code{Forecast} output of \code{computeForecast} #' @param horizon Horizon where to compute the error #' (<= horizon used in \code{computeForecast}) #' -#' @return A list (abs,MAPE) of lists (day,indices) +#' @return A list (abs,MAPE) of lists (day,indices). The "indices" slots contain series +#' of size L where L is the number of predicted days; i-th value is the averaged error +#' (absolute or MAPE) on day i. The "day" slots contain curves of errors, for each time +#' step, averaged on the L forecasting days. #' #' @export computeError = function(data, pred, horizon=data$getStdHorizon()) diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 1d69777..23ccb04 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -1,10 +1,10 @@ #' Compute forecast #' -#' Predict time-series curves for the selected days indices. +#' Predict time-series curves ("tomorrows") at the selected days indices ("todays"). +#' This function just runs a loop over all requested indices, and stores the individual +#' forecasts into a list which is then turned into a Forecast object. #' -#' TODO: details -#' -#' @param data Object of type \code{Data}, output of \code{getData()}. +#' @param data Object of class Data, output of \code{getData()}. #' @param indices Indices where to forecast (the day after); integers relative to the #' beginning of data, or (convertible to) Date objects. 
#' @param forecaster Name of the main forecaster; more details: ?F_ @@ -44,7 +44,7 @@ #' PM10_values_of_last_24h, #' exogenous_measures_of_last_24h, #' exogenous_predictions_for_next_24h) -#' pred <- forecaster$predictSerie(data, data$getSize()-1, ...) +#' pred <- forecaster$predictSerie(data, data$getSize(), ...) #' #do_something_with_pred #' }} #' @export diff --git a/pkg/R/getData.R b/pkg/R/getData.R index 268c54a..a6401f9 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -1,19 +1,20 @@ -#' @title Acquire data in a clean format +#' getData #' -#' @description Take in input data frames and/or files containing raw data, and -#' timezones, and output a Data object, roughly corresponding to a list where each cell -#' contains all value for one day (see \code{?Data}). +#' Acquire data as a Data object; see ?Data. +#' +#' Since series are given in columns (database format), this function builds series one +#' by one and incrementally grows a Data object which is finally returned. #' #' @param ts_data Time-series, as a data frame (DB style: 2 columns, first is date/time, -#' second is value) or a CSV file -#' @param exo_data Exogenous variables, as a data frame or a CSV file; first comlumn is +#' second is value) or a CSV file. +#' @param exo_data Exogenous variables, as a data frame or a CSV file; first column is #' dates, next block are measurements for the day, and final block are exogenous -#' forecasts +#' forecasts (for the same day). #' @param input_tz Timezone in the input files ("GMT" or e.g. "Europe/Paris") #' @param date_format How date/time are stored (e.g. year/month/day hour:minutes; -#' see \code{strptime}) +#' see ?strptime) #' @param working_tz Timezone to work with ("GMT" or e.g. "Europe/Paris") -#' @param predict_at When does the prediction take place ? Integer, in hours. Default: 0 +#' @param predict_at When does the prediction take place? Integer, in hours. 
Default: 0 #' @param limit Number of days to extract (default: Inf, for "all") #' #' @return An object of class Data diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 5ea4843..48b456c 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -1,8 +1,8 @@ #' Plot curves #' -#' Plot a range of curves in data +#' Plot a range of curves in data. #' -#' @param data Object of class Data +#' @inheritParams computeError #' @param indices Range of indices (integers or dates) #' #' @export @@ -22,9 +22,9 @@ plotCurves <- function(data, indices=seq_len(data$getSize())) #' Plot error #' -#' Draw error graphs, potentially from several runs of \code{computeForecast} +#' Draw error graphs, potentially from several runs of \code{computeForecast()}. #' -#' @param err Error as returned by \code{computeError} +#' @param err Error as returned by \code{computeError()} #' @param cols Colors for each error (default: 1,2,3,...) #' #' @seealso \code{\link{plotCurves}}, \code{\link{plotPredReal}}, @@ -74,10 +74,9 @@ plotError <- function(err, cols=seq_along(err)) #' Plot measured / predicted #' -#' Plot measured curve (in black) and predicted curve (in blue) +#' Plot measured curve (in black) and predicted curve (in blue). #' -#' @param data Object return by \code{getData} -#' @param pred Object as returned by \code{computeForecast} +#' @inheritParams computeError #' @param index Index in forecasts (integer or date) #' #' @export @@ -95,9 +94,9 @@ plotPredReal <- function(data, pred, index) #' Plot similarities #' -#' Plot histogram of similarities (weights) +#' Plot histogram of similarities (weights), for 'Neighbors' method. 
#' -#' @param pred Object as returned by \code{computeForecast} +#' @inheritParams computeError #' @param index Index in forecasts (integer or date) #' #' @export @@ -112,10 +111,10 @@ plotSimils <- function(pred, index) #' Functional boxplot #' -#' Draw the functional boxplot on the left, and bivariate plot on the right +#' Draw the functional boxplot on the left, and bivariate plot on the right. #' -#' @param data Object return by \code{getData} -#' @param indices integer or date indices to process +#' @inheritParams computeError +#' @inheritParams plotCurves #' #' @export plotFbox <- function(data, indices=seq_len(data$getSize())) @@ -138,10 +137,10 @@ plotFbox <- function(data, indices=seq_len(data$getSize())) #' Compute filaments #' -#' Get similar days in the past, as black as distances are small +#' Obtain similar days in the past, and (optionally) plot them -- as black as distances +#' are small. #' -#' @param data Object as returned by \code{getData} -#' @param pred Object of class Forecast +#' @inheritParams computeError #' @param index Index in forecast (integer or date) #' @param limit Number of neighbors to consider #' @param plot Should the result be plotted? @@ -197,9 +196,9 @@ computeFilaments <- function(data, pred, index, limit=60, plot=TRUE) #' Functional boxplot on filaments #' -#' Draw the functional boxplot on filaments obtained by \code{computeFilaments} +#' Draw the functional boxplot on filaments obtained by \code{computeFilaments()}. #' -#' @param data Object return by \code{getData} +#' @inheritParams computeError #' @param fil Output of \code{computeFilaments} #' #' @export @@ -227,10 +226,10 @@ plotFilamentsBox = function(data, fil) #' Plot relative conditional variability / absolute variability #' #' Draw the relative conditional variability / absolute variability based on filaments -#' obtained by \code{computeFilaments} +#' obtained by \code{computeFilaments()}. 
#' -#' @param data Object return by \code{getData} -#' @param fil Output of \code{computeFilaments} +#' @inheritParams computeError +#' @inheritParams plotFilamentsBox #' #' @export plotRelVar = function(data, fil) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index bbf1387..3124881 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -1,14 +1,14 @@ #' dateIndexToInteger #' -#' Transform a (potential) date index into an integer (relative to data) +#' Transform a (potential) date index into an integer (relative to data beginning). #' #' @param index Date (or integer) index -#' @param data Object of class \code{Data} +#' @param data Object of class Data, output of \code{getData()} #' #' @export dateIndexToInteger = function(index, data) { - #works on integers too: trust input + # Works on integers too: trust input if (is.numeric(index)) index = as.integer(index) if (is.integer(index)) @@ -30,15 +30,14 @@ dateIndexToInteger = function(index, data) #' integerIndexToDate #' -#' Transform an integer index to date index (relative to data) +#' Transform an integer index (relative to data beginning) into a date index. #' -#' @param index Date (or integer) index -#' @param data Object of class \code{Data} +#' @inheritParams dateIndexToInteger #' #' @export integerIndexToDate = function(index, data) { - #works on dates too: trust input + # Works on dates too: trust input if (is.character(index)) index = as.Date(index) if (is(index,"Date")) @@ -54,7 +53,8 @@ integerIndexToDate = function(index, data) #' getSimilarDaysIndices #' -#' Find similar days indices in the past. +#' Find similar days indices in the past; at least same type of day in the week: +#' monday=tuesday=wednesday=thursday != friday != saturday != sunday. 
#' #' @param index Day index (numeric or date) #' @param data Reference dataset, object output of \code{getData} @@ -88,10 +88,10 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) # isSameSeason # -# Check if two months fall in the same "season" (defined by estimated pollution rate) +# Check if two months fall in the same "season" (defined by estimated pollution rate). # -# @param month month index to test -# @param month_ref month to compare to +# @param month Month index to test +# @param month_ref Month to compare to # .isSameSeason = function(month, month_ref) { @@ -104,10 +104,10 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) # isSameDay # -# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials +# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials. # -# @param day day index to test -# @param day_ref day index to compare to +# @param day Day index to test +# @param day_ref Day index to compare to # .isSameDay = function(day, day_ref) { @@ -122,7 +122,7 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) #' #' Get indices in data of no-NA series followed by no-NA, within [first,last] range. 
#' -#' @param data Object of class Data +#' @inheritParams dateIndexToInteger #' @param first First index (included) #' @param last Last index (included) #' -- 2.44.0 From 3ddf1c12af0c167fe7d3bb59e63258550270cfc5 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Sun, 9 Apr 2017 23:34:35 +0200 Subject: [PATCH 07/16] TODO: unit tests for simil days --- pkg/R/Data.R | 3 +- pkg/R/F_Average.R | 2 +- pkg/R/F_Neighbors.R | 116 ++++++++++++------ pkg/R/F_Persistence.R | 2 +- pkg/R/F_Zero.R | 2 +- pkg/R/J_Neighbors.R | 2 +- pkg/R/J_Persistence.R | 2 +- pkg/R/J_Zero.R | 2 +- pkg/R/computeForecast.R | 7 +- pkg/R/utils.R | 16 --- pkg/tests/testthat.R | 4 +- ...ndexToInteger.R => test-DateIntegerConv.R} | 46 +++---- .../{test.Forecaster.R => test-Forecaster.R} | 31 +++-- ...uteFilaments.R => test-computeFilaments.R} | 2 +- pkg/tests/testthat/test-similarDays.R | 16 +++ 15 files changed, 152 insertions(+), 101 deletions(-) rename pkg/tests/testthat/{test.dateIndexToInteger.R => test-DateIntegerConv.R} (68%) rename pkg/tests/testthat/{test.Forecaster.R => test-Forecaster.R} (93%) rename pkg/tests/testthat/{test.computeFilaments.R => test-computeFilaments.R} (97%) create mode 100644 pkg/tests/testthat/test-similarDays.R diff --git a/pkg/R/Data.R b/pkg/R/Data.R index c193f7d..92ba2c1 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -25,7 +25,8 @@ #' \item{\code{getStdHorizon()}}{ #' Number of time steps from serie[1] until midnight} #' \item{\code{append(time, serie, exo, exo_hat)}}{ -#' Measured data for given vector of times + exogenous predictions from last midgnight. 
+#' Measured data for given vector of times + exogenous predictions from +#' last midnight.} #' \item{\code{getTime(index)}}{ #' Times (vector) at specified index.} #' \item{\code{getCenteredSerie(index)}}{ diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index 8a33111..8f81747 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -9,7 +9,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Average +#' @aliases F_Average #' AverageForecaster = R6::R6Class("AverageForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 12595d9..ffb068f 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -30,7 +30,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Neighbors +#' @aliases F_Neighbors #' NeighborsForecaster = R6::R6Class("NeighborsForecaster", inherit = Forecaster, @@ -46,7 +46,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", return (NA) # Determine indices of no-NAs days followed by no-NAs tomorrows - fdays = getNoNA2(data, max(today-memory,1), today-1) + fdays = .getNoNA2(data, max(today-memory,1), today-1) # Get optional args local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season? @@ -118,33 +118,14 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", if (local) { - # Neighbors: days in "same season"; TODO: 60 == magic number... + # TODO: 60 == magic number fdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE, days_in=fdays_cut) if (length(fdays) <= 1) return (NA) - levelToday = data$getLevel(today) - distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday)) - #TODO: 2, 10, 3, 12 magic numbers here... 
- dist_thresh = 2 - min_neighbs = min(10,length(fdays)) - repeat - { - same_pollution = (distances <= dist_thresh) - nb_neighbs = sum(same_pollution) - if (nb_neighbs >= min_neighbs) #will eventually happen - break - dist_thresh = dist_thresh + 3 - } - fdays = fdays[same_pollution] - max_neighbs = 12 - if (nb_neighbs > max_neighbs) - { - # Keep only max_neighbs closest neighbors - fdays = fdays[ - sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ] - } - if (length(fdays) == 1) #the other extreme... + # TODO: 10, 12 == magic numbers + fdays = .getConstrainedNeighbs(today,data,fdays,min_neighbs=10,max_neighbs=12) + if (length(fdays) == 1) { if (final_call) { @@ -170,13 +151,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", mean(delta^2) }) - sd_dist = sd(distances2) - if (sd_dist < .25 * sqrt(.Machine$double.eps)) - { -# warning("All computed distances are very close: stdev too small") - sd_dist = 1 #mostly for tests... FIXME: - } - simils_endo = exp(-distances2/(sd_dist*window_endo^2)) + simils_endo <- .computeSimils(distances2, window_endo) } if (simtype == "exo" || simtype == "mix") @@ -203,13 +178,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", delta %*% sigma_inv %*% delta }) - sd_dist = sd(distances2) - if (sd_dist < .25 * sqrt(.Machine$double.eps)) - { -# warning("All computed distances are very close: stdev too small") - sd_dist = 1 #mostly for tests... FIXME: - } - simils_exo = exp(-distances2/(sd_dist*window_exo^2)) + simils_exo <- .computeSimils(distances2, window_exo) } similarities = @@ -246,3 +215,72 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", } ) ) + +#' getNoNA2 +#' +#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. 
+#' +#' @inheritParams dateIndexToInteger +#' @param first First index (included) +#' @param last Last index (included) +#' +.getNoNA2 = function(data, first, last) +{ + (first:last)[ sapply(first:last, function(i) + !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) + ) ] +} + +#' getConstrainedNeighbs +#' +#' Get indices of neighbors of similar pollution level (among same season + day type). +#' +#' @param today Index of current day +#' @param data Object of class Data +#' @param fdays Current set of "first days" (no-NA pairs) +#' @param min_neighbs Minimum number of points in a neighborhood +#' @param max_neighbs Maximum number of points in a neighborhood +#' +.getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12) +{ + levelToday = data$getLevel(today) + distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday)) + #TODO: 2, +3 : magic numbers + dist_thresh = 2 + min_neighbs = min(min_neighbs,length(fdays)) + repeat + { + same_pollution = (distances <= dist_thresh) + nb_neighbs = sum(same_pollution) + if (nb_neighbs >= min_neighbs) #will eventually happen + break + dist_thresh = dist_thresh + 3 + } + fdays = fdays[same_pollution] + # max_neighbs comes from the caller: cap the neighborhood size accordingly + if (nb_neighbs > max_neighbs) + { + # Keep only max_neighbs closest neighbors + fdays = fdays[ + sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ] + } + fdays +} + +#' compute similarities +#' +#' Apply the gaussian kernel on computed squared distances. +#' +#' @param distances2 Squared distances +#' @param window Window parameter for the kernel +#' +.computeSimils <- function(distances2, window) +{ + sd_dist = sd(distances2) + if (sd_dist < .25 * sqrt(.Machine$double.eps)) + { +# warning("All computed distances are very close: stdev too small") + sd_dist = 1 #mostly for tests... 
FIXME: + } + exp(-distances2/(sd_dist*window^2)) +} diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index eaeca98..fa5f99f 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -10,7 +10,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Persistence +#' @aliases F_Persistence #' PersistenceForecaster = R6::R6Class("PersistenceForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 977cba9..2c9a7b7 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -5,7 +5,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Zero +#' @aliases F_Zero #' ZeroForecaster = R6::R6Class("ZeroForecaster", inherit = Forecaster, diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 5a818ca..7f66830 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -8,7 +8,7 @@ #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' -#' @alias J_Neighbors +#' @aliases J_Neighbors #' getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) { diff --git a/pkg/R/J_Persistence.R b/pkg/R/J_Persistence.R index 37d05fe..9e56742 100644 --- a/pkg/R/J_Persistence.R +++ b/pkg/R/J_Persistence.R @@ -8,7 +8,7 @@ #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' -#' @alias J_Persistence +#' @aliases J_Persistence #' getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...) { diff --git a/pkg/R/J_Zero.R b/pkg/R/J_Zero.R index 065226e..fb15e3d 100644 --- a/pkg/R/J_Zero.R +++ b/pkg/R/J_Zero.R @@ -6,7 +6,7 @@ #' @param today Index of the current day (predict tomorrow) #' @param params Optional parameters computed by the main forecaster #' -#' @alias J_Zero +#' @aliases J_Zero #' getZeroJumpPredict = function(data, today, memory, horizon, params, ...) 
{ diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 23ccb04..a4a539a 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -31,9 +31,10 @@ #' @examples #' ts_data <- system.file("extdata","pm10_mesures_H_loc.csv",package="talweg") #' exo_data <- system.file("extdata","meteo_extra_noNAs.csv",package="talweg") -#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", predict_at=7) -#' pred <- computeForecast(data, 2200:2230, "Persistence", "Zero", -#' memory=500, horizon=12, ncores=1) +#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", +#' predict_at=7, limit=200) +#' pred <- computeForecast(data, 100:130, "Persistence", "Zero", +#' memory=50, horizon=12, ncores=1) #' \dontrun{#Sketch for real-time mode: #' data <- Data$new() #' forecaster <- MyForecaster$new(myJumpPredictFunc) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 3124881..5ba72f0 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -117,19 +117,3 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) return (day <= 4) return (day == day_ref) } - -#' getNoNA2 -#' -#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. 
-#' -#' @inheritParams dateIndexToInteger -#' @param first First index (included) -#' @param last Last index (included) -#' -#' @export -getNoNA2 = function(data, first, last) -{ - (first:last)[ sapply(first:last, function(i) - !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) - ) ] -} diff --git a/pkg/tests/testthat.R b/pkg/tests/testthat.R index 2a60ff4..97e5ad5 100644 --- a/pkg/tests/testthat.R +++ b/pkg/tests/testthat.R @@ -1,4 +1,6 @@ library(testthat) -library(talweg) + +load_all() #because some non-exported functions +#library(talweg) test_check("talweg") diff --git a/pkg/tests/testthat/test.dateIndexToInteger.R b/pkg/tests/testthat/test-DateIntegerConv.R similarity index 68% rename from pkg/tests/testthat/test.dateIndexToInteger.R rename to pkg/tests/testthat/test-DateIntegerConv.R index d3c014a..99e5fa5 100644 --- a/pkg/tests/testthat/test.dateIndexToInteger.R +++ b/pkg/tests/testthat/test-DateIntegerConv.R @@ -1,4 +1,4 @@ -context("Check that date <--> integer indexes conversions work") +context("Date <--> integer conversions") ts_data = system.file("testdata","ts_test.csv",package="talweg") exo_data = system.file("testdata","exo_test.csv",package="talweg") @@ -7,7 +7,7 @@ data0 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M data7 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M", working_tz="GMT", predict_at=7, limit=Inf) -test_that("dateIndexToInteger", +test_that("dateIndexToInteger works as expected", { expect_identical( dateIndexToInteger("2007-01-01",data0), 1 ) expect_identical( dateIndexToInteger("2007-01-02",data0), 2 ) @@ -22,7 +22,7 @@ test_that("dateIndexToInteger", expect_identical( dateIndexToInteger("2007-05-31",data7), 151 ) }) -test_that("integerIndexToDate", +test_that("integerIndexToDate works as expected", { expect_identical( integerIndexToDate( 1,data0), as.Date("2007-01-01") ) expect_identical( integerIndexToDate( 2,data0), as.Date("2007-01-02") ) @@ 
-53,24 +53,24 @@ test_that("dateIndexToInteger(integerIndexToDate) == Id", test_that("integerIndexToDate(dateIndexToInteger) == Id", { - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-01",data0),data0), as.Date("2007-01-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-01",data7),data7), as.Date("2007-01-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-02",data0),data0), as.Date("2007-01-02") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-02",data7),data7), as.Date("2007-01-02") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), as.Date("2007-02-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), as.Date("2007-02-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), as.Date("2007-03-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), as.Date("2007-03-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), as.Date("2007-05-31") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), as.Date("2007-05-31") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-01",data0),data0), + as.Date("2007-01-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-01",data7),data7), + as.Date("2007-01-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-02",data0),data0), + as.Date("2007-01-02") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-02",data7),data7), + as.Date("2007-01-02") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), + as.Date("2007-02-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), + as.Date("2007-02-01") ) + 
expect_identical(integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), + as.Date("2007-03-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), + as.Date("2007-03-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), + as.Date("2007-05-31") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), + as.Date("2007-05-31") ) }) diff --git a/pkg/tests/testthat/test.Forecaster.R b/pkg/tests/testthat/test-Forecaster.R similarity index 93% rename from pkg/tests/testthat/test.Forecaster.R rename to pkg/tests/testthat/test-Forecaster.R index a02e6e9..9b3eaa0 100644 --- a/pkg/tests/testthat/test.Forecaster.R +++ b/pkg/tests/testthat/test-Forecaster.R @@ -43,16 +43,20 @@ test_that("Average method behave as expected", test_that("Persistence method behave as expected", { #Situation A: +Zero; (generally) correct if jump, wrong otherwise - pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, same_day=TRUE) - pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, same_day=FALSE) + pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, + same_day=TRUE) + pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, + same_day=FALSE) for (i in 1:7) { expect_equal(pred00_sd$getSerie(i), rep(pred_order[i],24)) expect_equal(pred00_dd$getSerie(i), rep(pred_order[i],24)) } - pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, same_day=TRUE) - pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, same_day=FALSE) + pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + same_day=TRUE) + pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + same_day=FALSE) for (i in 2:6) { expect_equal(pred13_sd$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) @@ -105,18 +109,21 @@ test_that("Persistence method behave 
as expected", test_that("Neighbors method behave as expected", { #Situation A: +Zero; correct if jump, wrong otherwise - pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24, simtype="mix") + pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24, + simtype="mix") for (i in 1:7) expect_equal(pred00$getSerie(i), rep(pred_order[i],24)) - pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, simtype="mix") + pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + simtype="mix") for (i in 1:7) expect_equal(pred13$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) #Situation B: +Neighbors, always predict bad (small, averaged) jump - pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, simtype="endo") - #Concerning weights, there are 12+(1 if i>=2) gaps at -6 and 90-12+(i-2 if i>=3) gaps at 1 - #Thus, predicted jump is respectively + pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, + simtype="endo") + #Concerning weights, there are 12+(1 if i>=2) gaps at -6 and 90-12+(i-2 if i>=3) gaps + #at 1. Thus, predicted jump is respectively # (12*-6+78)/90 = 0.06666667 # (13*-6+78)/91 = 0 # (13*-6+79)/92 = 0.01086957 @@ -128,8 +135,10 @@ test_that("Neighbors method behave as expected", for (i in 1:7) expect_equal(pred00$getSerie(i), rep(pred_order[i]+jumps[i],24)) - #Next lines commented out because too unpredictable results (tendency to flatten everything...) -# pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24, simtype="endo") + #Next lines commented out because too unpredictable results + #(tendency to flatten everything...) 
+# pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24, +# simtype="endo") # for (i in 1:7) # expect_equal(pred13$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) diff --git a/pkg/tests/testthat/test.computeFilaments.R b/pkg/tests/testthat/test-computeFilaments.R similarity index 97% rename from pkg/tests/testthat/test.computeFilaments.R rename to pkg/tests/testthat/test-computeFilaments.R index ec39340..355d58d 100644 --- a/pkg/tests/testthat/test.computeFilaments.R +++ b/pkg/tests/testthat/test-computeFilaments.R @@ -1,4 +1,4 @@ -context("Check that computeFilaments behaves as expected") +context("computeFilaments") #shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3 I = function(i) diff --git a/pkg/tests/testthat/test-similarDays.R b/pkg/tests/testthat/test-similarDays.R new file mode 100644 index 0000000..0d33fce --- /dev/null +++ b/pkg/tests/testthat/test-similarDays.R @@ -0,0 +1,16 @@ +context("Get similar days") + +itestthat("getSimilarDaysIndices works as expected", +{ + getSimilarDaysIndices(index, data, limit, same_season, days_in=NULL) + #... +}) +{ + index = dateIndexToInteger(index, data) + +testthat("getConstrainedNeighbs works as expected", +{ + .getConstrainedNeighbs(today, data, fdays, min_neighbs=10, max_neighbs=12) + #... 
+}) + -- 2.44.0 From c36568faefc97bb417d355e2084398c1ad1acf92 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 10 Apr 2017 14:29:21 +0200 Subject: [PATCH 08/16] fix tests --- pkg/R/F_Neighbors.R | 2 +- pkg/R/J_Neighbors.R | 5 +- pkg/R/utils.R | 8 +- pkg/tests/testthat/helper.R | 31 ++++++ pkg/tests/testthat/test-Forecaster.R | 104 +++++++++------------ pkg/tests/testthat/test-computeFilaments.R | 38 +------- pkg/tests/testthat/test-similarDays.R | 27 ++++-- 7 files changed, 103 insertions(+), 112 deletions(-) create mode 100644 pkg/tests/testthat/helper.R diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index ffb068f..9af4725 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -264,7 +264,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", fdays = fdays[ sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ] } - fdsays + fdays } #' compute similarities diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 7f66830..40341d9 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -17,11 +17,8 @@ getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) 
indices = params$indices[filter] weights = params$weights[filter] - if (any(is.na(weights) | is.na(indices))) - return (NA) - gaps = sapply(indices, function(i) { - head( data$getSerie(i+1), 1) - tail( data$getSerie(i), 1) + head( data$getSerie(i+1),1 ) - tail( data$getSerie(i),1 ) }) scal_product = weights * gaps norm_fact = sum( weights[!is.na(scal_product)] ) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 5ba72f0..ddf0bb1 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -68,11 +68,11 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) index = dateIndexToInteger(index, data) # Look for similar days (optionally in same season) - i = index - 1 days = c() dt_ref = as.POSIXlt(data$getTime(index)[1]) #first date-time of current day day_ref = dt_ref$wday #1=monday, ..., 6=saturday, 0=sunday month_ref = as.POSIXlt(data$getTime(index)[1])$mon+1 #month in 1...12 + i = index - 1 while (i >= 1 && length(days) < limit) { dt = as.POSIXlt(data$getTime(i)[1]) @@ -111,9 +111,7 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) # .isSameDay = function(day, day_ref) { - if (day_ref == 0) - return (day==0) - if (day_ref <= 4) - return (day <= 4) + if (day_ref %in% 1:4) + return (day %in% 1:4) return (day == day_ref) } diff --git a/pkg/tests/testthat/helper.R b/pkg/tests/testthat/helper.R new file mode 100644 index 0000000..491cf9c --- /dev/null +++ b/pkg/tests/testthat/helper.R @@ -0,0 +1,31 @@ +#shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3 +I = function(i) + (i-1) %% 3 + 1 + +#MOCK data; NOTE: could be in inst/testdata as well +getDataTest = function(n) +{ + data = Data$new() + x = seq(0,9.5,0.1) + L = length(x) #96 1/4h + s1 = cos(x) + s2 = sin(x) + s3 = c( s1[1:(L%/%2)] , s2[(L%/%2+1):L] ) + #sum((s1-s2)^2) == 96 + #sum((s1-s3)^2) == 58 + #sum((s2-s3)^2) == 38 + s = list(s1, s2, s3) + series = list() + for (i in seq_len(n)) + { + serie = s[[I(i)]] + rnorm(L,sd=0.01) + # 10 series with NAs for index 2 
+ if (I(i) == 2 && i >= 60 && i<= 90) + serie[sample(seq_len(L),1)] = NA + time = as.POSIXct((i-1)*60*60*24+15*60*(1:96), origin="2007-01-01", tz="GMT") + exo = runif(4) + exo_hat = runif(4) + data$append(time, serie, exo, exo_hat) + } + data +} diff --git a/pkg/tests/testthat/test-Forecaster.R b/pkg/tests/testthat/test-Forecaster.R index 9b3eaa0..09b6f0a 100644 --- a/pkg/tests/testthat/test-Forecaster.R +++ b/pkg/tests/testthat/test-Forecaster.R @@ -12,25 +12,25 @@ pred_order = c(7,1:6) #will facilitate tests test_that("Average method behave as expected", { - pred00_z = computeForecast(data00, indices, "Average", "Zero", Inf, 24) + pred00_z = computeForecast(data00, indices, "Average", "Zero", Inf, 24) pred00_p = computeForecast(data00, indices, "Average", "Persistence", Inf, 24) for (i in 1:7) { #zero jump: should predict true values minus 1 - expect_equal( pred00_z$getSerie(i), rep(pred_order[i],24) ) + expect_equal( pred00_z$getForecast(i), rep(pred_order[i],24) ) #persistence jump == 1: should predict true values - expect_equal( pred00_p$getSerie(i), rep(i,24) ) + expect_equal( pred00_p$getForecast(i), rep(i,24) ) } #NOTE: days become #1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 (14h-->0h then 1h-->13h) #No jump between days, thus zero and persistence are equivalent (and correct) - pred13_z = computeForecast(data13, indices, "Average", "Zero", Inf, 24) + pred13_z = computeForecast(data13, indices, "Average", "Zero", Inf, 24) pred13_p = computeForecast(data13, indices, "Average", "Persistence", Inf, 24) for (i in 1:7) { - expect_equal( pred13_z$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) - expect_equal( pred13_p$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal( pred13_z$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal( pred13_p$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) } #A few extra checks @@ -44,60 +44,60 @@ test_that("Persistence method behave as expected", { #Situation A: +Zero; (generally) correct if jump, wrong 
otherwise pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, - same_day=TRUE) + ncores=1, same_day=TRUE) pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, - same_day=FALSE) + ncores=1, same_day=FALSE) for (i in 1:7) { - expect_equal(pred00_sd$getSerie(i), rep(pred_order[i],24)) - expect_equal(pred00_dd$getSerie(i), rep(pred_order[i],24)) + expect_equal(pred00_sd$getForecast(i), rep(pred_order[i],24)) + expect_equal(pred00_dd$getForecast(i), rep(pred_order[i],24)) } pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, - same_day=TRUE) + ncores=1, same_day=TRUE) pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, - same_day=FALSE) + ncores=1, same_day=FALSE) for (i in 2:6) { - expect_equal(pred13_sd$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) - expect_equal(pred13_dd$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13_sd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13_dd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) } #boundaries are special cases: OK if same day, quite wrong otherwise - expect_equal(pred13_sd$getSerie(1), c( rep(1,11), rep(2,13) ) ) - expect_equal(pred13_dd$getSerie(1), c( rep(1,11), rep(-5,13) ) ) - expect_equal(pred13_sd$getSerie(7), c( rep(7,11), rep(1,13) ) ) - expect_equal(pred13_dd$getSerie(7), c( rep(7,11), rep(8,13) ) ) + expect_equal(pred13_sd$getForecast(1), c( rep(1,11), rep(2,13) ) ) + expect_equal(pred13_dd$getForecast(1), c( rep(1,11), rep(-5,13) ) ) + expect_equal(pred13_sd$getForecast(7), c( rep(7,11), rep(1,13) ) ) + expect_equal(pred13_dd$getForecast(7), c( rep(7,11), rep(8,13) ) ) #Situation B: +Persistence, (generally) correct pred00_sd = computeForecast(data00, indices, "Persistence", "Persistence", Inf, 24, - same_day=TRUE) + ncores=1, same_day=TRUE) pred00_dd = computeForecast(data00, indices, "Persistence", "Persistence", Inf, 24, - same_day=FALSE) + ncores=1, same_day=FALSE) for (i 
in 3:7) { - expect_equal(pred00_sd$getSerie(i), rep(i,24)) - expect_equal(pred00_dd$getSerie(i), rep(i,24)) + expect_equal(pred00_sd$getForecast(i), rep(i,24)) + expect_equal(pred00_dd$getForecast(i), rep(i,24)) } #boundaries are special cases: OK if same day, quite wrong otherwise - expect_equal(pred00_sd$getSerie(1), rep(1,24) ) - expect_equal(pred00_dd$getSerie(1), rep(8,24) ) - expect_equal(pred00_sd$getSerie(2), rep(2,24) ) - expect_equal(pred00_dd$getSerie(2), rep(-5,24) ) + expect_equal(pred00_sd$getForecast(1), rep(1,24) ) + expect_equal(pred00_dd$getForecast(1), rep(8,24) ) + expect_equal(pred00_sd$getForecast(2), rep(2,24) ) + expect_equal(pred00_dd$getForecast(2), rep(-5,24) ) pred13_sd = computeForecast(data13, indices, "Persistence", "Persistence", Inf, 24, - same_day=TRUE) + ncores=1, same_day=TRUE) pred13_dd = computeForecast(data13, indices, "Persistence", "Persistence", Inf, 24, - same_day=FALSE) + ncores=1, same_day=FALSE) for (i in 2:6) { - expect_equal(pred13_sd$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) - expect_equal(pred13_dd$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13_sd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13_dd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) } #boundaries are special cases: OK if same day, quite wrong otherwise - expect_equal(pred13_sd$getSerie(1), c( rep(1,11), rep(2,13) ) ) - expect_equal(pred13_dd$getSerie(1), c( rep(1,11), rep(-5,13) ) ) - expect_equal(pred13_sd$getSerie(7), c( rep(7,11), rep(1,13) ) ) - expect_equal(pred13_dd$getSerie(7), c( rep(7,11), rep(8,13) ) ) + expect_equal(pred13_sd$getForecast(1), c( rep(1,11), rep(2,13) ) ) + expect_equal(pred13_dd$getForecast(1), c( rep(1,11), rep(-5,13) ) ) + expect_equal(pred13_sd$getForecast(7), c( rep(7,11), rep(1,13) ) ) + expect_equal(pred13_dd$getForecast(7), c( rep(7,11), rep(8,13) ) ) #A few extra checks expect_equal( pred00_sd$getIndexInData(3), dateIndexToInteger("2007-04-03",data00) ) @@ -110,37 +110,25 
@@ test_that("Neighbors method behave as expected", { #Situation A: +Zero; correct if jump, wrong otherwise pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24, - simtype="mix") + simtype="mix", local=FALSE) for (i in 1:7) - expect_equal(pred00$getSerie(i), rep(pred_order[i],24)) + expect_equal(pred00$getForecast(i), rep(pred_order[i],24)) pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, - simtype="mix") + simtype="mix", local=FALSE) for (i in 1:7) - expect_equal(pred13$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) - #Situation B: +Neighbors, always predict bad (small, averaged) jump - pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, - simtype="endo") - #Concerning weights, there are 12+(1 if i>=2) gaps at -6 and 90-12+(i-2 if i>=3) gaps - #at 1. Thus, predicted jump is respectively - # (12*-6+78)/90 = 0.06666667 - # (13*-6+78)/91 = 0 - # (13*-6+79)/92 = 0.01086957 - # (13*-6+80)/93 = 0.02150538 - # (13*-6+81)/94 = 0.03191489 - # (13*-6+82)/95 = 0.04210526 - # (13*-6+83)/96 = 0.05208333 - jumps = c(0.06666667, 0, 0.01086957, 0.02150538, 0.03191489, 0.04210526, 0.05208333) - for (i in 1:7) - expect_equal(pred00$getSerie(i), rep(pred_order[i]+jumps[i],24)) - - #Next lines commented out because too unpredictable results - #(tendency to flatten everything...) + #Situation B: +Neighbors == too difficult to eval in a unit test +# pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, +# simtype="endo", local=FALSE) +# jumps = ... 
+# for (i in 1:7) +# expect_equal(pred00$getForecast(i), rep(pred_order[i]+jumps[i],24)) # pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24, -# simtype="endo") +# simtype="endo", local=FALSE) # for (i in 1:7) -# expect_equal(pred13$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) +# expect_equal(pred13$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) #A few extra checks expect_equal( pred00$getIndexInData(1), dateIndexToInteger("2007-04-01",data00) ) diff --git a/pkg/tests/testthat/test-computeFilaments.R b/pkg/tests/testthat/test-computeFilaments.R index 355d58d..7e1cafa 100644 --- a/pkg/tests/testthat/test-computeFilaments.R +++ b/pkg/tests/testthat/test-computeFilaments.R @@ -1,46 +1,12 @@ context("computeFilaments") -#shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3 -I = function(i) - (i-1) %% 3 + 1 - -#MOCK data; NOTE: could be in inst/testdata as well -getDataTest = function(n) -{ - data = Data$new() - x = seq(0,9.5,0.1) - L = length(x) #96 1/4h - s1 = cos(x) - s2 = sin(x) - s3 = c( s1[1:(L%/%2)] , s2[(L%/%2+1):L] ) - #sum((s1-s2)^2) == 96 - #sum((s1-s3)^2) == 58 - #sum((s2-s3)^2) == 38 - s = list(s1, s2, s3) - series = list() - for (i in seq_len(n)) - { - serie = s[[I(i)]] + rnorm(L,sd=0.01) - level = mean(serie) - serie = serie - level - # 10 series with NAs for index 2 - if (I(i) == 2 && i >= 60 && i<= 90) - serie[sample(seq_len(L),1)] = NA - time = as.POSIXct(i*15*60, origin="2007-01-01", tz="GMT") - exo = runif(4) - exo_hat = runif(4) - data$append(time, serie, level, exo, exo_hat) - } - data -} - test_that("output is as expected on simulated series", { data = getDataTest(150) # index 143 : serie type 2 pred = computeForecast(data, 143, "Neighbors", "Zero", - horizon=length(data$getSerie(1)), simtype="endo", h_window=1) + horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1) f = computeFilaments(data, pred, 1, limit=60, plot=FALSE) # Expected output: 50-3-10 series of type 2, then 23 series of type 3 
(closest next) @@ -63,7 +29,7 @@ test_that("output is as expected on simulated series", # index 142 : serie type 1 pred = computeForecast(data, 142, "Neighbors", "Zero", - horizon=length(data$getSerie(1)), simtype="endo", h_window=1) + horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1) f = computeFilaments(data, pred, 1, limit=50, plot=FALSE) # Expected output: 50-10-3 series of type 1, then 13 series of type 3 (closest next) diff --git a/pkg/tests/testthat/test-similarDays.R b/pkg/tests/testthat/test-similarDays.R index 0d33fce..05a7b0e 100644 --- a/pkg/tests/testthat/test-similarDays.R +++ b/pkg/tests/testthat/test-similarDays.R @@ -1,16 +1,27 @@ context("Get similar days") -itestthat("getSimilarDaysIndices works as expected", +test_that("getSimilarDaysIndices works as expected", { - getSimilarDaysIndices(index, data, limit, same_season, days_in=NULL) - #... + data = getDataTest(150) + + # Index 142 is a tuesday (142 = 2 mod 7) + N142_1 = getSimilarDaysIndices(142, data, limit=7, same_season=FALSE, days_in=NULL) + expect_equal(N142_1, c(141,137,136,135,134,130,129)) + # Index 139 = saturday + N139_1 = getSimilarDaysIndices(139, data, limit=7, same_season=FALSE, days_in=NULL) + expect_equal(N139_1, c(132,125,118,111,104,97,90)) + + # With 'days_in' constraint + N142_2 = getSimilarDaysIndices(142, data, limit=7, same_season=FALSE, days_in=2*(1:75)) + expect_equal(N142_2, c(136,134,130,128,122,120,116)) + N139_2 = getSimilarDaysIndices(139, data, limit=7, same_season=FALSE, days_in=2*(1:75)) + expect_equal(N139_2, c(132,118,104,90,76,62,48)) }) -{ - index = dateIndexToInteger(index, data) -testthat("getConstrainedNeighbs works as expected", +test_that("getConstrainedNeighbs works as expected", { - .getConstrainedNeighbs(today, data, fdays, min_neighbs=10, max_neighbs=12) - #... 
+# data = getDataTest(150) +# N142_1 = .getConstrainedNeighbs(142, data, fdays, min_neighbs=7, max_neighbs=7) +# #...maybe we need an easier test data }) -- 2.44.0 From c8ef2ddb2d1f28b8356a6c0aa7c8495406226a32 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Tue, 11 Apr 2017 15:46:01 +0200 Subject: [PATCH 09/16] fix computeError.R --- pkg/R/computeError.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/R/computeError.R b/pkg/R/computeError.R index 1fbb2a8..3aa028f 100644 --- a/pkg/R/computeError.R +++ b/pkg/R/computeError.R @@ -15,7 +15,7 @@ #' @export computeError = function(data, pred, horizon=data$getStdHorizon()) { - L = forecast$getSize() + L = pred$getSize() mape_day = rep(0, horizon) abs_day = rep(0, horizon) mape_indices = rep(NA, L) @@ -24,7 +24,7 @@ computeError = function(data, pred, horizon=data$getStdHorizon()) nb_no_NA_data = 0 for (i in seq_len(L)) { - index = forecast$getIndexInData(i) + index = pred$getIndexInData(i) serie = data$getSerie(index+1)[1:horizon] forecast = pred$getForecast(i)[1:horizon] if (!any(is.na(serie)) && !any(is.na(forecast))) -- 2.44.0 From 41196789122f4b6bafeb7a306a3b4033637586f4 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Tue, 11 Apr 2017 16:17:50 +0200 Subject: [PATCH 10/16] 'update' --- pkg/R/F_Neighbors.R | 15 --------------- pkg/R/plot.R | 2 +- pkg/R/utils.R | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 9af4725..32d5cff 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -216,21 +216,6 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", ) ) -#' getNoNA2 -#' -#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. 
-#' -#' @inheritParams dateIndexToInteger -#' @param first First index (included) -#' @param last Last index (included) -#' -.getNoNA2 = function(data, first, last) -{ - (first:last)[ sapply(first:last, function(i) - !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) - ) ] -} - #' getConstrainedNeighbs #' #' Get indices of neighbors of similar pollution level (among same season + day type). diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 48b456c..fe2fb4e 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -236,7 +236,7 @@ plotRelVar = function(data, fil) { ref_var = c( apply(data$getSeries(fil$neighb_indices),1,sd), apply(data$getSeries(fil$neighb_indices+1),1,sd) ) - fdays = getNoNA2(data, 1, fil$index-1) + fdays = .getNoNA2(data, 1, fil$index-1) global_var = c( apply(data$getSeries(fdays),1,sd), apply(data$getSeries(fdays+1),1,sd) ) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index ddf0bb1..3f32868 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -115,3 +115,18 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) return (day %in% 1:4) return (day == day_ref) } + +#' getNoNA2 +#' +#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. 
+#' +#' @inheritParams dateIndexToInteger +#' @param first First index (included) +#' @param last Last index (included) +#' +.getNoNA2 = function(data, first, last) +{ + (first:last)[ sapply(first:last, function(i) + !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) + ) ] +} -- 2.44.0 From 63afc6d96c7531b1fbb91c0c55979e124e3efc14 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 12 Apr 2017 01:29:43 +0200 Subject: [PATCH 11/16] improve report --- reports/report.gj | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/reports/report.gj b/reports/report.gj index da1ce3e..0273a16 100644 --- a/reports/report.gj +++ b/reports/report.gj @@ -63,8 +63,9 @@ plotError(list(e1, e5, e4, e2, e3), cols=c(1,2,colors()[258],4,6)) # noir: Neighbors non-local (p1), bleu: Neighbors local endo (p2), mauve: Neighbors local none (p3), # vert: moyenne (p4), rouge: persistence (p5) -i_np = which.min(e1$abs$indices) -i_p = which.max(e1$abs$indices) +sum_p123 = e1$abs$indices + e2$abs$indices + e3$abs$indices +i_np = which.min(sum_p123) +i_p = which.max(sum_p123) -----r options(repr.plot.width=9, repr.plot.height=4) par(mfrow=c(1,2)) @@ -111,7 +112,7 @@ plotSimils(p2, i_p); title(paste("Weights p2 day",i_p)) # - pollué à gauche, + pollué à droite -----r -# Fenêtres sélectionnées dans ]0,7] / non-loc à gauche, loc à droite +# Fenêtres sélectionnées dans ]0,7] / non-loc 2 premières lignes, loc ensuite p1$getParams(i_np)$window p1$getParams(i_p)$window -- 2.44.0 From 0e789b3a35c296898da99bc0a13bcad56ff14ad3 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 12 Apr 2017 12:57:41 +0200 Subject: [PATCH 12/16] prepare run.sh for ipynb output --- reports/run.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reports/run.sh b/reports/run.sh index d108199..93df861 100755 --- a/reports/run.sh +++ b/reports/run.sh @@ -8,5 +8,6 @@ htmlfile=report_P$1_H$2.html jupyter-nbconvert \ --ExecutePreprocessor.kernel_name='ir' \ 
--ExecutePreprocessor.timeout=1800 \ - --to html --execute $nbfile \ - --output=$htmlfile + --execute $nbfile \ + --to notebook --output report.ipynb +# --to html --output=$htmlfile -- 2.44.0 From 41e7dc8f4b4f93e83de7b3ccc1f406eea4a8e63b Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 12 Apr 2017 18:36:48 +0200 Subject: [PATCH 13/16] add part 3 of final report --- .gitignore | 4 ++++ reports/rapport_final/report_P7_H17.zip | 1 + reports/run.sh | 10 +++++----- 3 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 reports/rapport_final/report_P7_H17.zip diff --git a/.gitignore b/.gitignore index 030f5d7..0156460 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,10 @@ NAMESPACE #large CSV files data/*.csv +#misc +*.png +*.tex + #.gitattributes/.gitfat files are generated by initialize.sh .gitattributes .gitfat diff --git a/reports/rapport_final/report_P7_H17.zip b/reports/rapport_final/report_P7_H17.zip new file mode 100644 index 0000000..59fff55 --- /dev/null +++ b/reports/rapport_final/report_P7_H17.zip @@ -0,0 +1 @@ +#$# git-fat cd80c05743e914c0398c48933919bca3704e275f 2744640 diff --git a/reports/run.sh b/reports/run.sh index 93df861..044d00d 100755 --- a/reports/run.sh +++ b/reports/run.sh @@ -1,13 +1,13 @@ #!/bin/sh # Usage: ./run.sh P H -nbfile=report_P$1_H$2.ipynb -./ipynb_generator.py report.gj $nbfile P=$1 H=$2 +./ipynb_generator.py report.gj - P=$1 H=$2 -htmlfile=report_P$1_H$2.html +#htmlfile=report_P$1_H$2.html +nbfile=report_P$1_H$2.ipynb jupyter-nbconvert \ --ExecutePreprocessor.kernel_name='ir' \ --ExecutePreprocessor.timeout=1800 \ - --execute $nbfile \ - --to notebook --output report.ipynb + --execute report.ipynb \ + --to notebook --output $nbfile # --to html --output=$htmlfile -- 2.44.0 From 9aca86719e8acf8a1046db7a50fa95406105632a Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 12 Apr 2017 18:44:44 +0200 Subject: [PATCH 14/16] 'update' --- reports/rapport_final/report_P7_H17.zip | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/reports/rapport_final/report_P7_H17.zip b/reports/rapport_final/report_P7_H17.zip index 59fff55..b2363f2 100644 --- a/reports/rapport_final/report_P7_H17.zip +++ b/reports/rapport_final/report_P7_H17.zip @@ -1 +1 @@ -#$# git-fat cd80c05743e914c0398c48933919bca3704e275f 2744640 +#$# git-fat 9c8c710cadee05fb9695e943614093cb75e5c5cd 2744676 -- 2.44.0 From 4ba96933bd3eb63800ef9e98edbabe693aec7340 Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Wed, 12 Apr 2017 18:49:06 +0200 Subject: [PATCH 15/16] add report.tex --- .gitignore | 4 +- reports/rapport_final/report_P7_H17.tex | 985 ++++++++++++++++++++++++ 2 files changed, 987 insertions(+), 2 deletions(-) create mode 100644 reports/rapport_final/report_P7_H17.tex diff --git a/.gitignore b/.gitignore index 0156460..92a17ad 100644 --- a/.gitignore +++ b/.gitignore @@ -29,14 +29,14 @@ data/*.csv #misc *.png -*.tex +#*.tex #.gitattributes/.gitfat files are generated by initialize.sh .gitattributes .gitfat #Rmarkdown, knitr + LaTeX generated files -*.tex +#*.tex *.pdf !/biblio/*.pdf *.aux diff --git a/reports/rapport_final/report_P7_H17.tex b/reports/rapport_final/report_P7_H17.tex new file mode 100644 index 0000000..012df04 --- /dev/null +++ b/reports/rapport_final/report_P7_H17.tex @@ -0,0 +1,985 @@ + +% Default to the notebook output style + + + + +% Inherit from the specified cell style. + + + + + +\documentclass[11pt]{article} + + + + \usepackage[T1]{fontenc} + % Nicer default font (+ math font) than Computer Modern for most use cases + \usepackage{mathpazo} + + % Basic figure setup, for now with no caption control since it's done + % automatically by Pandoc (which extracts ![](path) syntax from Markdown). + \usepackage{graphicx} + % We will generate all images so they have a width \maxwidth. This means + % that they will get their normal width if they fit onto the page, but + % are scaled down if they would overflow the margins. 
+ \makeatletter + \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth + \else\Gin@nat@width\fi} + \makeatother + \let\Oldincludegraphics\includegraphics + % Set max figure width to be 80% of text width, for now hardcoded. + \renewcommand{\includegraphics}[1]{\Oldincludegraphics[width=.8\maxwidth]{#1}} + % Ensure that by default, figures have no caption (until we provide a + % proper Figure object with a Caption API and a way to capture that + % in the conversion process - todo). + \usepackage{caption} + \DeclareCaptionLabelFormat{nolabel}{} + \captionsetup{labelformat=nolabel} + + \usepackage{adjustbox} % Used to constrain images to a maximum size + \usepackage{xcolor} % Allow colors to be defined + \usepackage{enumerate} % Needed for markdown enumerations to work + \usepackage{geometry} % Used to adjust the document margins + \usepackage{amsmath} % Equations + \usepackage{amssymb} % Equations + \usepackage{textcomp} % defines textquotesingle + % Hack from http://tex.stackexchange.com/a/47451/13684: + \AtBeginDocument{% + \def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code + } + \usepackage{upquote} % Upright quotes for verbatim code + \usepackage{eurosym} % defines \euro + \usepackage[mathletters]{ucs} % Extended unicode (utf-8) support + \usepackage[utf8x]{inputenc} % Allow utf-8 characters in the tex document + \usepackage{fancyvrb} % verbatim replacement that allows latex + \usepackage{grffile} % extends the file name processing of package graphics + % to support a larger range + % The hyperref package gives us a pdf with properly built + % internal navigation ('pdf bookmarks' for the table of contents, + % internal cross-reference links, web links for URLs, etc.) 
+ \usepackage{hyperref} + \usepackage{longtable} % longtable support required by pandoc >1.10 + \usepackage{booktabs} % table support for pandoc > 1.12.2 + \usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment) + \usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout) + % normalem makes italics be italics, not underlines + + + + + % Colors for the hyperref package + \definecolor{urlcolor}{rgb}{0,.145,.698} + \definecolor{linkcolor}{rgb}{.71,0.21,0.01} + \definecolor{citecolor}{rgb}{.12,.54,.11} + + % ANSI colors + \definecolor{ansi-black}{HTML}{3E424D} + \definecolor{ansi-black-intense}{HTML}{282C36} + \definecolor{ansi-red}{HTML}{E75C58} + \definecolor{ansi-red-intense}{HTML}{B22B31} + \definecolor{ansi-green}{HTML}{00A250} + \definecolor{ansi-green-intense}{HTML}{007427} + \definecolor{ansi-yellow}{HTML}{DDB62B} + \definecolor{ansi-yellow-intense}{HTML}{B27D12} + \definecolor{ansi-blue}{HTML}{208FFB} + \definecolor{ansi-blue-intense}{HTML}{0065CA} + \definecolor{ansi-magenta}{HTML}{D160C4} + \definecolor{ansi-magenta-intense}{HTML}{A03196} + \definecolor{ansi-cyan}{HTML}{60C6C8} + \definecolor{ansi-cyan-intense}{HTML}{258F8F} + \definecolor{ansi-white}{HTML}{C5C1B4} + \definecolor{ansi-white-intense}{HTML}{A1A6B2} + + % commands and environments needed by pandoc snippets + % extracted from the output of `pandoc -s` + \providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} + \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} + % Add ',fontsize=\small' for more characters per line + \newenvironment{Shaded}{}{} + \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} + \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}} + \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + 
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}} + \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}} + \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} + \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}} + \newcommand{\RegionMarkerTok}[1]{{#1}} + \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} + \newcommand{\NormalTok}[1]{{#1}} + + % Additional commands for more recent versions of Pandoc + \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}} + \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}} + \newcommand{\ImportTok}[1]{{#1}} + \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}} + \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}} + \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} + \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}} + \newcommand{\BuiltInTok}[1]{{#1}} + \newcommand{\ExtensionTok}[1]{{#1}} + \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}} + \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}} + \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + + + % Define a nice break command that doesn't care if a line doesn't already + % exist. 
+ \def\br{\hspace*{\fill} \\* } + % Math Jax compatability definitions + \def\gt{>} + \def\lt{<} + % Document parameters + \title{report\_P7\_H17} + + + + + % Pygments definitions + +\makeatletter +\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax% + \let\PY@ul=\relax \let\PY@tc=\relax% + \let\PY@bc=\relax \let\PY@ff=\relax} +\def\PY@tok#1{\csname PY@tok@#1\endcsname} +\def\PY@toks#1+{\ifx\relax#1\empty\else% + \PY@tok{#1}\expandafter\PY@toks\fi} +\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{% + \PY@it{\PY@bf{\PY@ff{#1}}}}}}} +\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}} + +\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}} +\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}} +\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}} +\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} +\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}} +\expandafter\def\csname 
PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}} +\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}} +\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}} +\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} +\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}} +\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}} +\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}} +\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}} +\expandafter\def\csname 
PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit} +\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf} +\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}} +\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}} +\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}} +\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@fm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@sa\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} 
+\expandafter\def\csname PY@tok@dl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} + +\def\PYZbs{\char`\\} +\def\PYZus{\char`\_} +\def\PYZob{\char`\{} +\def\PYZcb{\char`\}} +\def\PYZca{\char`\^} +\def\PYZam{\char`\&} +\def\PYZlt{\char`\<} +\def\PYZgt{\char`\>} +\def\PYZsh{\char`\#} +\def\PYZpc{\char`\%} +\def\PYZdl{\char`\$} +\def\PYZhy{\char`\-} +\def\PYZsq{\char`\'} +\def\PYZdq{\char`\"} +\def\PYZti{\char`\~} +% for compatibility with earlier versions +\def\PYZat{@} +\def\PYZlb{[} +\def\PYZrb{]} +\makeatother + + 
+ % Exact colors from NB + \definecolor{incolor}{rgb}{0.0, 0.0, 0.5} + \definecolor{outcolor}{rgb}{0.545, 0.0, 0.0} + + + + + % Prevent overflowing lines due to hard-to-break entities + \sloppy + % Setup hyperref package + \hypersetup{ + breaklinks=true, % so long urls are correctly broken across lines + colorlinks=true, + urlcolor=urlcolor, + linkcolor=linkcolor, + citecolor=citecolor, + } + % Slightly bigger margins than the latex defaults + + \geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in} + + + + \begin{document} + + + \maketitle + + + + + \subsection*{Introduction} + +Cette partie montre les résultats obtenus via des variantes de +l'algorithme décrit à la section 2, en utilisant le package présenté à +la section 3. Cet algorithme est systématiquement comparé à deux +approches naïves : * la moyenne des lendemains des jours "similaires" +dans tout le passé, c'est-à-dire prédiction = moyenne de tous les mardis +passés si le jour courant est un lundi par exemple. * la persistance, +reproduisant le jour courant ou allant chercher le lendemain de la +dernière journée "similaire" (même principe que ci-dessus ; argument +"same\_day"). + +Concernant l'algorithme principal à voisins, trois variantes sont +étudiées dans cette partie : * avec simtype="mix" et raccordement +"Neighbors" dans le cas "non local", i.e. on va chercher des voisins +n'importe où du moment qu'ils correspondent au premier élément d'un +couple de deux jours consécutifs sans valeurs manquantes. * avec +simtype="endo" + raccordement "Neighbors" puis simtype="none" + +raccordement "Zero" (sans ajustement) dans le cas "local" : voisins de +même niveau de pollution et même saison. + +Pour chaque période retenue -\/- chauffage, épandage, semaine non +polluée -\/- les erreurs de prédiction sont d'abord affichées, puis +quelques graphes de courbes réalisées/prévues (sur le jour "en moyenne +le plus facile" à gauche, et "en moyenne le plus difficile" à droite). 
+Ensuite plusieurs types de graphes apportant des précisions sur la +nature et la difficulté du problème viennent compléter ces premières +courbes. Concernant les graphes de filaments, la moitié gauche du graphe +correspond aux jours similaires au jour courant, tandis que la moitié +droite affiche les lendemains : ce sont donc les voisinages tels +qu'utilisés dans l'algorithme. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}1}]:} \PY{n}{library}\PY{p}{(}\PY{n}{talweg}\PY{p}{)} + + \PY{n}{P} \PY{o}{=} \PY{l+m+mi}{7} \PY{c+c1}{\PYZsh{}instant de prévision} + \PY{n}{H} \PY{o}{=} \PY{l+m+mi}{17} \PY{c+c1}{\PYZsh{}horizon (en heures)} + + \PY{n}{ts\PYZus{}data} \PY{o}{=} \PY{n}{read}\PY{o}{.}\PY{n}{csv}\PY{p}{(}\PY{n}{system}\PY{o}{.}\PY{n}{file}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{extdata}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{pm10\PYZus{}mesures\PYZus{}H\PYZus{}loc\PYZus{}report.csv}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} + \PY{n}{package}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{talweg}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{)} + \PY{n}{exo\PYZus{}data} \PY{o}{=} \PY{n}{read}\PY{o}{.}\PY{n}{csv}\PY{p}{(}\PY{n}{system}\PY{o}{.}\PY{n}{file}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{extdata}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{meteo\PYZus{}extra\PYZus{}noNAs.csv}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{package}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{talweg}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{)} + \PY{c+c1}{\PYZsh{} NOTE: \PYZsq{}GMT\PYZsq{} because DST gaps are filled and multiple values merged in above dataset.} + \PY{c+c1}{\PYZsh{} Prediction from P+1 to P+H included.} + \PY{n}{data} \PY{o}{=} \PY{n}{getData}\PY{p}{(}\PY{n}{ts\PYZus{}data}\PY{p}{,} \PY{n}{exo\PYZus{}data}\PY{p}{,} \PY{n}{input\PYZus{}tz} \PY{o}{=} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{GMT}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{working\PYZus{}tz}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{GMT}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} 
\PY{n}{predict\PYZus{}at}\PY{o}{=}\PY{n}{P}\PY{p}{)} + + \PY{n}{indices\PYZus{}ch} \PY{o}{=} \PY{n}{seq}\PY{p}{(}\PY{k}{as}\PY{o}{.}\PY{n}{Date}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{2015\PYZhy{}01\PYZhy{}18}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{,}\PY{k}{as}\PY{o}{.}\PY{n}{Date}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{2015\PYZhy{}01\PYZhy{}24}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{,}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{days}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)} + \PY{n}{indices\PYZus{}ep} \PY{o}{=} \PY{n}{seq}\PY{p}{(}\PY{k}{as}\PY{o}{.}\PY{n}{Date}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{2015\PYZhy{}03\PYZhy{}15}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{,}\PY{k}{as}\PY{o}{.}\PY{n}{Date}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{2015\PYZhy{}03\PYZhy{}21}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{,}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{days}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)} + \PY{n}{indices\PYZus{}np} \PY{o}{=} \PY{n}{seq}\PY{p}{(}\PY{k}{as}\PY{o}{.}\PY{n}{Date}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{2015\PYZhy{}04\PYZhy{}26}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{,}\PY{k}{as}\PY{o}{.}\PY{n}{Date}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{2015\PYZhy{}05\PYZhy{}02}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{p}{,}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{days}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)} +\end{Verbatim} + + \subsection*{Pollution par chauffage} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}2}]:} \PY{n}{p1} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ch}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{mix}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{FALSE}\PY{p}{)} + \PY{n}{p2} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ch}\PY{p}{,} 
\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{endo}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{p3} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ch}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{none}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{p4} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ch}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Average}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{)} + \PY{n}{p5} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ch}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Persistence}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{same\PYZus{}day}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}3}]:} \PY{n}{e1} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e2} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e3} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e4} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p4}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e5} \PY{o}{=} 
\PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p5}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{options}\PY{p}{(}\PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{width}\PY{o}{=}\PY{l+m+mi}{9}\PY{p}{,} \PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{height}\PY{o}{=}\PY{l+m+mi}{7}\PY{p}{)} + \PY{n}{plotError}\PY{p}{(}\PY{n+nb}{list}\PY{p}{(}\PY{n}{e1}\PY{p}{,} \PY{n}{e5}\PY{p}{,} \PY{n}{e4}\PY{p}{,} \PY{n}{e2}\PY{p}{,} \PY{n}{e3}\PY{p}{)}\PY{p}{,} \PY{n}{cols}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{,}\PY{n}{colors}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{258}\PY{p}{]}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{,}\PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} noir: Neighbors non\PYZhy{}local (p1), bleu: Neighbors local endo (p2),} + \PY{c+c1}{\PYZsh{} mauve: Neighbors local none (p3), vert: moyenne (p4),} + \PY{c+c1}{\PYZsh{} rouge: persistence (p5)} + + \PY{n}{sum\PYZus{}p123} \PY{o}{=} \PY{n}{e1}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} \PY{o}{+} \PY{n}{e2}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} \PY{o}{+} \PY{n}{e3}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} + \PY{n}{i\PYZus{}np} \PY{o}{=} \PY{n}{which}\PY{o}{.}\PY{n}{min}\PY{p}{(}\PY{n}{sum\PYZus{}p123}\PY{p}{)} \PY{c+c1}{\PYZsh{}indice de (veille de) jour \PYZdq{}facile\PYZdq{}} + \PY{n}{i\PYZus{}p} \PY{o}{=} \PY{n}{which}\PY{o}{.}\PY{n}{max}\PY{p}{(}\PY{n}{sum\PYZus{}p123}\PY{p}{)} \PY{c+c1}{\PYZsh{}indice de (veille de) jour \PYZdq{}difficile\PYZdq{}} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_4_0.png} + \end{center} + { \hspace*{\fill} \\} + + L'erreur absolue dépasse 20 sur 1 à 2 jours suivant les modèles (graphe +en haut à droite). C'est au-delà de ce que l'on aimerait voir (disons ++/- 5 environ). 
Sur cet exemple le modèle à voisins "contraint" +(local=TRUE) utilisant des pondérations basées sur les similarités de +forme (simtype="endo") obtient en moyenne les meilleurs résultats, avec +un MAPE restant en général inférieur à 30\% de 8h à 19h (7+1 à 7+12 : +graphe en bas à gauche). + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}4}]:} \PY{n}{options}\PY{p}{(}\PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{width}\PY{o}{=}\PY{l+m+mi}{9}\PY{p}{,} \PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{height}\PY{o}{=}\PY{l+m+mi}{4}\PY{p}{)} + \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p3 
day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p3 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Bleu: prévue, noir: réalisée} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_6_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_6_1.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_6_2.png} + \end{center} + { \hspace*{\fill} \\} + + Le jour "facile à prévoir", à gauche, se décompose en deux modes : un +léger vers 10h (7+3), puis un beaucoup plus marqué vers 19h (7+12). Ces +deux modes sont retrouvés par les trois variantes de l'algorithme à +voisins, bien que l'amplitude soit mal prédite. Concernant le jour +"difficile à prévoir" il y a deux pics en tout début et toute fin de +journée (à 9h et 23h), qui ne sont pas du tout anticipés par le +programme ; la grande amplitude de ces pics explique alors l'intensité +de l'erreur observée. 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}5}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}np1} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}p1} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{f\PYZus{}np2} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}p2} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_8_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_8_1.png} + \end{center} + { \hspace*{\fill} \\} + + Les voisins du jour courant (période de 24h allant de 8h à 7h le +lendemain) sont affichés avec un trait d'autant plus sombre qu'ils sont 
+proches. On constate dans le cas non contraint (en haut) une grande +variabilité des lendemains, très nette sur le graphe en haut à droite. +Ceci indique une faible corrélation entre la forme d'une courbe sur une +période de 24h et la forme sur les 24h suivantes ; \textbf{cette +observation est la source des difficultés rencontrées par l'algorithme +sur ce jeu de données.} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}6}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotFilamentsBox}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{FilBox p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotFilamentsBox}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{FilBox p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} En pointillés la courbe du jour courant + lendemain (à prédire)} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_0.png} + \end{center} + { \hspace*{\fill} \\} + + Sur cette boxplot fonctionnelle (voir la fonction fboxplot() du package +R "rainbow") l'on constate essentiellement deux choses : le lendemain +d'un voisin "normal" peut se révéler être une courbe atypique, fort +éloignée de ce que l'on souhaite prédire (courbes bleue et rouge à +gauche) ; et, dans le cas d'une courbe à prédire atypique (à droite) la +plupart des voisins sont trop éloignés de la forme à prédire et forcent +ainsi un aplatissement de la prédiction. 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}7}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np2}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p2}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_12_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_12_1.png} + \end{center} + { \hspace*{\fill} \\} + + Ces graphes viennent confirmer l'impression visuelle après observation +des filaments. En effet, la variabilité globale en rouge (écart-type +heure par heure sur l'ensemble des couples "aujourd'hui/lendemain" du +passé) devrait rester nettement au-dessus de la variabilité locale, +calculée respectivement sur un voisinage d'une soixantaine de jours +(pour p1) et d'une dizaine de jours (pour p2). 
Or on constate que ce +n'est pas du tout le cas sur la période "lendemain", sauf en partie pour +p2 le jour 4 \(-\) mais ce n'est pas suffisant. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}8}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} \PYZhy{} pollué à gauche, + pollué à droite} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_14_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_14_1.png} + \end{center} + { \hspace*{\fill} \\} + + Les poids se concentrent près de 0 dans le cas "non local" (p1), et se +répartissent assez uniformément dans \([0,0.2]\) dans le cas "local" +(p2). C'est ce que l'on souhaite observer pour éviter d'effectuer une +simple moyenne. 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}9}]:} \PY{c+c1}{\PYZsh{} Fenêtres sélectionnées dans ]0,7] / non\PYZhy{}loc 2 premières lignes, loc ensuite} + \PY{n}{p1}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + \PY{n}{p1}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + + \PY{n}{p2}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + \PY{n}{p2}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} +\end{Verbatim} + + \begin{enumerate*} +\item 0.168824188864717 +\item 0.336969608767438 +\end{enumerate*} + + + \begin{enumerate*} +\item 0.18004595760919 +\item 0.353963007643311 +\end{enumerate*} + + + 1.16620655388085 + + + 1.18148881881259 + + + \subsection*{Pollution par épandage} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}10}]:} \PY{n}{p1} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ep}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{mix}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{FALSE}\PY{p}{)} + \PY{n}{p2} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ep}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{endo}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{p3} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} 
\PY{n}{indices\PYZus{}ep}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{none}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{p4} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ep}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Average}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{)} + \PY{n}{p5} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}ep}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Persistence}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{same\PYZus{}day}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}11}]:} \PY{n}{e1} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e2} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e3} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e4} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p4}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e5} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p5}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{options}\PY{p}{(}\PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{width}\PY{o}{=}\PY{l+m+mi}{9}\PY{p}{,} \PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{height}\PY{o}{=}\PY{l+m+mi}{7}\PY{p}{)} + \PY{n}{plotError}\PY{p}{(}\PY{n+nb}{list}\PY{p}{(}\PY{n}{e1}\PY{p}{,} \PY{n}{e5}\PY{p}{,} \PY{n}{e4}\PY{p}{,} 
\PY{n}{e2}\PY{p}{,} \PY{n}{e3}\PY{p}{)}\PY{p}{,} \PY{n}{cols}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{,}\PY{n}{colors}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{258}\PY{p}{]}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{,}\PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} noir: Neighbors non\PYZhy{}local (p1), bleu: Neighbors local endo (p2),} + \PY{c+c1}{\PYZsh{} mauve: Neighbors local none (p3), vert: moyenne (p4),} + \PY{c+c1}{\PYZsh{} rouge: persistence (p5)} + + \PY{n}{sum\PYZus{}p123} \PY{o}{=} \PY{n}{e1}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} \PY{o}{+} \PY{n}{e2}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} \PY{o}{+} \PY{n}{e3}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} + \PY{n}{i\PYZus{}np} \PY{o}{=} \PY{n}{which}\PY{o}{.}\PY{n}{min}\PY{p}{(}\PY{n}{sum\PYZus{}p123}\PY{p}{)} \PY{c+c1}{\PYZsh{}indice de (veille de) jour \PYZdq{}facile\PYZdq{}} + \PY{n}{i\PYZus{}p} \PY{o}{=} \PY{n}{which}\PY{o}{.}\PY{n}{max}\PY{p}{(}\PY{n}{sum\PYZus{}p123}\PY{p}{)} \PY{c+c1}{\PYZsh{}indice de (veille de) jour \PYZdq{}difficile\PYZdq{}} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_19_0.png} + \end{center} + { \hspace*{\fill} \\} + + Il est difficile dans ce cas de déterminer une méthode meilleure que les +autres : elles donnent toutes de plutôt mauvais résultats, avec une +erreur absolue moyennée sur la journée dépassant presque toujours 15 +(graphe en haut à droite). 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}12}]:} \PY{n}{options}\PY{p}{(}\PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{width}\PY{o}{=}\PY{l+m+mi}{9}\PY{p}{,} \PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{height}\PY{o}{=}\PY{l+m+mi}{4}\PY{p}{)} + \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p3 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p3 
day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Bleu: prévue, noir: réalisée} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_21_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_21_1.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_21_2.png} + \end{center} + { \hspace*{\fill} \\} + + Dans le cas d'un jour "facile" à prédire \(-\) à gauche \(-\) la forme +est plus ou moins retrouvée, mais le niveau moyen est trop bas (courbe +en bleu). Concernant le jour "difficile" à droite, non seulement la +forme n'est pas anticipée mais surtout le niveau prédit est très +inférieur au niveau de pollution observé. Comme on le voit ci-dessous +cela découle d'un manque de voisins au comportement similaire. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}13}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}np1} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}p1} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{f\PYZus{}np2} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{,} 
\PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}p2} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_23_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_23_1.png} + \end{center} + { \hspace*{\fill} \\} + + Les observations sont les mêmes qu'au paragraphe précédent : trop de +variabilité des lendemains (et même des voisins du jour courant). + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}14}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotFilamentsBox}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{FilBox p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotFilamentsBox}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{FilBox p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} En pointillés la courbe du jour courant + lendemain (à prédire)} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_25_0.png} + \end{center} + { \hspace*{\fill} \\} + + On constate la présence d'un voisin au lendemain complètement atypique +avec 
un pic en début de journée (courbe en vert à gauche), et d'un autre +phénomène semblable avec la courbe rouge sur le graphe de droite. Ajouté +au fait que le lendemain à prévoir est lui-même un jour "hors norme", +cela montre l'impossibilité de bien prévoir une courbe en utilisant +l'algorithme à voisins. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}15}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np2}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p2}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_27_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_27_1.png} + \end{center} + { \hspace*{\fill} \\} + + Comme précédemment les variabilités locales et globales sont confondues +dans les parties droites des graphes 
\(-\) sauf pour la version "locale" +sur le jour "facile" ; mais cette bonne propriété n'est pas suffisante +si l'on ne trouve pas les bons poids à appliquer. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}16}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} \PYZhy{} pollué à gauche, + pollué à droite} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_29_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_29_1.png} + \end{center} + { \hspace*{\fill} \\} + + En comparaison avec le paragraphe précédent on retrouve le même (bon) +comportement des poids pour la version "non locale". 
En revanche la +fenêtre optimisée est trop grande sur le jour "facile" pour la méthode +"locale" (voir affichage ci-dessous) : il en résulte des poids tous +semblables autour de 0.084, l'algorithme effectue donc une moyenne +simple \(-\) expliquant pourquoi les courbes mauve et bleue sont très +proches sur le graphe d'erreurs. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}17}]:} \PY{c+c1}{\PYZsh{} Fenêtres sélectionnées dans ]0,7] / non\PYZhy{}loc 2 premières lignes, loc ensuite} + \PY{n}{p1}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + \PY{n}{p1}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + + \PY{n}{p2}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + \PY{n}{p2}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} +\end{Verbatim} + + \begin{enumerate*} +\item 0.206604806619633 +\item 0.661854053860987 +\end{enumerate*} + + + \begin{enumerate*} +\item 0.367945958636072 +\item 0.244429852740092 +\end{enumerate*} + + + 6.99993248587025 + + + 1.24825506305085 + + + \subsection*{Semaine non polluée} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}18}]:} \PY{n}{p1} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}np}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{mix}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{FALSE}\PY{p}{)} + \PY{n}{p2} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}np}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} 
\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{endo}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{p3} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}np}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Neighbors}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{simtype}\PY{o}{=}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{none}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{local}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{p4} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}np}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Average}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{)} + \PY{n}{p5} \PY{o}{=} \PY{n}{computeForecast}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{indices\PYZus{}np}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Persistence}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Zero}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{horizon}\PY{o}{=}\PY{n}{H}\PY{p}{,} + \PY{n}{same\PYZus{}day}\PY{o}{=}\PY{n}{FALSE}\PY{p}{)} +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}19}]:} \PY{n}{e1} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e2} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e3} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e4} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p4}\PY{p}{,} \PY{n}{H}\PY{p}{)} + \PY{n}{e5} \PY{o}{=} \PY{n}{computeError}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p5}\PY{p}{,} 
\PY{n}{H}\PY{p}{)} + \PY{n}{options}\PY{p}{(}\PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{width}\PY{o}{=}\PY{l+m+mi}{9}\PY{p}{,} \PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{height}\PY{o}{=}\PY{l+m+mi}{7}\PY{p}{)} + \PY{n}{plotError}\PY{p}{(}\PY{n+nb}{list}\PY{p}{(}\PY{n}{e1}\PY{p}{,} \PY{n}{e5}\PY{p}{,} \PY{n}{e4}\PY{p}{,} \PY{n}{e2}\PY{p}{,} \PY{n}{e3}\PY{p}{)}\PY{p}{,} \PY{n}{cols}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{,}\PY{n}{colors}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{258}\PY{p}{]}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{,}\PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} noir: Neighbors non\PYZhy{}local (p1), bleu: Neighbors local endo (p2),} + \PY{c+c1}{\PYZsh{} mauve: Neighbors local none (p3), vert: moyenne (p4),} + \PY{c+c1}{\PYZsh{} rouge: persistence (p5)} + + \PY{n}{sum\PYZus{}p123} \PY{o}{=} \PY{n}{e1}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} \PY{o}{+} \PY{n}{e2}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} \PY{o}{+} \PY{n}{e3}\PY{err}{\PYZdl{}}\PY{n+nb}{abs}\PY{err}{\PYZdl{}}\PY{n}{indices} + \PY{n}{i\PYZus{}np} \PY{o}{=} \PY{n}{which}\PY{o}{.}\PY{n}{min}\PY{p}{(}\PY{n}{sum\PYZus{}p123}\PY{p}{)} \PY{c+c1}{\PYZsh{}indice de (veille de) jour \PYZdq{}facile\PYZdq{}} + \PY{n}{i\PYZus{}p} \PY{o}{=} \PY{n}{which}\PY{o}{.}\PY{n}{max}\PY{p}{(}\PY{n}{sum\PYZus{}p123}\PY{p}{)} \PY{c+c1}{\PYZsh{}indice de (veille de) jour \PYZdq{}difficile\PYZdq{}} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_34_0.png} + \end{center} + { \hspace*{\fill} \\} + + Dans ce cas plus favorable les intensités des erreurs absolues ont +clairement diminué : elles restent souvent en dessous de 5. En revanche +le MAPE moyen reste au-delà de 20\%, et même souvent plus de 30\%. 
Comme +dans le cas de l'épandage on constate une croissance globale de la +courbe journalière d'erreur absolue moyenne (en haut à gauche) ; ceci +peut être dû au fait que l'on ajuste le niveau du jour à prédire en le +recollant sur la dernière valeur observée. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}20}]:} \PY{n}{options}\PY{p}{(}\PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{width}\PY{o}{=}\PY{l+m+mi}{9}\PY{p}{,} \PY{n+nb}{repr}\PY{o}{.}\PY{n}{plot}\PY{o}{.}\PY{n}{height}\PY{o}{=}\PY{l+m+mi}{4}\PY{p}{)} + \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p3 
day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotPredReal}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p3}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{PredReal p3 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Bleu: prévue, noir: réalisée} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_36_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_36_1.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_36_2.png} + \end{center} + { \hspace*{\fill} \\} + + La forme est raisonnablement retrouvée pour les méthodes "locales", +l'autre version lissant trop les prédictions. Le biais reste cependant +important, surtout en fin de journée sur le jour "difficile". 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}21}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}np1} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}p1} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{f\PYZus{}np2} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{f\PYZus{}p2} \PY{o}{=} \PY{n}{computeFilaments}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{,} \PY{n}{plot}\PY{o}{=}\PY{n}{TRUE}\PY{p}{)} + \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Filaments p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_38_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_38_1.png} + \end{center} + { \hspace*{\fill} \\} + + Les graphes de filaments ont encore la même allure, avec une assez +grande variabilité observée. 
Cette observation est cependant trompeuse, +comme l'indique plus bas le graphe de variabilité relative. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}22}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotFilamentsBox}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{FilBox p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotFilamentsBox}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{FilBox p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} En pointillés la courbe du jour courant + lendemain (à prédire)} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_40_0.png} + \end{center} + { \hspace*{\fill} \\} + + On peut réappliquer les mêmes remarques qu'auparavant sur les boxplots +fonctionnels : lendemains de voisins atypiques, courbe à prévoir +elle-même légèrement "hors norme". 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}23}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p1}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}np2}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotRelVar}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{f\PYZus{}p2}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{StdDev p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_42_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_42_1.png} + \end{center} + { \hspace*{\fill} \\} + + Cette fois la situation idéale est observée : la variabilité globale est +nettement au-dessus de la variabilité locale. Bien que cela ne suffise +pas à obtenir de bonnes prédictions de forme, on constate au moins +l'amélioration dans la prédiction du niveau. 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}24}]:} \PY{n}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n}{c}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p1}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p1 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{p}{)} + \PY{n}{plotSimils}\PY{p}{(}\PY{n}{p2}\PY{p}{,} \PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{;} \PY{n}{title}\PY{p}{(}\PY{n}{paste}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Weights p2 day}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{p}{)} + + \PY{c+c1}{\PYZsh{} \PYZhy{} pollué à gauche, + pollué à droite} +\end{Verbatim} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_44_0.png} + \end{center} + { \hspace*{\fill} \\} + + \begin{center} + \adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_44_1.png} + \end{center} + { \hspace*{\fill} \\} + + Concernant les poids en revanche, deux cas a priori mauvais se cumulent +: * les poids dans le cas "non local" ne sont pas assez concentrés +autour de 0, menant à un lissage trop fort \(-\) comme observé sur les +graphes des courbes réalisées/prévues ; * les poids dans le cas "local" +sont trop semblables (à cause de la trop grande fenêtre optimisée par +validation croisée, cf. 
ci-dessous), résultant encore en une moyenne +simple \(-\) mais sur moins de jours, plus proches du jour courant. + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}25}]:} \PY{c+c1}{\PYZsh{} Fenêtres sélectionnées dans ]0,7] / non\PYZhy{}loc 2 premières lignes, loc ensuite} + \PY{n}{p1}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + \PY{n}{p1}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + + \PY{n}{p2}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}np}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} + \PY{n}{p2}\PY{err}{\PYZdl{}}\PY{n}{getParams}\PY{p}{(}\PY{n}{i\PYZus{}p}\PY{p}{)}\PY{err}{\PYZdl{}}\PY{n}{window} +\end{Verbatim} + + \begin{enumerate*} +\item 0.205055690975915 +\item 0.703482647754766 +\end{enumerate*} + + + \begin{enumerate*} +\item 1.1038650998802 +\item 0.885155748316133 +\end{enumerate*} + + + 3.64336124381868 + + + 6.99994501761361 + + + \subsection*{Bilan} + +Nos algorithmes à voisins ne sont pas adaptés à ce jeu de données où la +forme varie considérablement d'un jour à l'autre. Plus généralement +cette décorrélation de forme rend ardue la tâche de prévision pour toute +autre méthode \(-\) du moins, nous ne savons pas comment procéder pour +parvenir à une bonne précision. + +Toutefois, un espoir reste permis par exemple en agrégeant les courbes +spatialement (sur plusieurs stations situées dans la même agglomération +ou dans une même zone). 
+ + + % Add a bibliography block to the postdoc + + + + \end{document} -- 2.44.0 From 689aa1d3c58e8b1e9fdfa9b895ca38c7228f56cc Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Fri, 14 Apr 2017 12:23:33 +0200 Subject: [PATCH 16/16] 'update' --- .gitignore | 1 + AirNormand.zip | 1 + Untitled.tex | 468 ++++++++++++++++++++++++ pkg/R/Data.R | 2 + pkg/R/F_Average.R | 2 + pkg/R/F_Neighbors.R | 36 +- pkg/R/F_Persistence.R | 2 + pkg/R/F_Zero.R | 2 + pkg/R/Forecast.R | 2 + pkg/R/Forecaster.R | 2 + pkg/R/utils.R | 16 +- reports/rapport_final/report_P7_H17.tex | 1 + 12 files changed, 510 insertions(+), 25 deletions(-) create mode 100644 AirNormand.zip create mode 100644 Untitled.tex diff --git a/.gitignore b/.gitignore index 92a17ad..7b5eb35 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ data/*.csv #misc *.png #*.tex +/AirNormand/ #.gitattributes/.gitfat files are generated by initialize.sh .gitattributes diff --git a/AirNormand.zip b/AirNormand.zip new file mode 100644 index 0000000..99fb2d0 --- /dev/null +++ b/AirNormand.zip @@ -0,0 +1 @@ +#$# git-fat cf4cf6c03e09a9aa2374a7250d21ce732c923e1e 49845703 diff --git a/Untitled.tex b/Untitled.tex new file mode 100644 index 0000000..fe2c0eb --- /dev/null +++ b/Untitled.tex @@ -0,0 +1,468 @@ + +% Default to the notebook output style + + + + +% Inherit from the specified cell style. + + + + + +\documentclass[11pt]{article} + + + + \usepackage[T1]{fontenc} + % Nicer default font (+ math font) than Computer Modern for most use cases + \usepackage{mathpazo} + + % Basic figure setup, for now with no caption control since it's done + % automatically by Pandoc (which extracts ![](path) syntax from Markdown). + \usepackage{graphicx} + % We will generate all images so they have a width \maxwidth. This means + % that they will get their normal width if they fit onto the page, but + % are scaled down if they would overflow the margins. 
+ \makeatletter + \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth + \else\Gin@nat@width\fi} + \makeatother + \let\Oldincludegraphics\includegraphics + % Set max figure width to be 80% of text width, for now hardcoded. + \renewcommand{\includegraphics}[1]{\Oldincludegraphics[width=.8\maxwidth]{#1}} + % Ensure that by default, figures have no caption (until we provide a + % proper Figure object with a Caption API and a way to capture that + % in the conversion process - todo). + \usepackage{caption} + \DeclareCaptionLabelFormat{nolabel}{} + \captionsetup{labelformat=nolabel} + + \usepackage{adjustbox} % Used to constrain images to a maximum size + \usepackage{xcolor} % Allow colors to be defined + \usepackage{enumerate} % Needed for markdown enumerations to work + \usepackage{geometry} % Used to adjust the document margins + \usepackage{amsmath} % Equations + \usepackage{amssymb} % Equations + \usepackage{textcomp} % defines textquotesingle + % Hack from http://tex.stackexchange.com/a/47451/13684: + \AtBeginDocument{% + \def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code + } + \usepackage{upquote} % Upright quotes for verbatim code + \usepackage{eurosym} % defines \euro + \usepackage[mathletters]{ucs} % Extended unicode (utf-8) support + \usepackage[utf8x]{inputenc} % Allow utf-8 characters in the tex document + \usepackage{fancyvrb} % verbatim replacement that allows latex + \usepackage{grffile} % extends the file name processing of package graphics + % to support a larger range + % The hyperref package gives us a pdf with properly built + % internal navigation ('pdf bookmarks' for the table of contents, + % internal cross-reference links, web links for URLs, etc.) 
+ \usepackage{hyperref} + \usepackage{longtable} % longtable support required by pandoc >1.10 + \usepackage{booktabs} % table support for pandoc > 1.12.2 + \usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment) + \usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout) + % normalem makes italics be italics, not underlines + + + + + % Colors for the hyperref package + \definecolor{urlcolor}{rgb}{0,.145,.698} + \definecolor{linkcolor}{rgb}{.71,0.21,0.01} + \definecolor{citecolor}{rgb}{.12,.54,.11} + + % ANSI colors + \definecolor{ansi-black}{HTML}{3E424D} + \definecolor{ansi-black-intense}{HTML}{282C36} + \definecolor{ansi-red}{HTML}{E75C58} + \definecolor{ansi-red-intense}{HTML}{B22B31} + \definecolor{ansi-green}{HTML}{00A250} + \definecolor{ansi-green-intense}{HTML}{007427} + \definecolor{ansi-yellow}{HTML}{DDB62B} + \definecolor{ansi-yellow-intense}{HTML}{B27D12} + \definecolor{ansi-blue}{HTML}{208FFB} + \definecolor{ansi-blue-intense}{HTML}{0065CA} + \definecolor{ansi-magenta}{HTML}{D160C4} + \definecolor{ansi-magenta-intense}{HTML}{A03196} + \definecolor{ansi-cyan}{HTML}{60C6C8} + \definecolor{ansi-cyan-intense}{HTML}{258F8F} + \definecolor{ansi-white}{HTML}{C5C1B4} + \definecolor{ansi-white-intense}{HTML}{A1A6B2} + + % commands and environments needed by pandoc snippets + % extracted from the output of `pandoc -s` + \providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} + \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} + % Add ',fontsize=\small' for more characters per line + \newenvironment{Shaded}{}{} + \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} + \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}} + \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}} + 
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}} + \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}} + \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} + \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}} + \newcommand{\RegionMarkerTok}[1]{{#1}} + \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}} + \newcommand{\NormalTok}[1]{{#1}} + + % Additional commands for more recent versions of Pandoc + \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}} + \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}} + \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}} + \newcommand{\ImportTok}[1]{{#1}} + \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}} + \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}} + \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}} + \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}} + \newcommand{\BuiltInTok}[1]{{#1}} + \newcommand{\ExtensionTok}[1]{{#1}} + \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}} + \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}} + \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}} + + + % Define a nice break command that doesn't care if a line doesn't already + % exist. 
+ \def\br{\hspace*{\fill} \\* } + % Math Jax compatability definitions + \def\gt{>} + \def\lt{<} + % Document parameters + \title{Untitled} + + + + + % Pygments definitions + +\makeatletter +\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax% + \let\PY@ul=\relax \let\PY@tc=\relax% + \let\PY@bc=\relax \let\PY@ff=\relax} +\def\PY@tok#1{\csname PY@tok@#1\endcsname} +\def\PY@toks#1+{\ifx\relax#1\empty\else% + \PY@tok{#1}\expandafter\PY@toks\fi} +\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{% + \PY@it{\PY@bf{\PY@ff{#1}}}}}}} +\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}} + +\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}} +\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}} +\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}} +\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} +\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}} +\expandafter\def\csname 
PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}} +\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}} +\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}} +\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}} +\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}} +\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}} +\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}} +\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}} +\expandafter\def\csname 
PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}} +\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit} +\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf} +\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}} +\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}} +\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}} +\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}} +\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}} +\expandafter\def\csname PY@tok@fm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}} +\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@vm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}} +\expandafter\def\csname PY@tok@sa\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} 
+\expandafter\def\csname PY@tok@dl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}} +\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}} +\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} +\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}} + +\def\PYZbs{\char`\\} +\def\PYZus{\char`\_} +\def\PYZob{\char`\{} +\def\PYZcb{\char`\}} +\def\PYZca{\char`\^} +\def\PYZam{\char`\&} +\def\PYZlt{\char`\<} +\def\PYZgt{\char`\>} +\def\PYZsh{\char`\#} +\def\PYZpc{\char`\%} +\def\PYZdl{\char`\$} +\def\PYZhy{\char`\-} +\def\PYZsq{\char`\'} +\def\PYZdq{\char`\"} +\def\PYZti{\char`\~} +% for compatibility with earlier versions +\def\PYZat{@} +\def\PYZlb{[} +\def\PYZrb{]} +\makeatother + + 
+ % Exact colors from NB + \definecolor{incolor}{rgb}{0.0, 0.0, 0.5} + \definecolor{outcolor}{rgb}{0.545, 0.0, 0.0} + + + + + % Prevent overflowing lines due to hard-to-break entities + \sloppy + % Setup hyperref package + \hypersetup{ + breaklinks=true, % so long urls are correctly broken across lines + colorlinks=true, + urlcolor=urlcolor, + linkcolor=linkcolor, + citecolor=citecolor, + } + % Slightly bigger margins than the latex defaults + + \geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in} + + + + \begin{document} + + + \maketitle + + + + + \subsection{Package R "talweg"}\label{package-r-talweg} + +Le package \(-\) Time-series sAmpLes forecasted With ExoGenous variables +\(-\) contient le code permettant de (re)lancer les expériences +numériques décrites dans cette partie et la suivante. Les fonctions +principales sont respectivement * \textbf{getData()} pour construire un +objet R contenant les données à partir de fichiers CSV (extraits de +bases de données). Le format choisi en R est une classe R6 (du package +du même nom) exposant en particulier les méthodes \emph{getSerie(i)} et +\emph{getExo(i)} qui renvoient respectivement la \(i^{eme}\) série de +24h et les variables exogènes (mesurées) correspondantes. Voir ?Data +pour plus d'information, une fois le package chargé. * +\textbf{computeForecast()} pour calculer des prédictions sur une +certaine plage temporelle contenue dans \emph{data \textless{}- +getData(...)} * \textbf{computeError()} pour évaluer les erreurs +commises par différentes méthodes. + +Le package contient en outre diverses fonctions graphiques +\emph{plotXXX()}, utilisées dans la partie suivante. 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}3}]:} \PY{c+c1}{\PYZsh{} Chargement de la librairie (après compilation, \PYZdq{}R CMD INSTALL .\PYZdq{})} + \PY{k+kn}{library}\PY{p}{(}talweg\PY{p}{)} + + \PY{c+c1}{\PYZsh{} Acquisition des données (depuis les fichiers CSV)} + ts\PYZus{}data \PY{o}{\PYZlt{}\PYZhy{}} read.csv\PY{p}{(}\PY{k+kp}{system.file}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{extdata\PYZdq{}}\PY{p}{,}\PY{l+s}{\PYZdq{}}\PY{l+s}{pm10\PYZus{}mesures\PYZus{}H\PYZus{}loc.csv\PYZdq{}}\PY{p}{,}package\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{talweg\PYZdq{}}\PY{p}{)}\PY{p}{)} + exo\PYZus{}data \PY{o}{\PYZlt{}\PYZhy{}} read.csv\PY{p}{(}\PY{k+kp}{system.file}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{extdata\PYZdq{}}\PY{p}{,}\PY{l+s}{\PYZdq{}}\PY{l+s}{meteo\PYZus{}extra\PYZus{}noNAs.csv\PYZdq{}}\PY{p}{,}package\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{talweg\PYZdq{}}\PY{p}{)}\PY{p}{)} + data \PY{o}{\PYZlt{}\PYZhy{}} getData\PY{p}{(}ts\PYZus{}data\PY{p}{,} exo\PYZus{}data\PY{p}{,} input\PYZus{}tz\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{GMT\PYZdq{}}\PY{p}{,} date\PYZus{}format\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{\PYZpc{}d/\PYZpc{}m/\PYZpc{}Y \PYZpc{}H:\PYZpc{}M\PYZdq{}}\PY{p}{,} + working\PYZus{}tz\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{GMT\PYZdq{}}\PY{p}{,} predict\PYZus{}at\PY{o}{=}\PY{l+m}{7}\PY{p}{,} limit\PY{o}{=}\PY{l+m}{120}\PY{p}{)} + \PY{c+c1}{\PYZsh{} Plus de détails à la section 1 ci\PYZhy{}après.} + + \PY{c+c1}{\PYZsh{} Prédiction de 10 courbes (jours 102 à 111)} + pred \PY{o}{\PYZlt{}\PYZhy{}} computeForecast\PY{p}{(}data\PY{p}{,} \PY{l+m}{101}\PY{o}{:}\PY{l+m}{110}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{Persistence\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{Zero\PYZdq{}}\PY{p}{,} memory\PY{o}{=}\PY{l+m}{50}\PY{p}{,} horizon\PY{o}{=}\PY{l+m}{12}\PY{p}{,} ncores\PY{o}{=}\PY{l+m}{1}\PY{p}{)} + \PY{c+c1}{\PYZsh{} Plus de détails à la section 2 ci\PYZhy{}après.} + + \PY{c+c1}{\PYZsh{} Calcul des erreurs (sur un horizon arbitraire \PYZlt{}= horizon de 
prédiction)} + err \PY{o}{\PYZlt{}\PYZhy{}} computeError\PY{p}{(}data\PY{p}{,} pred\PY{p}{,} horizon\PY{o}{=}\PY{l+m}{6}\PY{p}{)} + \PY{c+c1}{\PYZsh{} Plus de détails à la section 3 ci\PYZhy{}après.} + + \PY{c+c1}{\PYZsh{} Puis voir ?plotError et les autres plot dans le paragraphe \PYZsq{}seealso\PYZsq{}} +\end{Verbatim} + + \subsubsection{getData()}\label{getdata} + +Les arguments de cette fonction sont, dans l'ordre : 1. +\textbf{ts\_data} : séries temporelles (fichier CSV avec entête ou +data.frame) ; la première colonne contient les heures, la seconde les +valeurs. 2. \textbf{exo\_data} : variables exogènes (fichier CSV avec +entête ou data.frame) ; la première colonne contient les jours, les +\(m\) suivantes les variables mesurées pour ce jour, et les \(m\) +dernières les variables prédites pour ce même jour. Dans notre cas +\(m=4\) : pression, température, gradient de température, vitesse du +vent. 3. \textbf{input\_tz} : zone horaire pour ts\_data (défaut : +"GMT"). 4. \textbf{date\_format} : format des heures dans ts\_data +(défaut : "\%d/\%m/\%Y \%H:\%M", format du fichier transmis par Michel). +5. \textbf{working\_tz} : zone horaire dans laquelle on souhaite +travailler avec les données (défaut : "GMT"). 6. \textbf{predict\_at} : +heure à laquelle s'effectue la prévision \(-\) et donc dernière heure +d'un bloc de 24h, relativement à working\_tz. data\texttt{\$}getSerie(3) +renvoie ainsi les 24 valeurs de 8h à 7h pour le \(3^{eme}\) bloc de 24h +présent dans le jeu de données. 
+ + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}11}]:} \PY{k+kp}{print}\PY{p}{(}data\PY{p}{)} + \PY{c+c1}{\PYZsh{}?Data} +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] + + Public: + append: function (time, serie, exo, exo\_hat) + clone: function (deep = FALSE) + getCenteredSerie: function (index) + getCenteredSeries: function (indices) + getExo: function (index) + getExoHat: function (index) + getLevel: function (index) + getSerie: function (index) + getSeries: function (indices) + getSize: function () + getStdHorizon: function () + getTime: function (index) + removeFirst: function () + removeLast: function () + Private: + .data: list + + \end{Verbatim} + + \subsubsection{computeForecast()}\label{computeforecast} + +Les arguments de cette fonction sont, dans l'ordre : 1. \textbf{data} : +le jeu de données renvoyé par getData() 2. \textbf{indices} : l'ensemble +de jours dont on veut prévoir les "lendemains" (prochains blocs de 24h) +; peut être donné sous forme d'un vecteur de dates ou d'entiers +(correspondant aux numéros des jours). 3. \textbf{forecaster} : le nom +du prédicteur principal à utiliser ; voir ?computeForecast 4. +\textbf{pjump} : le nom du prédicteur de saut d'une série à l'autre ; +voir ?computeForecast 5. \textbf{memory} : le nombre de jours à prendre +en compte dans le passé pour chaque prévision (par défaut : Inf, +c'est-à-dire tout l'historique pris en compte). 6. \textbf{horizon} : le +nombre d'heures à prédire ; par défaut "data\texttt{\$}getStdHorizon()", +c'est-à-dire le nombre d'heures restantes à partir de l'instant de +prévision + 1 jusqu'à minuit (17 pour predict\_at=7 par exemple). 7. 
+\textbf{ncores} : le nombre de processus parallèles (utiliser 1 pour une +exécution séquentielle) + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}8}]:} \PY{k+kp}{print}\PY{p}{(}pred\PY{p}{)} + \PY{c+c1}{\PYZsh{}?computeForecast} +\end{Verbatim} + + \begin{Verbatim}[commandchars=\\\{\}] + + Public: + append: function (forecast, params, index\_in\_data) + clone: function (deep = FALSE) + getDates: function () + getForecast: function (index) + getIndexInData: function (index) + getParams: function (index) + getSize: function () + initialize: function (dates) + Private: + .dates: 14323 14324 14325 14326 14327 14328 14329 14330 14331 14332 + .pred: list + + \end{Verbatim} + + \subsubsection{computeError()}\label{computeerror} + +Les arguments de cette fonction sont, dans l'ordre : 1. \textbf{data} : +le jeu de données renvoyé par getData() 2. \textbf{pred} : les +prédictions renvoyées par computeForecast() 3. \textbf{horizon} : le +nombre d'heures à considérer pour le calcul de l'erreur ; doit être +inférieur ou égal à l'horizon utilisé pour la prédiction (même valeur +par défaut : "data\texttt{\$}getStdHorizon()") + + \begin{Verbatim}[commandchars=\\\{\}] +{\color{incolor}In [{\color{incolor}9}]:} \PY{k+kp}{summary}\PY{p}{(}err\PY{p}{)} + \PY{k+kp}{summary}\PY{p}{(}err\PY{o}{\PYZdl{}}\PY{k+kp}{abs}\PY{p}{)} + \PY{k+kp}{summary}\PY{p}{(}err\PY{o}{\PYZdl{}}MAPE\PY{p}{)} +\end{Verbatim} + + + \begin{verbatim} + Length Class Mode +abs 2 -none- list +MAPE 2 -none- list + \end{verbatim} + + + + \begin{verbatim} + Length Class Mode +day 6 -none- numeric +indices 10 -none- numeric + \end{verbatim} + + + + \begin{verbatim} + Length Class Mode +day 6 -none- numeric +indices 10 -none- numeric + \end{verbatim} + + + \subsubsection{Graphiques}\label{graphiques} + +Voir ?plotError : les autres fonctions graphiques sont dans la section +'seealso' : + +\begin{verbatim} + ‘plotCurves’, ‘plotPredReal’, ‘plotSimils’, ‘plotFbox’, + ‘computeFilaments’, 
‘plotFilamentsBox’, ‘plotRelVar’ +\end{verbatim} + +?plotXXX, etc. + + + % Add a bibliography block to the postdoc + + + + \end{document} diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 92ba2c1..3820d57 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -9,6 +9,8 @@ #' \code{getTime()} below. Each cell .data[[i]] is itself a list containing five slots, #' as described in the 'field' section. #' +#' @usage data <- Data$new() +#' #' @field .data[[i]] List of #' \itemize{ #' \item time: vector of times diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index 8f81747..a1f29ad 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -7,6 +7,8 @@ #' averaged to provide a smooth prediction. This forecast will most of the time be wrong, #' but will also look plausible enough. #' +#' @usage f <- AverageForecaster$new(pjump) +#' #' @docType class #' @format R6 class, inherits Forecaster #' @aliases F_Average diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 32d5cff..51b5730 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -28,6 +28,8 @@ #' obtain the final prediction. #' } #' +#' @usage f <- NeighborsForecaster$new(pjump) +#' #' @docType class #' @format R6 class, inherits Forecaster #' @aliases F_Neighbors @@ -216,16 +218,16 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", ) ) -#' getConstrainedNeighbs -#' -#' Get indices of neighbors of similar pollution level (among same season + day type). -#' -#' @param today Index of current day -#' @param data Object of class Data -#' @param fdays Current set of "first days" (no-NA pairs) -#' @param min_neighbs Minimum number of points in a neighborhood -#' @param max_neighbs Maximum number of points in a neighborhood -#' +# getConstrainedNeighbs +# +# Get indices of neighbors of similar pollution level (among same season + day type). 
+# +# @param today Index of current day +# @param data Object of class Data +# @param fdays Current set of "first days" (no-NA pairs) +# @param min_neighbs Minimum number of points in a neighborhood +# @param max_neighbs Maximum number of points in a neighborhood +# .getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12) { levelToday = data$getLevel(today) @@ -252,13 +254,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", fdays } -#' compute similarities -#' -#' Apply the gaussian kernel on computed squared distances. -#' -#' @param distances2 Squared distances -#' @param window Window parameter for the kernel -#' +# compute similarities +# +# Apply the gaussian kernel on computed squared distances. +# +# @param distances2 Squared distances +# @param window Window parameter for the kernel +# .computeSimils <- function(distances2, window) { sd_dist = sd(distances2) diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index fa5f99f..3eefa5b 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -8,6 +8,8 @@ #' If the last similar day has missing values, the next one is searched, and so on until #' one full serie is found (if no one is found, NA is returned). #' +#' @usage f <- PersistenceForecaster$new(pjump) +#' #' @docType class #' @format R6 class, inherits Forecaster #' @aliases F_Persistence diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 2c9a7b7..dae43e8 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -3,6 +3,8 @@ #' Flat prediction: always forecast a serie of zeros. #' This serie is then adjusted using the ".pjump" function (see \code{Forecaster} class). 
#' +#' @usage f <- ZeroForecaster$new(pjump) +#' #' @docType class #' @format R6 class, inherits Forecaster #' @aliases F_Zero diff --git a/pkg/R/Forecast.R b/pkg/R/Forecast.R index 6036daf..bb665bd 100644 --- a/pkg/R/Forecast.R +++ b/pkg/R/Forecast.R @@ -9,6 +9,8 @@ #' Each cell .pred[[i]] is itself a list containing three slots, as described in the #' 'field' section. #' +#' @usage f <- Forecast$new(dates) +#' #' @field .pred List with #' \itemize{ #' \item serie: the forecasted serie diff --git a/pkg/R/Forecaster.R b/pkg/R/Forecaster.R index 2efa9ba..ce1bb35 100644 --- a/pkg/R/Forecaster.R +++ b/pkg/R/Forecaster.R @@ -9,6 +9,8 @@ #' serie, and then calls the "jump prediction" function -- see "field" section -- to #' adjust it based on the last observed values. #' +#' @usage f <- Forecaster$new(pjump) #warning: predictShape() is unimplemented +#' #' @field .params List of computed parameters (if applicable). #' @field .pjump Function: how to predict the jump at day interface? The arguments of #' this function are -- in this order: diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 3f32868..b0d0ae0 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -116,14 +116,14 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) return (day == day_ref) } -#' getNoNA2 -#' -#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. -#' -#' @inheritParams dateIndexToInteger -#' @param first First index (included) -#' @param last Last index (included) -#' +# getNoNA2 +# +# Get indices in data of no-NA series followed by no-NA, within [first,last] range. 
+# +# @inheritParams dateIndexToInteger +# @param first First index (included) +# @param last Last index (included) +# .getNoNA2 = function(data, first, last) { (first:last)[ sapply(first:last, function(i) diff --git a/reports/rapport_final/report_P7_H17.tex b/reports/rapport_final/report_P7_H17.tex index 012df04..5d0886b 100644 --- a/reports/rapport_final/report_P7_H17.tex +++ b/reports/rapport_final/report_P7_H17.tex @@ -271,6 +271,7 @@ \geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in} +\graphicspath{{./figs/}} \begin{document} -- 2.44.0