+#' @importFrom grDevices colors gray.colors
+#' @importFrom graphics abline hist par plot
+#' @importFrom methods hasArg is
+#' @importFrom stats quantile sd
+#' @importFrom utils getFromNamespace read.csv tail
+#' @importFrom R6 R6Class
+#'
#' @include Data.R
#' @include Forecast.R
#' @include Forecaster.R
#' @include computeError.R
#' @include plot.R
#'
-#' @importFrom grDevices colors gray.colors
-#' @importFrom graphics abline hist par plot
-#' @importFrom methods hasArg is
-#' @importFrom stats quantile sd
-#' @importFrom utils getFromNamespace read.csv tail
-#'
NULL
#' Data
#'
-#' Data encapsulation
+#' Data encapsulation as a list (days) of lists (components).
#'
-#' @docType class
-#' @importFrom R6 R6Class
+#' The private field .data is a list where each cell contains the variables for a period
+#' of time of 24 hours, from time P+1 until P the next day where P is an integer between
+#' 0 and 23. .data[[1]] refers to the first measured data (serie and exogenous
+#' variables): the corresponding times can be accessed through the function
+#' \code{getTime()} below. Each cell .data[[i]] is itself a list containing five slots,
+#' as described in the 'field' section.
#'
-#' @field .data List of
+#' @field .data[[i]] List of
#' \itemize{
#' \item time: vector of times
-#' \item centered_serie: centered series
-#' \item level: corresponding levels
+#' \item centered_serie: centered serie
+#' \item level: corresponding level
#' \item exo: exogenous variables
-#' \item exo_hat: predicted exogenous variables
+#' \item exo_hat: predicted exogenous variables (for next day)
#' }
#'
#' @section Methods:
#' \describe{
#' \item{\code{getSize()}}{
-#' Return number of series in dataset.}
+#' Number of series in dataset.}
#' \item{\code{getStdHorizon()}}{
-#' Return number of time steps from serie[1] until midnight}
-#' \item{\code{appendHat(time, exo_hat)}}{
-#' New estimated exogenous variables + time}
-#' \item{\code{append(serie, exo)}}{
-#' New measured data; call *after* \code{appendHat()}}
+#' Number of time steps from serie[1] until midnight}
+#' \item{\code{append(time, serie, exo, exo_hat)}}{
+#' Measured data for given vector of times + exogenous predictions from last midnight.}
#' \item{\code{getTime(index)}}{
-#' Get times at specified index.}
+#' Times (vector) at specified index.}
#' \item{\code{getCenteredSerie(index)}}{
-#' Get (measured or predicted) centered serie at specified index.}
+#' Centered serie at specified index.}
#' \item{\code{getCenteredSeries(indices)}}{
-#' Get centered series at specified indices (in columns).}
+#' Centered series at specified indices (in columns).}
#' \item{\code{getLevel(index)}}{
-#' Get level at specified index.}
+#' Level at specified index.}
#' \item{\code{getSerie(index)}}{
-#' Get serie (centered+level) at specified index.}
+#' Serie (centered+level) at specified index.}
#' \item{\code{getSeries(indices)}}{
-#' Get series at specified indices (in columns).}
+#' Series at specified indices (in columns).}
#' \item{\code{getExoHat(index)}}{
-#' Get predicted exogenous variables at specified index.}
+#' Predicted exogenous variables at specified index.}
#' \item{\code{getExo(index)}}{
-#' Get exogenous variables at specified index.}
+#' Measured exogenous variables at specified index.}
#' \item{\code{removeFirst()}}{
#' Remove first list element (if truncated).}
#' \item{\code{removeLast()}}{
#' Remove last list element (if truncated).}
#' }
#'
+#' @docType class
+#' @format R6 class
+#'
Data = R6::R6Class("Data",
private = list(
.data = list()
#' Average Forecaster
#'
-#' Pointwise average of all series of the same (day of week) days in the past.
+#' Pointwise average of all the series of the same day of week in the past.
#'
+#' For example, if the current day (argument "today") is a Tuesday, then all series
+#' corresponding to Wednesdays in the past (until the beginning or memory limit) are
+#' averaged to provide a smooth prediction. This forecast will most of the time be wrong,
+#' but will also look plausible enough.
+#'
+#' @docType class
#' @format R6 class, inherits Forecaster
#' @alias F_Average
#'
#'
#' Predict next serie as a weighted combination of "futures of the past" days,
#' where days in the past are chosen and weighted according to some similarity measures.
-#' See 'details' section.
#'
-#' TODO: details.
+#' The main method is \code{predictShape()}, taking arguments data, today, memory,
+#' horizon respectively for the dataset (object output of \code{getData()}), the current
+#' index, the data depth (in days) and the number of time steps to forecast.
+#' In addition, optional arguments can be passed:
+#' \itemize{
+#' \item local : TRUE (default) to constrain neighbors to be "same days within same
+#' season"
+#' \item simtype : 'endo' for a similarity based on the series only,\cr
+#' 'exo' for a similarity based on exogenous variables only,\cr
+#' 'mix' for the product of 'endo' and 'exo',\cr
+#' 'none' (default) to apply a simple average: no computed weights
+#' \item window : A window for similarities computations; override cross-validation
+#' window estimation.
+#' }
+#' The method is summarized as follows:
+#' \enumerate{
+#' \item Determine N (=20) recent days without missing values, and followed by a
+#' tomorrow also without missing values.
+#' \item Optimize the window parameters (if relevant) on the N chosen days.
+#' \item Considering the optimized window, compute the neighbors (with locality
+#' constraint or not), compute their similarities -- using a gaussian kernel if
+#' simtype != "none" -- and average accordingly the "tomorrows of neighbors" to
+#' obtain the final prediction.
+#' }
#'
+#' @docType class
#' @format R6 class, inherits Forecaster
#' @alias F_Neighbors
#'
#' Persistence Forecaster
#'
#' Look for the most recent similar day in the past, and return its corresponding curve.
-#' "Similar day" means "same day in the week".
#'
+#' There are two variations, depending on whether "similar day" means "same day in the
+#' week" or "most recent day" (regardless of day type). The corresponding argument is
+#' named "same_day": a value of TRUE implies the former interpretation (same day in
+#' week). If the last similar day has missing values, the next one is searched, and so
+#' on until one full serie is found (if none is found, NA is returned).
+#'
+#' @docType class
#' @format R6 class, inherits Forecaster
#' @alias F_Persistence
#'
#' Zero Forecaster
#'
-#' Flat prediction of next series of 'horizon' values.
+#' Flat prediction: always forecast a serie of zeros.
+#' This serie is then adjusted using the ".pjump" function (see \code{Forecaster} class).
#'
+#' @docType class
#' @format R6 class, inherits Forecaster
#' @alias F_Zero
#'
#' Forecast
#'
-#' Forecast encapsulation
+#' Forecast encapsulation as a list (days where predictions occur) of lists (components).
#'
-#' @docType class
-#' @importFrom R6 R6Class
+#' The private field .pred is a list where each cell contains the predicted variables for
+#' a period of time of H<=24 hours, from hour P+1 until P+H, where P+1 is taken right
+#' after the end of the period designated by \code{getIndexInData()}. In other terms,
+#' \code{forecast$getForecast(i)} returns forecasts for \code{data$getSerie(i+1)}.
+#' Each cell .pred[[i]] is itself a list containing three slots, as described in the
+#' 'field' section.
#'
-#' @field .pred List with \itemize{
-#' \item serie: forecasted serie
+#' @field .pred List with
+#' \itemize{
+#' \item serie: the forecasted serie
#' \item params: corresponding list of parameters (weights, neighbors...)
-#' \item index_in_data: corresponding index in data object}
-#' @field .dates vector of day indices where forcast occurs
+#' \item index_in_data: corresponding index in data object
+#' }
+#' @field .dates vector of (integer) day indices where forecast occurs
#'
#' @section Methods:
#' \describe{
#' Get index in data which corresponds to current forecast.}
#' }
#'
+#' @docType class
+#' @format R6 class
+#'
Forecast = R6::R6Class("Forecast",
private = list(
.pred = list(),
#' Forecaster
#'
-#' Forecaster (abstract class, implemented by all forecasters)
+#' Forecaster (abstract class, implemented by all forecasters).
#'
-#' @docType class
-#' @importFrom R6 R6Class
+#' A Forecaster object encapsulates parameters (which can be of various kinds, for
+#' example the "Neighbors" method stores information about the considered neighborhood for
+#' the current prediction task) and one main function: \code{predictSerie()}. This last
+#' function (by default) calls \code{predictShape()} to get a forecast of a centered
+#' serie, and then calls the "jump prediction" function -- see "field" section -- to
+#' adjust it based on the last observed values.
#'
-#' @field .params List of computed parameters, for post-run analysis (dev)
-#' @field .pjump Function: how to predict the jump at day interface ?
+#' @field .params List of computed parameters (if applicable).
+#' @field .pjump Function: how to predict the jump at day interface? The arguments of
+#' this function are -- in this order:
+#' \itemize{
+#' \item data : object output of \code{getData()},
+#' \item today : index (integer or date) of the last known day in data,
+#' \item memory : number of days to use in the past (including today),
+#' \item horizon : number of time steps to predict,
+#' \item params : optimized parameters in the main method \code{predictShape()},
+#' \item ... : additional arguments.
+#' }
+#' .pjump returns an estimation of the jump after the last observed value.
#'
#' @section Methods:
#' \describe{
#' \item{\code{initialize(data, pjump)}}{
#' Initialize a Forecaster object with a Data object and a jump prediction function.}
#' \item{\code{predictSerie(today,memory,horizon,...)}}{
-#' Predict a new serie of \code{horizon} values at day index \code{today}
-#' using \code{memory} days in the past.}
+#' Predict a new serie of \code{horizon} values at day index \code{today} using
+#' \code{memory} days in the past.}
#' \item{\code{predictShape(today,memory,horizon,...)}}{
-#' Predict a new shape of \code{horizon} values at day index \code{today}
-#' using \code{memory} days in the past.}
+#' Predict a new shape of \code{horizon} values at day index \code{today} using
+#' \code{memory} days in the past.}
#' \item{\code{getParameters()}}{
#' Return (internal) parameters.}
#' }
#'
+#' @docType class
+#' @format R6 class
+#'
Forecaster = R6::R6Class("Forecaster",
private = list(
.params = list(),
-#' Obtain jump forecast by the Neighbors method
+#' getNeighborsJumpPredict
+#'
+#' Apply optimized weights on gaps observed on selected neighbors.
+#' This jump prediction method can only be used in conjunction with the Neighbors
+#' Forecaster, because it makes use of the optimized parameters to re-apply the weights
+#' on the jumps observed at days interfaces of the past neighbors.
#'
#' @inheritParams computeForecast
#' @inheritParams getZeroJumpPredict
#'
+#' @aliases J_Neighbors
+#'
getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...)
{
first_day = max(1, today-memory)
-#' Obtain jump forecast by the Persistence method
+#' getPersistenceJumpPredict
+#'
+#' Analog of the PersistenceForecaster: predict the jump after last observed value either
+#' by re-applying the last jump between similar day and its follower (if argument
+#' "same_day" is TRUE), or by re-using the very last observed jump (when "same_day" =
+#' FALSE).
#'
#' @inheritParams computeForecast
#' @inheritParams getZeroJumpPredict
#'
+#' @aliases J_Persistence
+#'
getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...)
{
#return gap between end of similar day curve and first day of tomorrow (in the past)
-#' Just predict zero "jump" (for reference, benchmarking at least)
+#' getZeroJumpPredict
+#'
+#' Just predict zero "jump" (for reference, benchmarking at least).
#'
#' @inheritParams computeForecast
#' @param today Index of the current day (predict tomorrow)
#' @param params Optional parameters computed by the main forecaster
#'
+#' @aliases J_Zero
+#'
getZeroJumpPredict = function(data, today, memory, horizon, params, ...)
{
0
#' Compute error
#'
-#' Obtain the errors between forecast and data
+#' Compute the errors between forecasted and measured series.
#'
-#' @param data Dataset, object of class \code{Data} output of \code{getData}
-#' @param pred Forecast object, class \code{Forecast} output of \code{computeForecast}
+#' @param data Object of class \code{Data} output of \code{getData}
+#' @param pred Object of class \code{Forecast} output of \code{computeForecast}
#' @param horizon Horizon where to compute the error
#' (<= horizon used in \code{computeForecast})
#'
-#' @return A list (abs,MAPE) of lists (day,indices)
+#' @return A list (abs,MAPE) of lists (day,indices). The "indices" slots contain series
+#' of size L where L is the number of predicted days; i-th value is the averaged error
+#' (absolute or MAPE) on day i. The "day" slots contain curves of errors, for each time
+#' step, averaged on the L forecasting days.
#'
#' @export
computeError = function(data, pred, horizon=data$getStdHorizon())
#' Compute forecast
#'
-#' Predict time-series curves for the selected days indices.
+#' Predict time-series curves ("tomorrows") at the selected days indices ("todays").
+#' This function just runs a loop over all requested indices, and stores the individual
+#' forecasts into a list which is then turned into a Forecast object.
#'
-#' TODO: details
-#'
-#' @param data Object of type \code{Data}, output of \code{getData()}.
+#' @param data Object of class Data, output of \code{getData()}.
#' @param indices Indices where to forecast (the day after); integers relative to the
#' beginning of data, or (convertible to) Date objects.
#' @param forecaster Name of the main forecaster; more details: ?F_<forecastername>
#' PM10_values_of_last_24h,
#' exogenous_measures_of_last_24h,
#' exogenous_predictions_for_next_24h)
-#' pred <- forecaster$predictSerie(data, data$getSize()-1, ...)
+#' pred <- forecaster$predictSerie(data, data$getSize(), ...)
#' #do_something_with_pred
#' }}
#' @export
-#' @title Acquire data in a clean format
+#' getData
#'
-#' @description Take in input data frames and/or files containing raw data, and
-#' timezones, and output a Data object, roughly corresponding to a list where each cell
-#' contains all value for one day (see \code{?Data}).
+#' Acquire data as a Data object; see ?Data.
+#'
+#' Since series are given in columns (database format), this function builds series one
+#' by one and incrementally grows a Data object which is finally returned.
#'
#' @param ts_data Time-series, as a data frame (DB style: 2 columns, first is date/time,
-#' second is value) or a CSV file
-#' @param exo_data Exogenous variables, as a data frame or a CSV file; first comlumn is
+#' second is value) or a CSV file.
+#' @param exo_data Exogenous variables, as a data frame or a CSV file; first column is
#' dates, next block are measurements for the day, and final block are exogenous
-#' forecasts
+#' forecasts (for the same day).
#' @param input_tz Timezone in the input files ("GMT" or e.g. "Europe/Paris")
#' @param date_format How date/time are stored (e.g. year/month/day hour:minutes;
-#' see \code{strptime})
+#' see ?strptime)
#' @param working_tz Timezone to work with ("GMT" or e.g. "Europe/Paris")
-#' @param predict_at When does the prediction take place ? Integer, in hours. Default: 0
+#' @param predict_at When does the prediction take place? Integer, in hours. Default: 0
#' @param limit Number of days to extract (default: Inf, for "all")
#'
#' @return An object of class Data
#' Plot curves
#'
-#' Plot a range of curves in data
+#' Plot a range of curves in data.
#'
-#' @param data Object of class Data
+#' @inheritParams computeError
#' @param indices Range of indices (integers or dates)
#'
#' @export
#' Plot error
#'
-#' Draw error graphs, potentially from several runs of \code{computeForecast}
+#' Draw error graphs, potentially from several runs of \code{computeForecast()}.
#'
-#' @param err Error as returned by \code{computeError}
+#' @param err Error as returned by \code{computeError()}
#' @param cols Colors for each error (default: 1,2,3,...)
#'
#' @seealso \code{\link{plotCurves}}, \code{\link{plotPredReal}},
#' Plot measured / predicted
#'
-#' Plot measured curve (in black) and predicted curve (in blue)
+#' Plot measured curve (in black) and predicted curve (in blue).
#'
-#' @param data Object return by \code{getData}
-#' @param pred Object as returned by \code{computeForecast}
+#' @inheritParams computeError
#' @param index Index in forecasts (integer or date)
#'
#' @export
#' Plot similarities
#'
-#' Plot histogram of similarities (weights)
+#' Plot histogram of similarities (weights), for 'Neighbors' method.
#'
-#' @param pred Object as returned by \code{computeForecast}
+#' @inheritParams computeError
#' @param index Index in forecasts (integer or date)
#'
#' @export
#' Functional boxplot
#'
-#' Draw the functional boxplot on the left, and bivariate plot on the right
+#' Draw the functional boxplot on the left, and bivariate plot on the right.
#'
-#' @param data Object return by \code{getData}
-#' @param indices integer or date indices to process
+#' @inheritParams computeError
+#' @inheritParams plotCurves
#'
#' @export
plotFbox <- function(data, indices=seq_len(data$getSize()))
#' Compute filaments
#'
-#' Get similar days in the past, as black as distances are small
+#' Obtain similar days in the past, and (optionally) plot them -- as black as distances
+#' are small.
#'
-#' @param data Object as returned by \code{getData}
-#' @param pred Object of class Forecast
+#' @inheritParams computeError
#' @param index Index in forecast (integer or date)
#' @param limit Number of neighbors to consider
#' @param plot Should the result be plotted?
#' Functional boxplot on filaments
#'
-#' Draw the functional boxplot on filaments obtained by \code{computeFilaments}
+#' Draw the functional boxplot on filaments obtained by \code{computeFilaments()}.
#'
-#' @param data Object return by \code{getData}
+#' @inheritParams computeError
#' @param fil Output of \code{computeFilaments}
#'
#' @export
#' Plot relative conditional variability / absolute variability
#'
#' Draw the relative conditional variability / absolute variability based on filaments
-#' obtained by \code{computeFilaments}
+#' obtained by \code{computeFilaments()}.
#'
-#' @param data Object return by \code{getData}
-#' @param fil Output of \code{computeFilaments}
+#' @inheritParams computeError
+#' @inheritParams plotFilamentsBox
#'
#' @export
plotRelVar = function(data, fil)
#' dateIndexToInteger
#'
-#' Transform a (potential) date index into an integer (relative to data)
+#' Transform a (potential) date index into an integer (relative to data beginning).
#'
#' @param index Date (or integer) index
-#' @param data Object of class \code{Data}
+#' @param data Object of class Data, output of \code{getData()}
#'
#' @export
dateIndexToInteger = function(index, data)
{
- #works on integers too: trust input
+ # Works on integers too: trust input
if (is.numeric(index))
index = as.integer(index)
if (is.integer(index))
#' integerIndexToDate
#'
-#' Transform an integer index to date index (relative to data)
+#' Transform an integer index (relative to data beginning) into a date index.
#'
-#' @param index Date (or integer) index
-#' @param data Object of class \code{Data}
+#' @inheritParams dateIndexToInteger
#'
#' @export
integerIndexToDate = function(index, data)
{
- #works on dates too: trust input
+ # Works on dates too: trust input
if (is.character(index))
index = as.Date(index)
if (is(index,"Date"))
#' getSimilarDaysIndices
#'
-#' Find similar days indices in the past.
+#' Find similar days indices in the past; at least same type of day in the week:
+#' monday=tuesday=wednesday=thursday != friday != saturday != sunday.
#'
#' @param index Day index (numeric or date)
#' @param data Reference dataset, object output of \code{getData}
# isSameSeason
#
-# Check if two months fall in the same "season" (defined by estimated pollution rate)
+# Check if two months fall in the same "season" (defined by estimated pollution rate).
#
-# @param month month index to test
-# @param month_ref month to compare to
+# @param month Month index to test
+# @param month_ref Month to compare to
#
.isSameSeason = function(month, month_ref)
{
# isSameDay
#
-# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials
+# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials.
#
-# @param day day index to test
-# @param day_ref day index to compare to
+# @param day Day index to test
+# @param day_ref Day index to compare to
#
.isSameDay = function(day, day_ref)
{
#'
#' Get indices in data of no-NA series followed by no-NA, within [first,last] range.
#'
-#' @param data Object of class Data
+#' @inheritParams dateIndexToInteger
#' @param first First index (included)
#' @param last Last index (included)
#'