From: Benjamin Auder Date: Sun, 9 Apr 2017 18:15:09 +0000 (+0200) Subject: Improve documentation X-Git-Url: https://git.auder.net/doc/DESCRIPTION?a=commitdiff_plain;h=102bcfda4afbb5cfee885cbee0f55545624168fd;p=talweg.git Improve documentation --- diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R index 1d78d9c..466293c 100644 --- a/pkg/R/A_NAMESPACE.R +++ b/pkg/R/A_NAMESPACE.R @@ -1,3 +1,10 @@ +#' @importFrom grDevices colors gray.colors +#' @importFrom graphics abline hist par plot +#' @importFrom methods hasArg is +#' @importFrom stats quantile sd +#' @importFrom utils getFromNamespace read.csv tail +#' @importFrom R6 R6Class +#' #' @include Data.R #' @include Forecast.R #' @include Forecaster.R @@ -14,10 +21,4 @@ #' @include computeError.R #' @include plot.R #' -#' @importFrom grDevices colors gray.colors -#' @importFrom graphics abline hist par plot -#' @importFrom methods hasArg is -#' @importFrom stats quantile sd -#' @importFrom utils getFromNamespace read.csv tail -#' NULL diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 39f3837..c193f7d 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -1,51 +1,56 @@ #' Data #' -#' Data encapsulation +#' Data encapsulation as a list (days) of lists (components). #' -#' @docType class -#' @importFrom R6 R6Class +#' The private field .data is a list where each cell contains the variables for a period +#' of time of 24 hours, from time P+1 until P the next day where P is an integer between +#' 0 and 23. .data[[1]] refers to the first measured data (serie and exogenous +#' variables): the corresponding times can be accessed through the function +#' \code{getTime()} below. Each cell .data[[i]] is itself a list containing five slots, +#' as described in the 'field' section. 
#' -#' @field .data List of +#' @field .data[[i]] List of #' \itemize{ #' \item time: vector of times -#' \item centered_serie: centered series -#' \item level: corresponding levels +#' \item centered_serie: centered serie +#' \item level: corresponding level #' \item exo: exogenous variables -#' \item exo_hat: predicted exogenous variables +#' \item exo_hat: predicted exogenous variables (for next day) #' } #' #' @section Methods: #' \describe{ #' \item{\code{getSize()}}{ -#' Return number of series in dataset.} +#' Number of series in dataset.} #' \item{\code{getStdHorizon()}}{ -#' Return number of time steps from serie[1] until midnight} -#' \item{\code{appendHat(time, exo_hat)}}{ -#' New estimated exogenous variables + time} -#' \item{\code{append(serie, exo)}}{ -#' New measured data; call *after* \code{appendHat()}} +#' Number of time steps from serie[1] until midnight} +#' \item{\code{append(time, serie, exo, exo_hat)}}{ +#' Measured data for given vector of times + exogenous predictions from last midnight.} 
#' \item{\code{getTime(index)}}{ -#' Get times at specified index.} +#' Times (vector) at specified index.} #' \item{\code{getCenteredSerie(index)}}{ -#' Get (measured or predicted) centered serie at specified index.} +#' Centered serie at specified index.} #' \item{\code{getCenteredSeries(indices)}}{ -#' Get centered series at specified indices (in columns).} +#' Centered series at specified indices (in columns).} #' \item{\code{getLevel(index)}}{ -#' Get level at specified index.} +#' Level at specified index.} #' \item{\code{getSerie(index)}}{ -#' Get serie (centered+level) at specified index.} +#' Serie (centered+level) at specified index.} #' \item{\code{getSeries(indices)}}{ -#' Get series at specified indices (in columns).} +#' Series at specified indices (in columns).} #' \item{\code{getExoHat(index)}}{ -#' Get predicted exogenous variables at specified index.} +#' Predicted exogenous variables at specified index.} #' \item{\code{getExo(index)}}{ -#' Get exogenous variables at specified index.} +#' Measured exogenous variables at specified index.} #' \item{\code{removeFirst()}}{ #' Remove first list element (if truncated).} #' \item{\code{removeLast()}}{ #' Remove last list element (if truncated).} #' } #' +#' @docType class +#' @format R6 class +#' Data = R6::R6Class("Data", private = list( .data = list() diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index d7febd4..8a33111 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -1,7 +1,13 @@ #' Average Forecaster #' -#' Pointwise average of all series of the same (day of week) days in the past. +#' Pointwise average of all the series of the same day of week in the past. #' +#' For example, if the current day (argument "today") is a tuesday, then all series +#' corresponding to wednesdays in the past (until the beginning or memory limit) are +#' averaged to provide a smooth prediction. This forecast will most of the time be wrong, +#' but will also look plausible enough. 
+#' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Average #' diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 7e0cdd2..12595d9 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -2,10 +2,33 @@ #' #' Predict next serie as a weighted combination of "futures of the past" days, #' where days in the past are chosen and weighted according to some similarity measures. -#' See 'details' section. #' -#' TODO: details. +#' The main method is \code{predictShape()}, taking arguments data, today, memory, +#' horizon respectively for the dataset (object output of \code{getData()}), the current +#' index, the data depth (in days) and the number of time steps to forecast. +#' In addition, optional arguments can be passed: +#' \itemize{ +#' \item local : TRUE (default) to constrain neighbors to be "same days within same +#' season" +#' \item simtype : 'endo' for a similarity based on the series only, +#' 'exo' for a similarity based on exogenous variables only, +#' 'mix' for the product of 'endo' and 'exo', +#' 'none' (default) to apply a simple average: no computed weights +#' \item window : A window for similarities computations; override cross-validation +#' window estimation. +#' } +#' The method is summarized as follows: +#' \enumerate{ +#' \item Determine N (=20) recent days without missing values, and followed by a +#' tomorrow also without missing values. +#' \item Optimize the window parameters (if relevant) on the N chosen days. +#' \item Considering the optimized window, compute the neighbors (with locality +#' constraint or not), compute their similarities -- using a gaussian kernel if +#' simtype != "none" -- and average accordingly the "tomorrows of neighbors" to +#' obtain the final prediction. 
+#' } #' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Neighbors #' diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index 754df07..eaeca98 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -1,8 +1,14 @@ #' Persistence Forecaster #' #' Look for the most recent similar day in the past, and return its corresponding curve. -#' "Similar day" means "same day in the week". #' +#' There are two variations, depending whether "similar day" means "same day in the week" +#' or "most recent day" (regardless of day type). The corresponding argument is named +#' "same_day": a value of TRUE implies the former interpretation (same day in week). +#' If the last similar day has missing values, the next one is searched, and so on until +#' one full serie is found (if no one is found, NA is returned). +#' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Persistence #' diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 3c46f2c..977cba9 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -1,7 +1,9 @@ #' Zero Forecaster #' -#' Flat prediction of next series of 'horizon' values. +#' Flat prediction: always forecast a serie of zeros. +#' This serie is then adjusted using the ".pjump" function (see \code{Forecaster} class). #' +#' @docType class #' @format R6 class, inherits Forecaster #' @alias F_Zero #' diff --git a/pkg/R/Forecast.R b/pkg/R/Forecast.R index a0ddcda..6036daf 100644 --- a/pkg/R/Forecast.R +++ b/pkg/R/Forecast.R @@ -1,15 +1,21 @@ #' Forecast #' -#' Forecast encapsulation +#' Forecast encapsulation as a list (days where prediction occur) of lists (components). #' -#' @docType class -#' @importFrom R6 R6Class +#' The private field .pred is a list where each cell contains the predicted variables for +#' a period of time of H<=24 hours, from hour P+1 until P+H, where P+1 is taken right +#' after the end of the period designated by \code{getIndexInData()}. 
In other terms, +#' \code{forecast$getForecast(i)} returns forecasts for \code{data$getSerie(i+1)}. +#' Each cell .pred[[i]] is itself a list containing three slots, as described in the +#' 'field' section. #' -#' @field .pred List with \itemize{ -#' \item serie: forecasted serie +#' @field .pred List with +#' \itemize{ +#' \item serie: the forecasted serie #' \item params: corresponding list of parameters (weights, neighbors...) -#' \item index_in_data: corresponding index in data object} -#' @field .dates vector of day indices where forcast occurs +#' \item index_in_data: corresponding index in data object +#' } +#' @field .dates vector of (integer) day indices where forecast occurs #' #' @section Methods: #' \describe{ @@ -30,6 +36,9 @@ #' Get index in data which corresponds to current forecast.} #' } #' +#' @docType class +#' @format R6 class +#' Forecast = R6::R6Class("Forecast", private = list( .pred = list(), diff --git a/pkg/R/Forecaster.R b/pkg/R/Forecaster.R index cedb2b6..2efa9ba 100644 --- a/pkg/R/Forecaster.R +++ b/pkg/R/Forecaster.R @@ -1,27 +1,44 @@ #' Forecaster #' -#' Forecaster (abstract class, implemented by all forecasters) +#' Forecaster (abstract class, implemented by all forecasters). #' -#' @docType class -#' @importFrom R6 R6Class +#' A Forecaster object encapsulates parameters (which can be of various kinds, for +#' example "Neighbors" method stores information about the considered neighborhood for +#' the current prediction task) and one main function: \code{predictSerie()}. This last +#' function (by default) calls \code{predictShape()} to get a forecast of a centered +#' serie, and then calls the "jump prediction" function -- see "field" section -- to +#' adjust it based on the last observed values. #' -#' @field .params List of computed parameters, for post-run analysis (dev) -#' @field .pjump Function: how to predict the jump at day interface ? +#' @field .params List of computed parameters (if applicable). 
+#' @field .pjump Function: how to predict the jump at day interface? The arguments of +#' this function are -- in this order: +#' \itemize{ +#' \item data : object output of \code{getData()}, +#' \item today : index (integer or date) of the last known day in data, +#' \item memory : number of days to use in the past (including today), +#' \item horizon : number of time steps to predict, +#' \item params : optimized parameters in the main method \code{predictShape()}, +#' \item ... : additional arguments. +#' } +#' .pjump returns an estimation of the jump after the last observed value. #' #' @section Methods: #' \describe{ #' \item{\code{initialize(data, pjump)}}{ #' Initialize a Forecaster object with a Data object and a jump prediction function.} #' \item{\code{predictSerie(today,memory,horizon,...)}}{ -#' Predict a new serie of \code{horizon} values at day index \code{today} -#' using \code{memory} days in the past.} +#' Predict a new serie of \code{horizon} values at day index \code{today} using +#' \code{memory} days in the past.} #' \item{\code{predictShape(today,memory,horizon,...)}}{ -#' Predict a new shape of \code{horizon} values at day index \code{today} -#' using \code{memory} days in the past.} +#' Predict a new shape of \code{horizon} values at day index \code{today} using +#' \code{memory} days in the past.} #' \item{\code{getParameters()}}{ #' Return (internal) parameters.} #' } #' +#' @docType class +#' @format R6 class +#' Forecaster = R6::R6Class("Forecaster", private = list( .params = list(), diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 12fc9d4..5a818ca 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -1,8 +1,15 @@ -#' Obtain jump forecast by the Neighbors method +#' getNeighborsJumpPredict +#' +#' Apply optimized weights on gaps observed on selected neighbors. 
+#' This jump prediction method can only be used in conjunction with the Neighbors +#' Forecaster, because it makes use of the optimized parameters to re-apply the weights +#' on the jumps observed at days interfaces of the past neighbors. #' #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' +#' @aliases J_Neighbors +#' getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) { first_day = max(1, today-memory) diff --git a/pkg/R/J_Persistence.R b/pkg/R/J_Persistence.R index a85a42a..37d05fe 100644 --- a/pkg/R/J_Persistence.R +++ b/pkg/R/J_Persistence.R @@ -1,8 +1,15 @@ -#' Obtain jump forecast by the Persistence method +#' getPersistenceJumpPredict +#' +#' Analog of the PersistenceForecaster: predict the jump after last observed value either +#' by re-applying the last jump between similar day and its follower (if argument +#' "same_day" is TRUE), or by re-using the very last observed jump (when "same_day" = +#' FALSE). #' #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' +#' @aliases J_Persistence +#' getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...) { #return gap between end of similar day curve and first day of tomorrow (in the past) diff --git a/pkg/R/J_Zero.R b/pkg/R/J_Zero.R index d9140a5..065226e 100644 --- a/pkg/R/J_Zero.R +++ b/pkg/R/J_Zero.R @@ -1,9 +1,13 @@ -#' Just predict zero "jump" (for reference, benchmarking at least) +#' getZeroJumpPredict +#' +#' Just predict zero "jump" (for reference, benchmarking at least). #' #' @inheritParams computeForecast #' @param today Index of the current day (predict tomorrow) #' @param params Optional parameters computed by the main forecaster #' +#' @aliases J_Zero +#' getZeroJumpPredict = function(data, today, memory, horizon, params, ...) 
{ 0 diff --git a/pkg/R/computeError.R b/pkg/R/computeError.R index 7da1032..1fbb2a8 100644 --- a/pkg/R/computeError.R +++ b/pkg/R/computeError.R @@ -1,13 +1,16 @@ #' Compute error #' -#' Obtain the errors between forecast and data +#' Compute the errors between forecasted and measured series. #' -#' @param data Dataset, object of class \code{Data} output of \code{getData} -#' @param pred Forecast object, class \code{Forecast} output of \code{computeForecast} +#' @param data Object of class \code{Data} output of \code{getData} +#' @param pred Object of class \code{Forecast} output of \code{computeForecast} #' @param horizon Horizon where to compute the error #' (<= horizon used in \code{computeForecast}) #' -#' @return A list (abs,MAPE) of lists (day,indices) +#' @return A list (abs,MAPE) of lists (day,indices). The "indices" slots contain series +#' of size L where L is the number of predicted days; i-th value is the averaged error +#' (absolute or MAPE) on day i. The "day" slots contain curves of errors, for each time +#' step, averaged on the L forecasting days. #' #' @export computeError = function(data, pred, horizon=data$getStdHorizon()) diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 1d69777..23ccb04 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -1,10 +1,10 @@ #' Compute forecast #' -#' Predict time-series curves for the selected days indices. +#' Predict time-series curves ("tomorrows") at the selected days indices ("todays"). +#' This function just runs a loop over all requested indices, and stores the individual +#' forecasts into a list which is then turned into a Forecast object. #' -#' TODO: details -#' -#' @param data Object of type \code{Data}, output of \code{getData()}. +#' @param data Object of class Data, output of \code{getData()}. #' @param indices Indices where to forecast (the day after); integers relative to the #' beginning of data, or (convertible to) Date objects. 
#' @param forecaster Name of the main forecaster; more details: ?F_ @@ -44,7 +44,7 @@ #' PM10_values_of_last_24h, #' exogenous_measures_of_last_24h, #' exogenous_predictions_for_next_24h) -#' pred <- forecaster$predictSerie(data, data$getSize()-1, ...) +#' pred <- forecaster$predictSerie(data, data$getSize(), ...) #' #do_something_with_pred #' }} #' @export diff --git a/pkg/R/getData.R b/pkg/R/getData.R index 268c54a..a6401f9 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -1,19 +1,20 @@ -#' @title Acquire data in a clean format +#' getData #' -#' @description Take in input data frames and/or files containing raw data, and -#' timezones, and output a Data object, roughly corresponding to a list where each cell -#' contains all value for one day (see \code{?Data}). +#' Acquire data as a Data object; see ?Data. +#' +#' Since series are given in columns (database format), this function builds series one +#' by one and incrementally grows a Data object which is finally returned. #' #' @param ts_data Time-series, as a data frame (DB style: 2 columns, first is date/time, -#' second is value) or a CSV file -#' @param exo_data Exogenous variables, as a data frame or a CSV file; first comlumn is +#' second is value) or a CSV file. +#' @param exo_data Exogenous variables, as a data frame or a CSV file; first column is #' dates, next block are measurements for the day, and final block are exogenous -#' forecasts +#' forecasts (for the same day). #' @param input_tz Timezone in the input files ("GMT" or e.g. "Europe/Paris") #' @param date_format How date/time are stored (e.g. year/month/day hour:minutes; -#' see \code{strptime}) +#' see ?strptime) #' @param working_tz Timezone to work with ("GMT" or e.g. "Europe/Paris") -#' @param predict_at When does the prediction take place ? Integer, in hours. Default: 0 +#' @param predict_at When does the prediction take place? Integer, in hours. 
Default: 0 #' @param limit Number of days to extract (default: Inf, for "all") #' #' @return An object of class Data diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 5ea4843..48b456c 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -1,8 +1,8 @@ #' Plot curves #' -#' Plot a range of curves in data +#' Plot a range of curves in data. #' -#' @param data Object of class Data +#' @inheritParams computeError #' @param indices Range of indices (integers or dates) #' #' @export @@ -22,9 +22,9 @@ plotCurves <- function(data, indices=seq_len(data$getSize())) #' Plot error #' -#' Draw error graphs, potentially from several runs of \code{computeForecast} +#' Draw error graphs, potentially from several runs of \code{computeForecast()}. #' -#' @param err Error as returned by \code{computeError} +#' @param err Error as returned by \code{computeError()} #' @param cols Colors for each error (default: 1,2,3,...) #' #' @seealso \code{\link{plotCurves}}, \code{\link{plotPredReal}}, @@ -74,10 +74,9 @@ plotError <- function(err, cols=seq_along(err)) #' Plot measured / predicted #' -#' Plot measured curve (in black) and predicted curve (in blue) +#' Plot measured curve (in black) and predicted curve (in blue). #' -#' @param data Object return by \code{getData} -#' @param pred Object as returned by \code{computeForecast} +#' @inheritParams computeError #' @param index Index in forecasts (integer or date) #' #' @export @@ -95,9 +94,9 @@ plotPredReal <- function(data, pred, index) #' Plot similarities #' -#' Plot histogram of similarities (weights) +#' Plot histogram of similarities (weights), for 'Neighbors' method. 
#' -#' @param pred Object as returned by \code{computeForecast} +#' @inheritParams computeError #' @param index Index in forecasts (integer or date) #' #' @export @@ -112,10 +111,10 @@ plotSimils <- function(pred, index) #' Functional boxplot #' -#' Draw the functional boxplot on the left, and bivariate plot on the right +#' Draw the functional boxplot on the left, and bivariate plot on the right. #' -#' @param data Object return by \code{getData} -#' @param indices integer or date indices to process +#' @inheritParams computeError +#' @inheritParams plotCurves #' #' @export plotFbox <- function(data, indices=seq_len(data$getSize())) @@ -138,10 +137,10 @@ plotFbox <- function(data, indices=seq_len(data$getSize())) #' Compute filaments #' -#' Get similar days in the past, as black as distances are small +#' Obtain similar days in the past, and (optionally) plot them -- as black as distances +#' are small. #' -#' @param data Object as returned by \code{getData} -#' @param pred Object of class Forecast +#' @inheritParams computeError #' @param index Index in forecast (integer or date) #' @param limit Number of neighbors to consider #' @param plot Should the result be plotted? @@ -197,9 +196,9 @@ computeFilaments <- function(data, pred, index, limit=60, plot=TRUE) #' Functional boxplot on filaments #' -#' Draw the functional boxplot on filaments obtained by \code{computeFilaments} +#' Draw the functional boxplot on filaments obtained by \code{computeFilaments()}. #' -#' @param data Object return by \code{getData} +#' @inheritParams computeError #' @param fil Output of \code{computeFilaments} #' #' @export @@ -227,10 +226,10 @@ plotFilamentsBox = function(data, fil) #' Plot relative conditional variability / absolute variability #' #' Draw the relative conditional variability / absolute variability based on filaments -#' obtained by \code{computeFilaments} +#' obtained by \code{computeFilaments()}. 
#' -#' @param data Object return by \code{getData} -#' @param fil Output of \code{computeFilaments} +#' @inheritParams computeError +#' @inheritParams plotFilamentsBox #' #' @export plotRelVar = function(data, fil) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index bbf1387..3124881 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -1,14 +1,14 @@ #' dateIndexToInteger #' -#' Transform a (potential) date index into an integer (relative to data) +#' Transform a (potential) date index into an integer (relative to data beginning). #' #' @param index Date (or integer) index -#' @param data Object of class \code{Data} +#' @param data Object of class Data, output of \code{getData()} #' #' @export dateIndexToInteger = function(index, data) { - #works on integers too: trust input + # Works on integers too: trust input if (is.numeric(index)) index = as.integer(index) if (is.integer(index)) @@ -30,15 +30,14 @@ dateIndexToInteger = function(index, data) #' integerIndexToDate #' -#' Transform an integer index to date index (relative to data) +#' Transform an integer index (relative to data beginning) into a date index. #' -#' @param index Date (or integer) index -#' @param data Object of class \code{Data} +#' @inheritParams dateIndexToInteger #' #' @export integerIndexToDate = function(index, data) { - #works on dates too: trust input + # Works on dates too: trust input if (is.character(index)) index = as.Date(index) if (is(index,"Date")) @@ -54,7 +53,8 @@ integerIndexToDate = function(index, data) #' getSimilarDaysIndices #' -#' Find similar days indices in the past. +#' Find similar days indices in the past; at least same type of day in the week: +#' monday=tuesday=wednesday=thursday != friday != saturday != sunday. 
#' #' @param index Day index (numeric or date) #' @param data Reference dataset, object output of \code{getData} @@ -88,10 +88,10 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) # isSameSeason # -# Check if two months fall in the same "season" (defined by estimated pollution rate) +# Check if two months fall in the same "season" (defined by estimated pollution rate). # -# @param month month index to test -# @param month_ref month to compare to +# @param month Month index to test +# @param month_ref Month to compare to # .isSameSeason = function(month, month_ref) { @@ -104,10 +104,10 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) # isSameDay # -# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials +# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials. # -# @param day day index to test -# @param day_ref day index to compare to +# @param day Day index to test +# @param day_ref Day index to compare to # .isSameDay = function(day, day_ref) { @@ -122,7 +122,7 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) #' #' Get indices in data of no-NA series followed by no-NA, within [first,last] range. #' -#' @param data Object of class Data +#' @inheritParams dateIndexToInteger #' @param first First index (included) #' @param last Last index (included) #'