From 4f3fdbb8e2ac4bd57a4e27539a58ef0e7ec2304c Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Thu, 4 May 2017 14:45:54 +0200 Subject: [PATCH] update documention, fix package to compete with 'method Bruno' --- Etude_En_Cours_R_E_bis.R | 10 +++--- pkg/R/Data.R | 2 +- pkg/R/F_Average.R | 6 ++-- pkg/R/F_Neighbors.R | 28 +++++----------- pkg/R/F_Zero.R | 3 +- pkg/R/Forecast.R | 2 +- pkg/R/Forecaster.R | 50 ++++++++++++++++------------ pkg/R/computeError.R | 6 ++-- pkg/R/computeForecast.R | 72 ++++++++++++++++++++++++---------------- pkg/R/plot.R | 8 +---- 10 files changed, 97 insertions(+), 90 deletions(-) diff --git a/Etude_En_Cours_R_E_bis.R b/Etude_En_Cours_R_E_bis.R index 1fe1387..56240d0 100644 --- a/Etude_En_Cours_R_E_bis.R +++ b/Etude_En_Cours_R_E_bis.R @@ -107,11 +107,11 @@ Kvois = NULL # w = w/sum(w) # W = w %o% rep(1,48) -print("Voisins + ij") -print(sort(D)[1:nbvois]) -print(dateJPrev) -print(rownames(data)[ind]) -print(data[ind, 1:48]) +#print("Voisins + ij") +#print(sort(D)[1:nbvois]) +#print(dateJPrev) +#print(rownames(data)[sort(ind)]) +#print(data[sort(ind), 1:48]) JourMoy = apply(data[ind, 1:48], 2, mean) #JourMoy = apply(W*data[ind, 1:48], 2, sum) diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 39e34de..d35c88a 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -1,6 +1,6 @@ #' Data #' -#' Data encapsulation as a list (days) of lists (components). +#' Data encapsulation, in the form of a few lists (time-series + exogenous variables). #' #' The private field .tv is a list where each cell contains the hourly variables for a #' period of time of 24 hours, from 1am to next midnight. The other lists contain diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index bee1974..4d395ac 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -3,9 +3,9 @@ #' Pointwise average of all the series of the same day of week in the past. #' #' For example, if the current day (argument "today") is a tuesday, then all series -#' corresponding to wednesdays in the past (until the beginning or memory limit) are -#' averaged to provide a smooth prediction. This forecast will most of the time be wrong, -#' but will also look plausible enough. +#' corresponding to tuesday in the past (until the beginning or memory limit) -- and in +#' the future if 'opera' is FALSE -- are averaged to provide a smooth prediction. This +#' forecast will most of the time be wrong, but will also look plausible enough. #' #' @usage # AverageForecaster$new(pjump) #' diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index d63c177..9d1e3fb 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -1,27 +1,23 @@ #' Neighbors Forecaster #' -#' Predict next serie as a weighted combination of "futures of the past" days, -#' where days in the past are chosen and weighted according to some similarity measures. +#' Predict next serie as a weighted combination of curves observed on "similar" days in +#' the past (and future if 'opera'=FALSE); the nature of the similarity is controlled by +#' the options 'simtype' and 'local' (see below). #' -#' The main method is \code{predictShape()}, taking arguments data, today, memory, -#' predict_from, horizon respectively for the dataset (object output of -#' \code{getData()}), the current index, the data depth (in days), the first predicted -#' hour and the last predicted hour. -#' In addition, optional arguments can be passed: +#' Optional arguments: #' \itemize{ -#' \item local : TRUE (default) to constrain neighbors to be "same days within same -#' season" -#' \item simtype : 'endo' for a similarity based on the series only, +#' \item local: TRUE (default) to constrain neighbors to be "same days in same season" +#' \item simtype: 'endo' for a similarity based on the series only, #' 'exo' for a similarity based on exogenous variables only, #' 'mix' for the product of 'endo' and 'exo', #' 'none' (default) to apply a simple average: no computed weights -#' \item window : A window for similarities computations; override cross-validation +#' \item window: A window for similarities computations; override cross-validation #' window estimation. #' } #' The method is summarized as follows: #' \enumerate{ -#' \item Determine N (=20) recent days without missing values, and followed by a -#' tomorrow also without missing values. +#' \item Determine N (=20) recent days without missing values, and preceded by a +#' curve also without missing values. #' \item Optimize the window parameters (if relevant) on the N chosen days. #' \item Considering the optimized window, compute the neighbors (with locality #' constraint or not), compute their similarities -- using a gaussian kernel if @@ -152,12 +148,6 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", distances2 <- .computeDistsEndo(data, today, tdays, predict_from) ordering <- order(distances2) tdays <- tdays[ ordering[1:max_neighbs] ] - - print("VVVVV") - print(sort(distances2)[1:max_neighbs]) - print(integerIndexToDate(today,data)) - print(lapply(tdays,function(i) integerIndexToDate(i,data))) - print(rbind(data$getSeries(tdays-1), data$getSeries(tdays))) } } else diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 4e1ae11..21e6e00 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -1,7 +1,6 @@ #' Zero Forecaster #' -#' Flat prediction: always forecast a serie of zeros. -#' This serie is then adjusted using the ".pjump" function (see \code{Forecaster} class). +#' Flat prediction: always forecast a serie of zeros (adjusted by 'pjump' function). #' #' @usage # ZeroForecaster$new(pjump) #' diff --git a/pkg/R/Forecast.R b/pkg/R/Forecast.R index a983046..ff7fb8d 100644 --- a/pkg/R/Forecast.R +++ b/pkg/R/Forecast.R @@ -3,7 +3,7 @@ #' Forecast encapsulation as a list (days where prediction occur) of lists (components). #' #' The private field .pred is a list where each cell contains the predicted variables for -#' a period of time of H<=24 hours, from hour P until P+H-1, where P == predict_from. +#' a period of time of H-P+1<=24 hours, from hour P until H, where P == predict_from. #' \code{forecast$getForecast(i)} output forecasts for #' \code{data$getSerie(forecast$getIndexInData(i))}. #' diff --git a/pkg/R/Forecaster.R b/pkg/R/Forecaster.R index 5258760..2b259fc 100644 --- a/pkg/R/Forecaster.R +++ b/pkg/R/Forecaster.R @@ -6,8 +6,9 @@ #' example "Neighbors" method stores informations about the considered neighborhood for #' the current prediction task) and one main function: \code{predictSerie()}. This last #' function (by default) calls \code{predictShape()} to get a forecast of a centered -#' serie, and then calls the "jump prediction" function -- see "field" section -- to -#' adjust it based on the last observed values. +#' serie, and then calls the "jump prediction" function if it's provided -- see "field" +#' section -- to adjust it based on the last observed values. The main method in derived +#' forecasters is \code{predictShape()}; see 'Methods' section. #' #' @usage # Forecaster$new(pjump) #warning: predictShape() is unimplemented #' @@ -15,25 +16,27 @@ #' @field .pjump Function: how to predict the jump at day interface? The arguments of #' this function are -- in this order: #' \itemize{ -#' \item data : object output of \code{getData()}, -#' \item today : index (integer or date) of the last known day in data, -#' \item memory : number of days to use in the past (including today), -#' \item horizon : number of time steps to predict, -#' \item params : optimized parameters in the main method \code{predictShape()}, -#' \item ... : additional arguments. +#' \item data: object output of \code{getData()}, +#' \item today: index of the current day in data (known until predict_from-1), +#' \item memory: number of days to use in the past (including today), +#' \item predict_from: first time step to predict (in [1,24]) +#' \item horizon: last time step to predict (in [predict_from,24]), +#' \item params: optimized parameters in the main method \code{predictShape()}, +#' \item ...: additional arguments. #' } #' .pjump returns an estimation of the jump after the last observed value. #' #' @section Methods: #' \describe{ #' \item{\code{initialize(data, pjump)}}{ -#' Initialize a Forecaster object with a Data object and a jump prediction function.} -#' \item{\code{predictSerie(today,memory,horizon,...)}}{ -#' Predict a new serie of \code{horizon} values at day index \code{today} using -#' \code{memory} days in the past.} -#' \item{\code{predictShape(today,memory,horizon,...)}}{ -#' Predict a new shape of \code{horizon} values at day index \code{today} using +#' Initialize a Forecaster object with a Data object and a jump prediction function, +#' or NULL if \code{predictShape()} returns an adjusted curve.} +#' \item{\code{predictSerie(data,today,memory,predict_from,horizon,...)}}{ +#' Predict the next curve (at index today) from predict_from to horizon (hours), using #' \code{memory} days in the past.} +#' \item{\code{predictShape(data,today,memory,predict_from,horizon,...)}}{ +#' Predict the shape of the next curve (at index today) from predict_from to horizon +#' (hours), using \code{memory} days in the past.} #' \item{\code{getParameters()}}{ #' Return (internal) parameters.} #' } @@ -55,15 +58,20 @@ Forecaster = R6::R6Class("Forecaster", predictSerie = function(data, today, memory, predict_from, horizon, ...) { # Parameters (potentially) computed during shape prediction stage - predicted_shape = self$predictShape(data,today,memory,predict_from,horizon,...) - predicted_delta = private$.pjump(data, today, memory, predict_from, horizon, - private$.params, ...) + predicted_shape <- self$predictShape(data,today,memory,predict_from,horizon,...) + predicted_delta <- + if (is.null(private$.pjump)) + NULL + else + private$.pjump(data,today,memory,predict_from,horizon,private$.params,...) - # Predicted shape is aligned on the end of current day + jump + # Predicted shape is aligned on the end of current day + jump (if jump!=NULL) c( data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()], - predicted_shape - predicted_shape[1] + predicted_delta + - ifelse(predict_from>=2, - data$getSerie(today)[predict_from-1], tail(data$getSerie(today-1),1)) ) + predicted_shape + ifelse( is.null(private$.pjump), + 0, + predicted_delta - predicted_shape[1] + + ifelse(predict_from>=2, + data$getSerie(today)[predict_from-1], tail(data$getSerie(today-1),1)) ) ) }, predictShape = function(data, today, memory, predict_from, horizon, ...) NULL #empty default implementation: to implement in inherited classes diff --git a/pkg/R/computeError.R b/pkg/R/computeError.R index c3bc4f3..db5783e 100644 --- a/pkg/R/computeError.R +++ b/pkg/R/computeError.R @@ -4,8 +4,10 @@ #' #' @param data Object of class \code{Data} output of \code{getData} #' @param pred Object of class \code{Forecast} output of \code{computeForecast} -#' @param horizon Horizon where to compute the error -#' (<= horizon used in \code{computeForecast}) +#' @param predict_from First time step to consider (>= predict_from used in +#' \code{computeForecast()}) +#' @param horizon Horizon where to compute the error (<= horizon used in +#' \code{computeForecast}) #' #' @return A list (abs,MAPE) of lists (day,indices). The "indices" slots contain series #' of size L where L is the number of predicted days; i-th value is the averaged error diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index ef46dd3..c778d66 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -1,28 +1,31 @@ #' Compute forecast #' -#' Predict time-series curves ("tomorrows") at the selected days indices ("todays"). -#' This function just runs a loop over all requested indices, and stores the individual -#' forecasts into a list which is then turned into a Forecast object. +#' Predict time-series curves ("today" from predict_from to horizon) at the selected days +#' indices ("today" from 1am to predict_from-1). This function just runs a loop over all +#' requested indices, and stores the individual forecasts into a Forecast object. #' #' @param data Object of class Data, output of \code{getData()}. #' @param indices Indices where to forecast (the day after); integers relative to the #' beginning of data, or (convertible to) Date objects. #' @param forecaster Name of the main forecaster; more details: ?F_ -#' \itemize{ -#' \item Persistence : use last (similar, next) day -#' \item Neighbors : weighted tomorrows of similar days -#' \item Average : average tomorrow of all same day-in-week -#' \item Zero : just output 0 (benchmarking purpose) -#' } +#' \itemize{ +#' \item Persistence : use last (similar) day +#' \item Neighbors : weighted similar days +#' \item Average : average curve of all same day-in-week +#' \item Zero : just output 0 (benchmarking purpose) +#' } #' @param pjump Function to predict the jump at the interface between two days; #' more details: ?J_ -#' \itemize{ -#' \item Persistence : use last (similar, next) day -#' \item Neighbors: re-use the weights from F_Neighbors -#' \item Zero: just output 0 (no adjustment) -#' } +#' \itemize{ +#' \item Persistence : use last (similar) day +#' \item Neighbors: re-use the weights from F_Neighbors +#' \item Zero: just output 0 (no adjustment) +#' } +#' If pjump=NULL, then no adjustment is performed (output of \code{predictShape()} is +#' used directly). +#' @param predict_from First time step to predict. #' @param memory Data depth (in days) to be used for prediction. -#' @param horizon Number of time steps to predict. +#' @param horizon Last time step to predict. #' @param ncores Number of cores for parallel execution (1 to disable). #' @param ... Additional parameters for the forecasting models. #' @@ -31,23 +34,30 @@ #' @examples #' ts_data <- system.file("extdata","pm10_mesures_H_loc.csv",package="talweg") #' exo_data <- system.file("extdata","meteo_extra_noNAs.csv",package="talweg") -#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", -#' predict_at=7, limit=200) +#' data <- getData(ts_data, exo_data, limit=200) #' pred <- computeForecast(data, 100:130, "Persistence", "Zero", -#' memory=50, horizon=12, ncores=1) -#' \dontrun{#Sketch for real-time mode: +#' predict_from=8, memory=50, horizon=12, ncores=1) +#' \dontrun{ +#' #Sketch for real-time mode: #' data <- Data$new() #' forecaster <- MyForecaster$new(myJumpPredictFunc) #' repeat { -#' # In the morning 7am+ or afternoon 1pm+: +#' # As soon as daily predictions are available: #' data$append( -#' times_from_H+1_yersteday_to_Hnow, -#' PM10_values_of_last_24h, -#' exogenous_measures_of_last_24h, -#' exogenous_predictions_for_next_24h) +#' level_hat=predicted_level, +#' exo_hat=predicted_exogenous) +#' # When a day ends: +#' data$append( +#' level=observed_level, +#' exo=observed_exogenous) +#' # And, at every hour: +#' data$append( +#' time=current_hour, +#' value=current_PM10) +#' # Finally, a bit before predict_from hour: #' pred <- forecaster$predictSerie(data, data$getSize(), ...) #' #do_something_with_pred -#' }} +#' } } #' @export computeForecast = function(data, indices, forecaster, pjump, predict_from, memory=Inf, horizon=length(data$getSerie(1)), ncores=3, ...) @@ -64,14 +74,18 @@ computeForecast = function(data, indices, forecaster, pjump, predict_from, integer_indices = sapply(indices, function(i) dateIndexToInteger(i,data)) if (any(integer_indices<=0 | integer_indices>data$getSize())) stop("Indices out of range") - if (!is.character(forecaster) || !is.character(pjump)) - stop("forecaster (name) and pjump (function) should be of class character") + if (!is.character(forecaster)) + stop("forecaster (name): character") + if (!is.null(pjump) && !is.character(pjump)) + stop("pjump (function): character or NULL") pred = Forecast$new( sapply(indices, function(i) integerIndexToDate(i,data)) ) forecaster_class_name = getFromNamespace( paste(forecaster,"Forecaster",sep=""), "talweg") - forecaster = forecaster_class_name$new( #.pjump = - getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg")) + + if (!is.null(pjump)) + pjump <- getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg") + forecaster = forecaster_class_name$new(pjump) computeOneForecast <- function(i) { diff --git a/pkg/R/plot.R b/pkg/R/plot.R index 465b606..2488d81 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -11,13 +11,7 @@ plotCurves <- function(data, indices=seq_len(data$getSize())) series = data$getSeries(indices) yrange = quantile(series, probs=c(0.025,0.975), na.rm=TRUE) par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5) - for (i in seq_along(indices)) - { - plot(series[,i], type="l", ylim=yrange, - xlab=ifelse(i==1,"Time (hours)",""), ylab=ifelse(i==1,"PM10","")) - if (i < length(indices)) - par(new=TRUE) - } + matplot(series, type="l", ylim=yrange, xlab="Time (hours)", ylab="PM10") } #' Plot error -- 2.44.0