# w = w/sum(w)
# W = w %o% rep(1,48)
-print("Voisins + ij")
-print(sort(D)[1:nbvois])
-print(dateJPrev)
-print(rownames(data)[ind])
-print(data[ind, 1:48])
+#print("Voisins + ij")
+#print(sort(D)[1:nbvois])
+#print(dateJPrev)
+#print(rownames(data)[sort(ind)])
+#print(data[sort(ind), 1:48])
JourMoy = apply(data[ind, 1:48], 2, mean)
#JourMoy = apply(W*data[ind, 1:48], 2, sum)
#' Data
#'
-#' Data encapsulation as a list (days) of lists (components).
+#' Data encapsulation, in the form of a few lists (time-series + exogenous variables).
#'
#' The private field .tv is a list where each cell contains the hourly variables for a
#' period of time of 24 hours, from 1am to next midnight. The other lists contain
#' Pointwise average of all the series of the same day of week in the past.
#'
#' For example, if the current day (argument "today") is a tuesday, then all series
-#' corresponding to wednesdays in the past (until the beginning or memory limit) are
-#' averaged to provide a smooth prediction. This forecast will most of the time be wrong,
-#' but will also look plausible enough.
+#' corresponding to tuesday in the past (until the beginning or memory limit) -- and in
+#' the future if 'opera' is FALSE -- are averaged to provide a smooth prediction. This
+#' forecast will most of the time be wrong, but will also look plausible enough.
#'
#' @usage # AverageForecaster$new(pjump)
#'
#' Neighbors Forecaster
#'
-#' Predict next serie as a weighted combination of "futures of the past" days,
-#' where days in the past are chosen and weighted according to some similarity measures.
+#' Predict next serie as a weighted combination of curves observed on "similar" days in
+#' the past (and future if 'opera'=FALSE); the nature of the similarity is controlled by
+#' the options 'simtype' and 'local' (see below).
#'
-#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' predict_from, horizon respectively for the dataset (object output of
-#' \code{getData()}), the current index, the data depth (in days), the first predicted
-#' hour and the last predicted hour.
-#' In addition, optional arguments can be passed:
+#' Optional arguments:
#' \itemize{
-#' \item local : TRUE (default) to constrain neighbors to be "same days within same
-#' season"
-#' \item simtype : 'endo' for a similarity based on the series only,<cr>
+#' \item local: TRUE (default) to constrain neighbors to be "same days in same season"
+#' \item simtype: 'endo' for a similarity based on the series only,<cr>
#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
-#' \item window : A window for similarities computations; override cross-validation
+#' \item window: A window for similarities computations; override cross-validation
#' window estimation.
#' }
#' The method is summarized as follows:
#' \enumerate{
-#' \item Determine N (=20) recent days without missing values, and followed by a
-#' tomorrow also without missing values.
+#' \item Determine N (=20) recent days without missing values, and preceded by a
+#' curve also without missing values.
#' \item Optimize the window parameters (if relevant) on the N chosen days.
#' \item Considering the optimized window, compute the neighbors (with locality
#' constraint or not), compute their similarities -- using a gaussian kernel if
distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
ordering <- order(distances2)
tdays <- tdays[ ordering[1:max_neighbs] ]
-
- print("VVVVV")
- print(sort(distances2)[1:max_neighbs])
- print(integerIndexToDate(today,data))
- print(lapply(tdays,function(i) integerIndexToDate(i,data)))
- print(rbind(data$getSeries(tdays-1), data$getSeries(tdays)))
}
}
else
#' Zero Forecaster
#'
-#' Flat prediction: always forecast a serie of zeros.
-#' This serie is then adjusted using the ".pjump" function (see \code{Forecaster} class).
+#' Flat prediction: always forecast a serie of zeros (adjusted by 'pjump' function).
#'
#' @usage # ZeroForecaster$new(pjump)
#'
#' Forecast encapsulation as a list (days where prediction occur) of lists (components).
#'
#' The private field .pred is a list where each cell contains the predicted variables for
-#' a period of time of H<=24 hours, from hour P until P+H-1, where P == predict_from.
+#' a period of time of H-P+1<=24 hours, from hour P until H, where P == predict_from.
#' \code{forecast$getForecast(i)} output forecasts for
#' \code{data$getSerie(forecast$getIndexInData(i))}.
#'
#' example "Neighbors" method stores informations about the considered neighborhood for
#' the current prediction task) and one main function: \code{predictSerie()}. This last
#' function (by default) calls \code{predictShape()} to get a forecast of a centered
-#' serie, and then calls the "jump prediction" function -- see "field" section -- to
-#' adjust it based on the last observed values.
+#' serie, and then calls the "jump prediction" function if it's provided -- see "field"
+#' section -- to adjust it based on the last observed values. The main method in derived
+#' forecasters is \code{predictShape()}; see 'Methods' section.
#'
#' @usage # Forecaster$new(pjump) #warning: predictShape() is unimplemented
#'
#' @field .pjump Function: how to predict the jump at day interface? The arguments of
#' this function are -- in this order:
#' \itemize{
-#' \item data : object output of \code{getData()},
-#' \item today : index (integer or date) of the last known day in data,
-#' \item memory : number of days to use in the past (including today),
-#' \item horizon : number of time steps to predict,
-#' \item params : optimized parameters in the main method \code{predictShape()},
-#' \item ... : additional arguments.
+#' \item data: object output of \code{getData()},
+#' \item today: index of the current day in data (known until predict_from-1),
+#' \item memory: number of days to use in the past (including today),
+#' \item predict_from: first time step to predict (in [1,24])
+#' \item horizon: last time step to predict (in [predict_from,24]),
+#' \item params: optimized parameters in the main method \code{predictShape()},
+#' \item ...: additional arguments.
#' }
#' .pjump returns an estimation of the jump after the last observed value.
#'
#' @section Methods:
#' \describe{
#' \item{\code{initialize(data, pjump)}}{
-#' Initialize a Forecaster object with a Data object and a jump prediction function.}
-#' \item{\code{predictSerie(today,memory,horizon,...)}}{
-#' Predict a new serie of \code{horizon} values at day index \code{today} using
-#' \code{memory} days in the past.}
-#' \item{\code{predictShape(today,memory,horizon,...)}}{
-#' Predict a new shape of \code{horizon} values at day index \code{today} using
+#' Initialize a Forecaster object with a Data object and a jump prediction function,
+#' or NULL if \code{predictShape()} returns an adjusted curve.}
+#' \item{\code{predictSerie(data,today,memory,predict_from,horizon,...)}}{
+#' Predict the next curve (at index today) from predict_from to horizon (hours), using
#' \code{memory} days in the past.}
+#' \item{\code{predictShape(data,today,memory,predict_from,horizon,...)}}{
+#' Predict the shape of the next curve (at index today) from predict_from to horizon
+#' (hours), using \code{memory} days in the past.}
#' \item{\code{getParameters()}}{
#' Return (internal) parameters.}
#' }
predictSerie = function(data, today, memory, predict_from, horizon, ...)
{
# Parameters (potentially) computed during shape prediction stage
- predicted_shape = self$predictShape(data,today,memory,predict_from,horizon,...)
- predicted_delta = private$.pjump(data, today, memory, predict_from, horizon,
- private$.params, ...)
+ predicted_shape <- self$predictShape(data,today,memory,predict_from,horizon,...)
+ predicted_delta <-
+ if (is.null(private$.pjump))
+ NULL
+ else
+ private$.pjump(data,today,memory,predict_from,horizon,private$.params,...)
- # Predicted shape is aligned on the end of current day + jump
+ # Predicted shape is aligned on the end of current day + jump (if jump!=NULL)
c( data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()],
- predicted_shape - predicted_shape[1] + predicted_delta +
- ifelse(predict_from>=2,
- data$getSerie(today)[predict_from-1], tail(data$getSerie(today-1),1)) )
+ predicted_shape + ifelse( is.null(private$.pjump),
+ 0,
+ predicted_delta - predicted_shape[1] +
+ ifelse(predict_from>=2,
+ data$getSerie(today)[predict_from-1], tail(data$getSerie(today-1),1)) ) )
},
predictShape = function(data, today, memory, predict_from, horizon, ...)
NULL #empty default implementation: to implement in inherited classes
#'
#' @param data Object of class \code{Data} output of \code{getData}
#' @param pred Object of class \code{Forecast} output of \code{computeForecast}
-#' @param horizon Horizon where to compute the error
-#' (<= horizon used in \code{computeForecast})
+#' @param predict_from First time step to consider (>= predict_from used in
+#' \code{computeForecast()})
+#' @param horizon Horizon where to compute the error (<= horizon used in
+#' \code{computeForecast})
#'
#' @return A list (abs,MAPE) of lists (day,indices). The "indices" slots contain series
#' of size L where L is the number of predicted days; i-th value is the averaged error
#' Compute forecast
#'
-#' Predict time-series curves ("tomorrows") at the selected days indices ("todays").
-#' This function just runs a loop over all requested indices, and stores the individual
-#' forecasts into a list which is then turned into a Forecast object.
+#' Predict time-series curves ("today" from predict_from to horizon) at the selected days
+#' indices ("today" from 1am to predict_from-1). This function just runs a loop over all
+#' requested indices, and stores the individual forecasts into a Forecast object.
#'
#' @param data Object of class Data, output of \code{getData()}.
#' @param indices Indices where to forecast (the day after); integers relative to the
#' beginning of data, or (convertible to) Date objects.
#' @param forecaster Name of the main forecaster; more details: ?F_<forecastername>
-#' \itemize{
-#' \item Persistence : use last (similar, next) day
-#' \item Neighbors : weighted tomorrows of similar days
-#' \item Average : average tomorrow of all same day-in-week
-#' \item Zero : just output 0 (benchmarking purpose)
-#' }
+#' \itemize{
+#' \item Persistence : use last (similar) day
+#' \item Neighbors : weighted similar days
+#' \item Average : average curve of all same day-in-week
+#' \item Zero : just output 0 (benchmarking purpose)
+#' }
#' @param pjump Function to predict the jump at the interface between two days;
#' more details: ?J_<functionname>
-#' \itemize{
-#' \item Persistence : use last (similar, next) day
-#' \item Neighbors: re-use the weights from F_Neighbors
-#' \item Zero: just output 0 (no adjustment)
-#' }
+#' \itemize{
+#' \item Persistence : use last (similar) day
+#' \item Neighbors: re-use the weights from F_Neighbors
+#' \item Zero: just output 0 (no adjustment)
+#' }
+#' If pjump=NULL, then no adjustment is performed (output of \code{predictShape()} is
+#' used directly).
+#' @param predict_from First time step to predict.
#' @param memory Data depth (in days) to be used for prediction.
-#' @param horizon Number of time steps to predict.
+#' @param horizon Last time step to predict.
#' @param ncores Number of cores for parallel execution (1 to disable).
#' @param ... Additional parameters for the forecasting models.
#'
#' @examples
#' ts_data <- system.file("extdata","pm10_mesures_H_loc.csv",package="talweg")
#' exo_data <- system.file("extdata","meteo_extra_noNAs.csv",package="talweg")
-#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT",
-#' predict_at=7, limit=200)
+#' data <- getData(ts_data, exo_data, limit=200)
#' pred <- computeForecast(data, 100:130, "Persistence", "Zero",
-#' memory=50, horizon=12, ncores=1)
-#' \dontrun{#Sketch for real-time mode:
+#' predict_from=8, memory=50, horizon=12, ncores=1)
+#' \dontrun{
+#' #Sketch for real-time mode:
#' data <- Data$new()
#' forecaster <- MyForecaster$new(myJumpPredictFunc)
#' repeat {
-#' # In the morning 7am+ or afternoon 1pm+:
+#' # As soon as daily predictions are available:
#' data$append(
-#' times_from_H+1_yersteday_to_Hnow,
-#' PM10_values_of_last_24h,
-#' exogenous_measures_of_last_24h,
-#' exogenous_predictions_for_next_24h)
+#' level_hat=predicted_level,
+#' exo_hat=predicted_exogenous)
+#' # When a day ends:
+#' data$append(
+#' level=observed_level,
+#' exo=observed_exogenous)
+#' # And, at every hour:
+#' data$append(
+#' time=current_hour,
+#' value=current_PM10)
+#' # Finally, a bit before predict_from hour:
#' pred <- forecaster$predictSerie(data, data$getSize(), ...)
#' #do_something_with_pred
-#' }}
+#' } }
#' @export
computeForecast = function(data, indices, forecaster, pjump, predict_from,
memory=Inf, horizon=length(data$getSerie(1)), ncores=3, ...)
integer_indices = sapply(indices, function(i) dateIndexToInteger(i,data))
if (any(integer_indices<=0 | integer_indices>data$getSize()))
stop("Indices out of range")
- if (!is.character(forecaster) || !is.character(pjump))
- stop("forecaster (name) and pjump (function) should be of class character")
+ if (!is.character(forecaster))
+ stop("forecaster (name): character")
+ if (!is.null(pjump) && !is.character(pjump))
+ stop("pjump (function): character or NULL")
pred = Forecast$new( sapply(indices, function(i) integerIndexToDate(i,data)) )
forecaster_class_name = getFromNamespace(
paste(forecaster,"Forecaster",sep=""), "talweg")
- forecaster = forecaster_class_name$new( #.pjump =
- getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg"))
+
+ if (!is.null(pjump))
+ pjump <- getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg")
+ forecaster = forecaster_class_name$new(pjump)
computeOneForecast <- function(i)
{
series = data$getSeries(indices)
yrange = quantile(series, probs=c(0.025,0.975), na.rm=TRUE)
par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5)
- for (i in seq_along(indices))
- {
- plot(series[,i], type="l", ylim=yrange,
- xlab=ifelse(i==1,"Time (hours)",""), ylab=ifelse(i==1,"PM10",""))
- if (i < length(indices))
- par(new=TRUE)
- }
+ matplot(series, type="l", ylim=yrange, xlab="Time (hours)", ylab="PM10")
}
#' Plot error