#' where days in the past are chosen and weighted according to some similarity measures.
#'
#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' horizon respectively for the dataset (object output of \code{getData()}), the current
-#' index, the data depth (in days) and the number of time steps to forecast.
+#' predict_from, horizon respectively for the dataset (object output of
+#' \code{getData()}), the current index, the data depth (in days), the first predicted
+#' hour and the last predicted hour.
#' In addition, optional arguments can be passed:
#' \itemize{
#' \item local : TRUE (default) to constrain neighbors to be "same days within same
#' season"
#' \item simtype : 'endo' for a similarity based on the series only,<cr>
-#' 'exo' for a similaruty based on exogenous variables only,<cr>
+#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
#' \item window : A window for similarities computations; override cross-validation
#' obtain the final prediction.
#' }
#'
-#' @usage f <- NeighborsForecaster$new(pjump)
+#' @usage # NeighborsForecaster$new(pjump)
#'
#' @docType class
#' @format R6 class, inherits Forecaster
inherit = Forecaster,
public = list(
- predictShape = function(data, today, memory, horizon, ...)
+ predictShape = function(data, today, memory, predict_from, horizon, ...)
{
# (re)initialize computed parameters
private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
# Do not forecast on days with NAs (TODO: softer condition...)
- if (any(is.na(data$getCenteredSerie(today))))
+ if (any(is.na(data$getSerie(today-1)))
+ || any(is.na(data$getSerie(today)[1:(predict_from-1)])))
+ {
return (NA)
+ }
# Determine indices of no-NAs days followed by no-NAs tomorrows
- fdays = .getNoNA2(data, max(today-memory,1), today-1)
+ fdays = .getNoNA2(data, max(today-memory,1), today-2)
# Get optional args
local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season?
if (hasArg("window"))
{
return ( private$.predictShapeAux(data,
- fdays, today, horizon, local, list(...)$window, simtype, TRUE) )
+ fdays, today, predict_from, horizon, local, list(...)$window, simtype, TRUE) )
}
# Indices of similar days for cross-validation; TODO: 20 = magic number
for (i in seq_along(cv_days))
{
# mix_strategy is never used here (simtype != "mix"), therefore left blank
- prediction = private$.predictShapeAux(data, fdays, cv_days[i], horizon, local,
- window, simtype, FALSE)
+ prediction = private$.predictShapeAux(data, fdays, cv_days[i], predict_from,
+ horizon, local, window, simtype, FALSE)
if (!is.na(prediction[1]))
{
nb_jours = nb_jours + 1
error = error +
- mean((data$getSerie(cv_days[i]+1)[1:horizon] - prediction)^2)
+ mean((data$getSerie(cv_days[i]+1)[predict_from:horizon] - prediction)^2)
}
}
return (error / nb_jours)
else #none: value doesn't matter
1
- return(private$.predictShapeAux(data, fdays, today, horizon, local,
- best_window, simtype, TRUE))
+ return( private$.predictShapeAux(data, fdays, today, predict_from, horizon, local,
+ best_window, simtype, TRUE) )
}
),
private = list(
# Precondition: "today" is full (no NAs)
- .predictShapeAux = function(data, fdays, today, horizon, local, window, simtype,
- final_call)
+ .predictShapeAux = function(data, fdays, today, predict_from, horizon, local, window,
+ simtype, final_call)
{
fdays_cut = fdays[ fdays < today ]
if (length(fdays_cut) <= 1)
private$.params$indices <- fdays
private$.params$window <- 1
}
- return ( data$getSerie(fdays[1])[1:horizon] )
+ return ( data$getSerie(fdays[1]+1)[predict_from:horizon] )
}
}
else
window_endo = ifelse(simtype=="mix", window[1], window)
# Distances from last observed day to days in the past
- serieToday = data$getSerie(today)
+ lastSerie = c( data$getSerie(today-1), data$getSerie(today)[1:(predict_from-1)] )
distances2 = sapply(fdays, function(i) {
- delta = serieToday - data$getSerie(i)
- mean(delta^2)
+ delta = lastSerie - c(data$getSerie(i),data$getSerie(i+1)[1:(predict_from-1)])
+ sqrt(mean(delta^2))
})
simils_endo <- .computeSimils(distances2, window_endo)
# Compute exogen similarities using given window
window_exo = ifelse(simtype=="mix", window[2], window)
- M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
- M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
+ M = matrix( ncol=1+length(fdays), nrow=1+length(data$getExo(1)) )
+ M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
for (i in seq_along(fdays))
- M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
+ M[,i+1] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
- sigma = cov(M) #NOTE: robust covariance is way too slow
+ sigma = cov(t(M)) #NOTE: robust covariance is way too slow
# TODO: 10 == magic number; more robust way == det, or always ginv()
sigma_inv =
if (length(fdays) > 10)
# Distances from last observed day to days in the past
distances2 = sapply(seq_along(fdays), function(i) {
- delta = M[1,] - M[i+1,]
+ delta = M[,1] - M[,i+1]
delta %*% sigma_inv %*% delta
})
rep(1, length(fdays))
similarities = similarities / sum(similarities)
- prediction = rep(0, horizon)
+ prediction = rep(0, horizon-predict_from+1)
for (i in seq_along(fdays))
- prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
+ {
+ prediction = prediction +
+ similarities[i] * data$getSerie(fdays[i]+1)[predict_from:horizon]
+ }
if (final_call)
{
#
.getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12)
{
- levelToday = data$getLevel(today)
- distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday))
- #TODO: 2, +3 : magic numbers
- dist_thresh = 2
+ levelToday = data$getLevelHat(today)
+ levelYersteday = data$getLevel(today-1)
+ distances = sapply(fdays, function(i) {
+ sqrt((data$getLevel(i)-levelYersteday)^2 + (data$getLevel(i+1)-levelToday)^2)
+ })
+ #TODO: 1, +1, +3 : magic numbers
+ dist_thresh = 1
min_neighbs = min(min_neighbs,length(fdays))
repeat
{
nb_neighbs = sum(same_pollution)
if (nb_neighbs >= min_neighbs) #will eventually happen
break
- dist_thresh = dist_thresh + 3
+ dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
fdays = fdays[same_pollution]
max_neighbs = 12
if (nb_neighbs > max_neighbs)
{
# Keep only max_neighbs closest neighbors
- fdays = fdays[
- sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ]
+ fdays = fdays[ order(distances[same_pollution])[1:max_neighbs] ]
}
fdays
}