#' Neighbors Forecaster
#'
-#' Predict next serie as a weighted combination of "futures of the past" days,
-#' where days in the past are chosen and weighted according to some similarity measures.
+#' Predict the next series as a weighted combination of curves observed on "similar"
+#' days in the past (and in the future if 'opera'=FALSE); the nature of the similarity
+#' is controlled by the options 'simtype' and 'local' (see below).
#'
-#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' predict_from, horizon respectively for the dataset (object output of
-#' \code{getData()}), the current index, the data depth (in days), the first predicted
-#' hour and the last predicted hour.
-#' In addition, optional arguments can be passed:
+#' Optional arguments:
#' \itemize{
-#' \item local : TRUE (default) to constrain neighbors to be "same days within same
-#' season"
-#' \item simtype : 'endo' for a similarity based on the series only,<cr>
+#' \item local: TRUE (default) to constrain neighbors to be "same days in same season"
+#' \item simtype: 'endo' for a similarity based on the series only,<cr>
#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
-#' \item window : A window for similarities computations; override cross-validation
+#' \item window: A window for similarities computations; override cross-validation
#' window estimation.
#' }
#' The method is summarized as follows:
#' \enumerate{
-#' \item Determine N (=20) recent days without missing values, and followed by a
-#' tomorrow also without missing values.
+#' \item Determine N (=20) recent days without missing values, each preceded by a
+#' curve that also has no missing values.
#' \item Optimize the window parameters (if relevant) on the N chosen days.
#' \item Considering the optimized window, compute the neighbors (with locality
#' constraint or not), compute their similarities -- using a gaussian kernel if
if (!opera)
tdays = setdiff(tdays, today) #always exclude current day
- # Shortcut if window is known
- if (hasArg("window"))
+ # Shortcut if window is known or local==TRUE && simtype==none
+ if (hasArg("window") || (local && simtype=="none"))
{
return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon,
local, list(...)$window, simtype, opera, TRUE) )
if (local)
{
- # TODO: 60 == magic number
- tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
+ # limit=Inf to not censor any day (TODO: finite limit? 60?)
+ tdays = getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
days_in=tdays_cut, operational=opera)
-# if (length(tdays) <= 1)
-# return (NA)
# TODO: 10 == magic number
tdays = .getConstrainedNeighbs(today, data, tdays, min_neighbs=10)
if (length(tdays) == 1)
}
return ( data$getSerie(tdays[1])[predict_from:horizon] )
}
+ max_neighbs = 12 #TODO: 10 or 12 or... ?
+ if (length(tdays) > max_neighbs)
+ {
+ distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
+ ordering <- order(distances2)
+ tdays <- tdays[ ordering[1:max_neighbs] ]
+ }
}
else
tdays = tdays_cut #no conditioning
window_endo = ifelse(simtype=="mix", window[1], window)
# Distances from last observed day to selected days in the past
+ # TODO: redundant computation if local==TRUE
distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
- if (local)
- {
- max_neighbs = 12 #TODO: 12 = arbitrary number
- if (length(distances2) > max_neighbs)
- {
- ordering <- order(distances2)
- tdays <- tdays[ ordering[1:max_neighbs] ]
- distances2 <- distances2[ ordering[1:max_neighbs] ]
- }
- }
-
simils_endo <- .computeSimils(distances2, window_endo)
}
.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10)
{
levelToday = data$getLevelHat(today)
-# levelYersteday = data$getLevel(today-1)
- distances = sapply(tdays, function(i) {
-# sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
- abs(data$getLevel(i)-levelToday)
- })
+ distances = sapply( tdays, function(i) abs(data$getLevel(i) - levelToday) )
#TODO: 1, +1, +3 : magic numbers
dist_thresh = 1
min_neighbs = min(min_neighbs,length(tdays))
break
dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
- tdays = tdays[same_pollution]
-# max_neighbs = 12
-# if (nb_neighbs > max_neighbs)
-# {
-# # Keep only max_neighbs closest neighbors
-# tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
-# }
- tdays
+ tdays[same_pollution]
}
# compute similarities