#' Neighbors Forecaster
#'
-#' Predict next serie as a weighted combination of "futures of the past" days,
-#' where days in the past are chosen and weighted according to some similarity measures.
+#' Predict the next series as a weighted combination of curves observed on "similar" days
+#' in the past (and future if 'opera'=FALSE); the nature of the similarity is controlled
+#' by the options 'simtype' and 'local' (see below).
#'
-#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' predict_from, horizon respectively for the dataset (object output of
-#' \code{getData()}), the current index, the data depth (in days), the first predicted
-#' hour and the last predicted hour.
-#' In addition, optional arguments can be passed:
+#' Optional arguments:
#' \itemize{
-#' \item local : TRUE (default) to constrain neighbors to be "same days within same
-#' season"
-#' \item simtype : 'endo' for a similarity based on the series only,<cr>
+#' \item local: TRUE (default) to constrain neighbors to be "same days in same season"
+#' \item simtype: 'endo' for a similarity based on the series only,<cr>
#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
-#' \item window : A window for similarities computations; override cross-validation
+#' \item window: A window for similarities computations; override cross-validation
#' window estimation.
#' }
#' The method is summarized as follows:
#' \enumerate{
-#' \item Determine N (=20) recent days without missing values, and followed by a
-#' tomorrow also without missing values.
+#' \item Determine N (=20) recent days without missing values, and preceded by a
+#' curve also without missing values.
#' \item Optimize the window parameters (if relevant) on the N chosen days.
#' \item Considering the optimized window, compute the neighbors (with locality
#' constraint or not), compute their similarities -- using a gaussian kernel if
best_window_exo = optimize(
errorOnLastNdays, c(0,7), simtype="exo")$minimum
}
+ if (local)
+ {
+ best_window_local = optimize(
+ errorOnLastNdays, c(3,30), simtype="none")$minimum
+ }
best_window =
if (simtype == "endo")
best_window_exo
else if (simtype == "mix")
c(best_window_endo,best_window_exo)
- else #none: value doesn't matter
- 1
+ else #none: no value
+ NULL
+ if (local)
+ best_window = c(best_window, best_window_local)
return( private$.predictShapeAux(data, tdays, today, predict_from, horizon, local,
best_window, simtype, opera, TRUE) )
if (local)
{
- # TODO: 60 == magic number
- tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
+ # limit=Inf to not censor any day (TODO: finite limit? 60?)
+ tdays <- getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
days_in=tdays_cut, operational=opera)
-# if (length(tdays) <= 1)
-# return (NA)
+ nb_neighbs <- round( window[length(window)] )
# TODO: 10 == magic number
- tdays = .getConstrainedNeighbs(today, data, tdays, min_neighbs=10)
+ tdays <- .getConstrainedNeighbs(today, data, tdays, min_neighbs=nb_neighbs)
if (length(tdays) == 1)
{
if (final_call)
{
private$.params$weights <- 1
private$.params$indices <- tdays
- private$.params$window <- 1
+ private$.params$window <- window
}
return ( data$getSerie(tdays[1])[predict_from:horizon] )
}
+ max_neighbs = nb_neighbs #TODO: something else?
+ if (length(tdays) > max_neighbs)
+ {
+ distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
+ ordering <- order(distances2)
+ tdays <- tdays[ ordering[1:max_neighbs] ]
+ }
}
else
tdays = tdays_cut #no conditioning
if (simtype == "endo" || simtype == "mix")
{
- # Compute endogen similarities using given window
- window_endo = ifelse(simtype=="mix", window[1], window)
-
# Distances from last observed day to selected days in the past
+ # TODO: redundant computation if local==TRUE
distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
- if (local)
- {
- max_neighbs = 12 #TODO: 12 = arbitrary number
- if (length(distances2) > max_neighbs)
- {
- ordering <- order(distances2)
- tdays <- tdays[ ordering[1:max_neighbs] ]
- distances2 <- distances2[ ordering[1:max_neighbs] ]
- }
- }
-
- simils_endo <- .computeSimils(distances2, window_endo)
+ # Compute endogenous similarities using the given window
+ simils_endo <- .computeSimils(distances2, window[1])
}
if (simtype == "exo" || simtype == "mix")
{
- # Compute exogen similarities using given window
- window_exo = ifelse(simtype=="mix", window[2], window)
-
distances2 <- .computeDistsExo(data, today, tdays)
+ # Compute exogenous similarities using the given window
+ window_exo = ifelse(simtype=="mix", window[2], window[1])
simils_exo <- .computeSimils(distances2, window_exo)
}
{
private$.params$weights <- similarities
private$.params$indices <- tdays
- private$.params$window <-
- if (simtype=="endo")
- window_endo
- else if (simtype=="exo")
- window_exo
- else if (simtype=="mix")
- c(window_endo,window_exo)
- else #none
- 1
+ private$.params$window <- window
}
return (prediction)
.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10)
{
levelToday = data$getLevelHat(today)
-# levelYersteday = data$getLevel(today-1)
- distances = sapply(tdays, function(i) {
-# sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
- abs(data$getLevel(i)-levelToday)
- })
+ distances = sapply( tdays, function(i) abs(data$getLevel(i) - levelToday) )
#TODO: 1, +1, +3 : magic numbers
dist_thresh = 1
min_neighbs = min(min_neighbs,length(tdays))
break
dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
- tdays = tdays[same_pollution]
-# max_neighbs = 12
-# if (nb_neighbs > max_neighbs)
-# {
-# # Keep only max_neighbs closest neighbors
-# tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
-# }
- tdays
+ tdays[same_pollution]
}
# compute similarities