#' Neighbors Forecaster
#'
-#' Predict next serie as a weighted combination of "futures of the past" days,
-#' where days in the past are chosen and weighted according to some similarity measures.
+#' Predict the next series as a weighted combination of curves observed on "similar" days
+#' in the past (and in the future if 'opera'=FALSE); the nature of the similarity is
+#' controlled by the options 'simtype' and 'local' (see below).
#'
-#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' predict_from, horizon respectively for the dataset (object output of
-#' \code{getData()}), the current index, the data depth (in days), the first predicted
-#' hour and the last predicted hour.
-#' In addition, optional arguments can be passed:
+#' Optional arguments:
#' \itemize{
-#' \item local : TRUE (default) to constrain neighbors to be "same days within same
-#' season"
-#' \item simtype : 'endo' for a similarity based on the series only,<cr>
+#' \item local: TRUE (default) to constrain neighbors to be "same days in same season"
+#' \item simtype: 'endo' for a similarity based on the series only,<cr>
#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
-#' \item window : A window for similarities computations; override cross-validation
+#' \item window: A window for similarities computations; override cross-validation
#' window estimation.
#' }
#' The method is summarized as follows:
#' \enumerate{
-#' \item Determine N (=20) recent days without missing values, and followed by a
-#' tomorrow also without missing values.
+#' \item Determine N (=20) recent days without missing values, and preceded by a
+#' curve also without missing values.
#' \item Optimize the window parameters (if relevant) on the N chosen days.
#' \item Considering the optimized window, compute the neighbors (with locality
#' constraint or not), compute their similarities -- using a gaussian kernel if
if (!opera)
tdays = setdiff(tdays, today) #always exclude current day
- # Shortcut if window is known or local==TRUE && simtype==none
- if (hasArg("window") || (local && simtype=="none"))
+ # Shortcut if window is known
+ if (hasArg("window"))
{
return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon,
local, list(...)$window, simtype, opera, TRUE) )
best_window_exo = optimize(
errorOnLastNdays, c(0,7), simtype="exo")$minimum
}
+ if (local)
+ {
+ best_window_local = optimize(
+ errorOnLastNdays, c(3,30), simtype="none")$minimum
+ }
best_window =
if (simtype == "endo")
best_window_exo
else if (simtype == "mix")
c(best_window_endo,best_window_exo)
- else #none: value doesn't matter
- 1
+ else #none: no value
+ NULL
+ if (local)
+ best_window = c(best_window, best_window_local)
return( private$.predictShapeAux(data, tdays, today, predict_from, horizon, local,
best_window, simtype, opera, TRUE) )
if (local)
{
# limit=Inf to not censor any day (TODO: finite limit? 60?)
- tdays = getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
+ tdays <- getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
days_in=tdays_cut, operational=opera)
-# if (length(tdays) <= 1)
-# return (NA)
+ nb_neighbs <- round( window[length(window)] )
# TODO: 10 == magic number
- tdays = .getConstrainedNeighbs(today, data, tdays, min_neighbs=10)
+ tdays <- .getConstrainedNeighbs(today, data, tdays, nb_neighbs, opera)
if (length(tdays) == 1)
{
if (final_call)
{
private$.params$weights <- 1
private$.params$indices <- tdays
- private$.params$window <- 1
+ private$.params$window <- window
}
return ( data$getSerie(tdays[1])[predict_from:horizon] )
}
- max_neighbs = 10 #TODO: 12 = arbitrary number
+ max_neighbs = nb_neighbs #TODO: something else?
if (length(tdays) > max_neighbs)
{
distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
ordering <- order(distances2)
tdays <- tdays[ ordering[1:max_neighbs] ]
-
- print("VVVVV")
- print(sort(distances2)[1:max_neighbs])
- print(integerIndexToDate(today,data))
- print(lapply(tdays,function(i) integerIndexToDate(i,data)))
- print(rbind(data$getSeries(tdays-1), data$getSeries(tdays)))
}
}
else
if (simtype == "endo" || simtype == "mix")
{
- # Compute endogen similarities using given window
- window_endo = ifelse(simtype=="mix", window[1], window)
-
# Distances from last observed day to selected days in the past
# TODO: redundant computation if local==TRUE
distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
- simils_endo <- .computeSimils(distances2, window_endo)
+ # Compute endogenous similarities using the given window
+ simils_endo <- .computeSimils(distances2, window[1])
}
if (simtype == "exo" || simtype == "mix")
{
- # Compute exogen similarities using given window
- window_exo = ifelse(simtype=="mix", window[2], window)
-
- distances2 <- .computeDistsExo(data, today, tdays)
+ distances2 <- .computeDistsExo(data, today, tdays, opera)
+ # Compute exogenous similarities using the given window
+ window_exo = ifelse(simtype=="mix", window[2], window[1])
simils_exo <- .computeSimils(distances2, window_exo)
}
{
private$.params$weights <- similarities
private$.params$indices <- tdays
- private$.params$window <-
- if (simtype=="endo")
- window_endo
- else if (simtype=="exo")
- window_exo
- else if (simtype=="mix")
- c(window_endo,window_exo)
- else #none
- 1
+ private$.params$window <- window
}
return (prediction)
# @param min_neighbs Minimum number of points in a neighborhood
# @param max_neighbs Maximum number of points in a neighborhood
#
-.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10)
+.getConstrainedNeighbs = function(today, data, tdays, min_neighbs, opera)
{
- levelToday = data$getLevelHat(today)
-# levelYersteday = data$getLevel(today-1)
- distances = sapply(tdays, function(i) {
-# sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
- abs(data$getLevel(i)-levelToday)
- })
+ levelToday = ifelse(opera, tail(data$getLevelHat(today),1), data$getLevel(today))
+ distances = sapply( tdays, function(i) abs(data$getLevel(i) - levelToday) )
#TODO: 1, +1, +3 : magic numbers
dist_thresh = 1
min_neighbs = min(min_neighbs,length(tdays))
break
dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
- tdays = tdays[same_pollution]
-# max_neighbs = 12
-# if (nb_neighbs > max_neighbs)
-# {
-# # Keep only max_neighbs closest neighbors
-# tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
-# }
- tdays
+ tdays[same_pollution]
}
# compute similarities
sapply(tdays, function(i) {
delta = lastSerie - c(data$getSerie(i-1),
data$getSerie(i)[if (predict_from>=2) 1:(predict_from-1) else c()])
-# sqrt(mean(delta^2))
- sqrt(sum(delta^2))
+ sqrt(mean(delta^2))
})
}
-.computeDistsExo <- function(data, today, tdays)
+.computeDistsExo <- function(data, today, tdays, opera)
{
M = matrix( ncol=1+length(tdays), nrow=1+length(data$getExo(1)) )
- M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
+ if (opera)
+ M[,1] = c( tail(data$getLevelHat(today),1), as.double(data$getExoHat(today)) )
+ else
+ M[,1] = c( data$getLevel(today), as.double(data$getExo(today)) )
for (i in seq_along(tdays))
M[,i+1] = c( data$getLevel(tdays[i]), as.double(data$getExo(tdays[i])) )