#' Neighbors Forecaster
#'
-#' Predict next serie as a weighted combination of "futures of the past" days,
-#' where days in the past are chosen and weighted according to some similarity measures.
+#' Predict the next series as a weighted combination of curves observed on "similar" days
+#' in the past (and in the future if 'opera'=FALSE); the nature of the similarity is
+#' controlled by the options 'simtype' and 'local' (see below).
#'
-#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' predict_from, horizon respectively for the dataset (object output of
-#' \code{getData()}), the current index, the data depth (in days), the first predicted
-#' hour and the last predicted hour.
-#' In addition, optional arguments can be passed:
+#' Optional arguments:
#' \itemize{
-#' \item local : TRUE (default) to constrain neighbors to be "same days within same
-#' season"
-#' \item simtype : 'endo' for a similarity based on the series only,<cr>
+#' \item local: TRUE (default) to constrain neighbors to be "same days in same season"
+#' \item simtype: 'endo' for a similarity based on the series only,<cr>
#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
-#' \item window : A window for similarities computations; override cross-validation
+#' \item window: A window for similarities computations; override cross-validation
#' window estimation.
#' }
#' The method is summarized as follows:
#' \enumerate{
-#' \item Determine N (=20) recent days without missing values, and followed by a
-#' tomorrow also without missing values.
+#' \item Determine N (=20) recent days without missing values, and preceded by a
+#' curve also without missing values.
#' \item Optimize the window parameters (if relevant) on the N chosen days.
#' \item Considering the optimized window, compute the neighbors (with locality
#' constraint or not), compute their similarities -- using a gaussian kernel if
if (!opera)
tdays = setdiff(tdays, today) #always exclude current day
- # Shortcut if window is known or local==TRUE && simtype==none
- if (hasArg("window") || (local && simtype=="none"))
+ # Shortcut if window is known #TODO: cross-validation for number of days, on similar (yester)days
+ if (hasArg("window"))
{
return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon,
local, list(...)$window, simtype, opera, TRUE) )
# limit=Inf to not censor any day (TODO: finite limit? 60?)
tdays = getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
days_in=tdays_cut, operational=opera)
-# if (length(tdays) <= 1)
-# return (NA)
# TODO: 10 == magic number
tdays = .getConstrainedNeighbs(today, data, tdays, min_neighbs=10)
if (length(tdays) == 1)
}
return ( data$getSerie(tdays[1])[predict_from:horizon] )
}
- max_neighbs = 10 #TODO: 12 = arbitrary number
+ max_neighbs = 12 #TODO: 10 or 12 or... ?
if (length(tdays) > max_neighbs)
{
distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
ordering <- order(distances2)
tdays <- tdays[ ordering[1:max_neighbs] ]
-
- print("VVVVV")
- print(sort(distances2)[1:max_neighbs])
- print(integerIndexToDate(today,data))
- print(lapply(tdays,function(i) integerIndexToDate(i,data)))
- print(rbind(data$getSeries(tdays-1), data$getSeries(tdays)))
}
}
else
.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10)
{
levelToday = data$getLevelHat(today)
-# levelYersteday = data$getLevel(today-1)
- distances = sapply(tdays, function(i) {
-# sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
- abs(data$getLevel(i)-levelToday)
- })
+ distances = sapply( tdays, function(i) abs(data$getLevel(i) - levelToday) )
#TODO: 1, +1, +3 : magic numbers
dist_thresh = 1
min_neighbs = min(min_neighbs,length(tdays))
break
dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
- tdays = tdays[same_pollution]
-# max_neighbs = 12
-# if (nb_neighbs > max_neighbs)
-# {
-# # Keep only max_neighbs closest neighbors
-# tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
-# }
- tdays
+ tdays[same_pollution]
}
# compute similarities
sapply(tdays, function(i) {
delta = lastSerie - c(data$getSerie(i-1),
data$getSerie(i)[if (predict_from>=2) 1:(predict_from-1) else c()])
-# sqrt(mean(delta^2))
- sqrt(sum(delta^2))
+ sqrt(mean(delta^2))
})
}