pkg/R/F_Neighbors2.R

   1 #' @include Forecaster.R
   2 #'
   3 #' Neighbors2 Forecaster
   4 #'
   5 #' Predict tomorrow as a weighted combination of "futures of the past" days.
   6 #' Inherits \code{\link{Forecaster}}
   7 #'
   8 Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
   9     inherit = Forecaster,
  10
  11     public = list(
  12         predictShape = function(data, today, memory, horizon, ...)
  13         {
  14             # (re)initialize computed parameters
  15             private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
  16
  17             # Do not forecast on days with NAs (TODO: softer condition...)
  18             if (any(is.na(data$getCenteredSerie(today))))
  19                 return (NA)
  20
  21             # Determine indices of no-NAs days followed by no-NAs tomorrows
  22             fdays = getNoNA2(data, max(today-memory,1), today-1)
  23
  24             # Get optional args
  25             kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan"
  26             if (hasArg(h_window))
  27             {
  28                 return ( private$.predictShapeAux(data,
  29                     fdays, today, horizon, list(...)$h_window, kernel, TRUE) )
  30             }
  31
  32             # Indices of similar days for cross-validation; TODO: 45 = magic number
  33             sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
  34
  35             # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
  36             errorOnLastNdays = function(h, kernel)
  37             {
  38                 error = 0
  39                 nb_jours = 0
  40                 for (day in intersect(fdays,sdays))
  41                 {
  42                     # mix_strategy is never used here (simtype != "mix"), therefore left blank
  43                     prediction = private$.predictShapeAux(data,fdays,day,horizon,h,kernel,FALSE)
  44                     if (!is.na(prediction[1]))
  45                     {
  46                         nb_jours = nb_jours + 1
  47                         error = error +
  48                             mean((data$getSerie(i+1)[1:horizon] - prediction)^2)
  49                     }
  50                 }
  51                 return (error / nb_jours)
  52             }
  53
  54             # h :: only for endo in this variation
  55             h_best = optimize(errorOnLastNdays, c(0,7), kernel=kernel)$minimum
  56             return (private$.predictShapeAux(data,fdays,today,horizon,h_best,kernel,TRUE))
  57         }
  58     ),
  59     private = list(
  60         # Precondition: "today" is full (no NAs)
  61         .predictShapeAux = function(data, fdays, today, horizon, h, kernel, final_call)
  62         {
  63             fdays = fdays[ fdays < today ]
  64             # TODO: 3 = magic number
  65             if (length(fdays) < 3)
  66                 return (NA)
  67
  68             # Neighbors: days in "same season"
  69             sdays = getSimilarDaysIndices(today, limit=45, same_season=TRUE, data)
  70             indices = intersect(fdays,sdays)
  71             levelToday = data$getLevel(today)
  72             distances = sapply(seq_along(indices), function(i) abs(data$getLevel(i)-levelToday))
  73             same_pollution = (distances <= 2)
  74             if (sum(same_pollution) < 3) #TODO: 3 == magic number
  75             {
  76                 same_pollution = (distances <= 5)
  77                 if (sum(same_pollution) < 3)
  78                     return (NA)
  79             }
  80             indices = indices[same_pollution]
  81
  82             # Now OK: indices same season, same pollution level
  83             # ...........
  84
  85
  86             # ENDO:: Distances from last observed day to days in the past
  87             serieToday = data$getSerie(today)
  88             distances2 = sapply(indices, function(i) {
  89                 delta = serieToday - data$getSerie(i)
  90                 distances2[i] = mean(delta^2)
  91             })
  92
  93             sd_dist = sd(distances2)
  94             if (sd_dist < .Machine$double.eps)
  95             {
  96 #                   warning("All computed distances are very close: stdev too small")
  97                 sd_dist = 1 #mostly for tests... FIXME:
  98             }
  99             simils_endo =
 100                 if (kernel=="Gauss")
 101                     exp(-distances2/(sd_dist*h_endo^2))
 102                 else
 103                 {
 104                     # Epanechnikov
 105                     u = 1 - distances2/(sd_dist*h_endo^2)
 106                     u[abs(u)>1] = 0.
 107                     u
 108                 }
 109
 110 #           # EXOGENS: distances computations are enough
 111 #           # TODO: search among similar concentrations....... at this stage ?!
 112 #           M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
 113 #           M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
 114 #           for (i in seq_along(fdays))
 115 #               M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
 116 #
 117 #           sigma = cov(M) #NOTE: robust covariance is way too slow
 118 #           sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
 119 #
 120 #           # Distances from last observed day to days in the past
 121 #           distances2 = rep(NA, nrow(M)-1)
 122 #           for (i in 2:nrow(M))
 123 #           {
 124 #               delta = M[1,] - M[i,]
 125 #               distances2[i-1] = delta %*% sigma_inv %*% delta
 126 #           }
 127
 128             similarities = simils_endo
 129
 130             prediction = rep(0, horizon)
 131             for (i in seq_along(indices))
 132                 prediction = prediction + similarities[i] * data$getSerie(indices[i]+1)[1:horizon]
 133             prediction = prediction / sum(similarities, na.rm=TRUE)
 134
 135             if (final_call)
 136             {
 137                 private$.params$weights <- similarities
 138                 private$.params$indices <- indices
 139                 private$.params$window <- h
 140             }
 141
 142             return (prediction)
 143         }
 144     )
 145 )