pkg/R/F_Neighbors2.R

   1 #' @include Forecaster.R
   2 #'
   3 #' Neighbors2 Forecaster
   4 #'
   5 #' Predict tomorrow as a weighted combination of "futures of the past" days.
   6 #' Inherits \code{\link{Forecaster}}
   7 #'
   8 Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
   9     inherit = Forecaster,
  10
  11     public = list(
  12         predictSerie = function(data, today, memory, horizon, ...)
  13         {
  14             # This method predict shape + level at the same time, all in next call
  15             self$predictShape(data, today, memory, horizon, ...)
  16         },
  17         predictShape = function(data, today, memory, horizon, ...)
  18         {
  19             # (re)initialize computed parameters
  20             private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
  21
  22             # Do not forecast on days with NAs (TODO: softer condition...)
  23             if (any(is.na(data$getCenteredSerie(today))))
  24                 return (NA)
  25
  26             # Determine indices of no-NAs days followed by no-NAs tomorrows
  27             fdays = getNoNA2(data, max(today-memory,1), today-1)
  28
  29             # Get optional args
  30             simtype = ifelse(hasArg("simtype"), list(...)$simtype, "mix") #or "endo", or "exo"
  31             kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan"
  32             if (hasArg(h_window))
  33             {
  34                 return ( private$.predictShapeAux(data,
  35                     fdays, today, horizon, list(...)$h_window, kernel, simtype, TRUE) )
  36             }
  37
  38             # Indices of similar days for cross-validation; TODO: 45 = magic number
  39             sdays = getSimilarDaysIndices(today, data, limit=45, same_season=FALSE)
  40
  41             cv_days = intersect(fdays,sdays)
  42             # Limit to 20 most recent matching days (TODO: 20 == magic number)
  43             cv_days = sort(cv_days,decreasing=TRUE)[1:min(20,length(cv_days))]
  44
  45             # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
  46             errorOnLastNdays = function(h, kernel, simtype)
  47             {
  48                 error = 0
  49                 nb_jours = 0
  50                 for (i in seq_along(cv_days))
  51                 {
  52                     # mix_strategy is never used here (simtype != "mix"), therefore left blank
  53                     prediction = private$.predictShapeAux(data,
  54                         fdays, cv_days[i], horizon, h, kernel, simtype, FALSE)
  55                     if (!is.na(prediction[1]))
  56                     {
  57                         nb_jours = nb_jours + 1
  58                         error = error +
  59                             mean((data$getSerie(cv_days[i]+1)[1:horizon] - prediction)^2)
  60                     }
  61                 }
  62                 return (error / nb_jours)
  63             }
  64
  65             if (simtype != "endo")
  66             {
  67                 h_best_exo = optimize(
  68                     errorOnLastNdays, c(0,10), kernel=kernel, simtype="exo")$minimum
  69             }
  70             if (simtype != "exo")
  71             {
  72                 h_best_endo = optimize(
  73                     errorOnLastNdays, c(0,10), kernel=kernel, simtype="endo")$minimum
  74             }
  75
  76             if (simtype == "endo")
  77             {
  78                 return (private$.predictShapeAux(data,
  79                     fdays, today, horizon, h_best_endo, kernel, "endo", TRUE))
  80             }
  81             if (simtype == "exo")
  82             {
  83                 return (private$.predictShapeAux(data,
  84                     fdays, today, horizon, h_best_exo, kernel, "exo", TRUE))
  85             }
  86             if (simtype == "mix")
  87             {
  88                 h_best_mix = c(h_best_endo,h_best_exo)
  89                 return(private$.predictShapeAux(data,
  90                     fdays, today, horizon, h_best_mix, kernel, "mix", TRUE))
  91             }
  92         }
  93     ),
  94     private = list(
  95         # Precondition: "today" is full (no NAs)
  96         .predictShapeAux = function(data, fdays, today, horizon, h, kernel, simtype, final_call)
  97         {
  98             fdays = fdays[ fdays < today ]
  99             # TODO: 3 = magic number
 100             if (length(fdays) < 3)
 101                 return (NA)
 102
 103             # Neighbors: days in "same season"
 104             sdays = getSimilarDaysIndices(today, data, limit=45, same_season=TRUE)
 105             indices = intersect(fdays,sdays)
 106             if (length(indices) <= 1)
 107                 return (NA)
 108             levelToday = data$getLevel(today)
 109             distances = sapply(indices, function(i) abs(data$getLevel(i)-levelToday))
 110             # 2 and 5 below == magic numbers (determined by Bruno & Michel)
 111             same_pollution = (distances <= 2)
 112             if (sum(same_pollution) == 0)
 113             {
 114                 same_pollution = (distances <= 5)
 115                 if (sum(same_pollution) == 0)
 116                     return (NA)
 117             }
 118             indices = indices[same_pollution]
 119             if (length(indices) == 1)
 120             {
 121                 if (final_call)
 122                 {
 123                     private$.params$weights <- 1
 124                     private$.params$indices <- indices
 125                     private$.params$window <- 1
 126                 }
 127                 return ( data$getSerie(indices[1])[1:horizon] ) #what else?!
 128             }
 129
 130             if (simtype != "exo")
 131             {
 132                 h_endo = ifelse(simtype=="mix", h[1], h)
 133
 134                 # Distances from last observed day to days in the past
 135                 serieToday = data$getSerie(today)
 136                 distances2 = sapply(indices, function(i) {
 137                     delta = serieToday - data$getSerie(i)
 138                     mean(delta^2)
 139                 })
 140
 141                 sd_dist = sd(distances2)
 142                 if (sd_dist < .Machine$double.eps)
 143                 {
 144 #                   warning("All computed distances are very close: stdev too small")
 145                     sd_dist = 1 #mostly for tests... FIXME:
 146                 }
 147                 simils_endo =
 148                     if (kernel=="Gauss")
 149                         exp(-distances2/(sd_dist*h_endo^2))
 150                     else
 151                     {
 152                         # Epanechnikov
 153                         u = 1 - distances2/(sd_dist*h_endo^2)
 154                         u[abs(u)>1] = 0.
 155                         u
 156                     }
 157             }
 158
 159             if (simtype != "endo")
 160             {
 161                 h_exo = ifelse(simtype=="mix", h[2], h)
 162
 163                 M = matrix( nrow=1+length(indices), ncol=1+length(data$getExo(today)) )
 164                 M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
 165                 for (i in seq_along(indices))
 166                     M[i+1,] = c( data$getLevel(indices[i]), as.double(data$getExo(indices[i])) )
 167
 168                 sigma = cov(M) #NOTE: robust covariance is way too slow
 169                 # TODO: 10 == magic number; more robust way == det, or always ginv()
 170                 sigma_inv =
 171                     if (length(indices) > 10)
 172                         solve(sigma)
 173                     else
 174                         MASS::ginv(sigma)
 175
 176                 # Distances from last observed day to days in the past
 177                 distances2 = sapply(seq_along(indices), function(i) {
 178                     delta = M[1,] - M[i+1,]
 179                     delta %*% sigma_inv %*% delta
 180                 })
 181
 182                 sd_dist = sd(distances2)
 183                 if (sd_dist < .25 * sqrt(.Machine$double.eps))
 184                 {
 185 #                   warning("All computed distances are very close: stdev too small")
 186                     sd_dist = 1 #mostly for tests... FIXME:
 187                 }
 188                 simils_exo =
 189                     if (kernel=="Gauss")
 190                         exp(-distances2/(sd_dist*h_exo^2))
 191                     else
 192                     {
 193                         # Epanechnikov
 194                         u = 1 - distances2/(sd_dist*h_exo^2)
 195                         u[abs(u)>1] = 0.
 196                         u
 197                     }
 198             }
 199
 200             similarities =
 201                 if (simtype == "exo")
 202                     simils_exo
 203                 else if (simtype == "endo")
 204                     simils_endo
 205                 else #mix
 206                     simils_endo * simils_exo
 207
 208             prediction = rep(0, horizon)
 209             for (i in seq_along(indices))
 210                 prediction = prediction + similarities[i] * data$getSerie(indices[i]+1)[1:horizon]
 211             prediction = prediction / sum(similarities, na.rm=TRUE)
 212
 213             if (final_call)
 214             {
 215                 private$.params$weights <- similarities
 216                 private$.params$indices <- indices
 217                 private$.params$window <-
 218                     if (simtype=="endo")
 219                         h_endo
 220                     else if (simtype=="exo")
 221                         h_exo
 222                     else #mix
 223                         c(h_endo,h_exo)
 224             }
 225
 226             return (prediction)
 227         }
 228     )
 229 )