Commit | Line | Data |
---|---|---|
5c49f6ce BA |
1 | #' @include Forecaster.R |
2 | #' | |
3 | #' Neighbors2 Forecaster | |
4 | #' | |
5 | #' Predict tomorrow as a weighted combination of "futures of the past" days. | |
6 | #' Inherits \code{\link{Forecaster}} | |
7 | #' | |
8 | Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", | |
9 | inherit = Forecaster, | |
10 | ||
11 | public = list( | |
12 | predictShape = function(data, today, memory, horizon, ...) | |
13 | { | |
14 | # (re)initialize computed parameters | |
15 | private$.params <- list("weights"=NA, "indices"=NA, "window"=NA) | |
16 | ||
17 | # Do not forecast on days with NAs (TODO: softer condition...) | |
18 | if (any(is.na(data$getCenteredSerie(today)))) | |
19 | return (NA) | |
20 | ||
21 | # Determine indices of no-NAs days followed by no-NAs tomorrows | |
9db234c5 | 22 | fdays = getNoNA2(data, max(today-memory,1), today-1) |
5c49f6ce BA |
23 | |
24 | # Get optional args | |
25 | kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan" | |
26 | if (hasArg(h_window)) | |
27 | { | |
28 | return ( private$.predictShapeAux(data, | |
29 | fdays, today, horizon, list(...)$h_window, kernel, TRUE) ) | |
30 | } | |
31 | ||
9db234c5 BA |
32 | # Indices of similar days for cross-validation; TODO: 45 = magic number |
33 | sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE) | |
34 | ||
5c49f6ce BA |
35 | # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days |
36 | errorOnLastNdays = function(h, kernel) | |
37 | { | |
38 | error = 0 | |
39 | nb_jours = 0 | |
9db234c5 | 40 | for (day in intersect(fdays,sdays)) |
5c49f6ce BA |
41 | { |
42 | # mix_strategy is never used here (simtype != "mix"), therefore left blank | |
2ae38266 | 43 | prediction = private$.predictShapeAux(data,fdays,day,horizon,h,kernel,FALSE) |
5c49f6ce BA |
44 | if (!is.na(prediction[1])) |
45 | { | |
46 | nb_jours = nb_jours + 1 | |
47 | error = error + | |
48 | mean((data$getSerie(i+1)[1:horizon] - prediction)^2) | |
49 | } | |
50 | } | |
51 | return (error / nb_jours) | |
52 | } | |
53 | ||
54 | # h :: only for endo in this variation | |
9db234c5 | 55 | h_best = optimize(errorOnLastNdays, c(0,7), kernel=kernel)$minimum |
2ae38266 | 56 | return (private$.predictShapeAux(data,fdays,today,horizon,h_best,kernel,TRUE)) |
5c49f6ce BA |
57 | } |
58 | ), | |
59 | private = list( | |
60 | # Precondition: "today" is full (no NAs) | |
61 | .predictShapeAux = function(data, fdays, today, horizon, h, kernel, final_call) | |
62 | { | |
63 | fdays = fdays[ fdays < today ] | |
64 | # TODO: 3 = magic number | |
65 | if (length(fdays) < 3) | |
66 | return (NA) | |
67 | ||
9db234c5 BA |
68 | # Neighbors: days in "same season" |
69 | sdays = getSimilarDaysIndices(today, limit=45, same_season=TRUE, data) | |
70 | indices = intersect(fdays,sdays) | |
71 | levelToday = data$getLevel(today) | |
72 | distances = sapply(seq_along(indices), function(i) abs(data$getLevel(i)-levelToday)) | |
73 | same_pollution = (distances <= 2) | |
74 | if (sum(same_pollution) < 3) #TODO: 3 == magic number | |
5c49f6ce | 75 | { |
9db234c5 BA |
76 | same_pollution = (distances <= 5) |
77 | if (sum(same_pollution) < 3) | |
78 | return (NA) | |
5c49f6ce | 79 | } |
9db234c5 BA |
80 | indices = indices[same_pollution] |
81 | ||
82 | # Now OK: indices same season, same pollution level | |
83 | # ........... | |
84 | ||
85 | ||
86 | # ENDO:: Distances from last observed day to days in the past | |
87 | serieToday = data$getSerie(today) | |
88 | distances2 = sapply(indices, function(i) { | |
89 | delta = serieToday - data$getSerie(i) | |
90 | distances2[i] = mean(delta^2) | |
91 | }) | |
5c49f6ce BA |
92 | |
93 | sd_dist = sd(distances2) | |
94 | if (sd_dist < .Machine$double.eps) | |
95 | { | |
96 | # warning("All computed distances are very close: stdev too small") | |
97 | sd_dist = 1 #mostly for tests... FIXME: | |
98 | } | |
99 | simils_endo = | |
100 | if (kernel=="Gauss") | |
101 | exp(-distances2/(sd_dist*h_endo^2)) | |
102 | else | |
103 | { | |
104 | # Epanechnikov | |
105 | u = 1 - distances2/(sd_dist*h_endo^2) | |
106 | u[abs(u)>1] = 0. | |
107 | u | |
108 | } | |
109 | ||
9db234c5 BA |
110 | # # EXOGENS: distances computations are enough |
111 | # # TODO: search among similar concentrations....... at this stage ?! | |
112 | # M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) ) | |
113 | # M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) ) | |
114 | # for (i in seq_along(fdays)) | |
115 | # M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) ) | |
116 | # | |
117 | # sigma = cov(M) #NOTE: robust covariance is way too slow | |
118 | # sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed? | |
119 | # | |
120 | # # Distances from last observed day to days in the past | |
121 | # distances2 = rep(NA, nrow(M)-1) | |
122 | # for (i in 2:nrow(M)) | |
123 | # { | |
124 | # delta = M[1,] - M[i,] | |
125 | # distances2[i-1] = delta %*% sigma_inv %*% delta | |
126 | # } | |
127 | ||
128 | similarities = simils_endo | |
5c49f6ce BA |
129 | |
130 | prediction = rep(0, horizon) | |
9db234c5 BA |
131 | for (i in seq_along(indices)) |
132 | prediction = prediction + similarities[i] * data$getSerie(indices[i]+1)[1:horizon] | |
5c49f6ce BA |
133 | prediction = prediction / sum(similarities, na.rm=TRUE) |
134 | ||
135 | if (final_call) | |
136 | { | |
137 | private$.params$weights <- similarities | |
9db234c5 | 138 | private$.params$indices <- indices |
2ae38266 | 139 | private$.params$window <- h |
5c49f6ce BA |
140 | } |
141 | ||
142 | return (prediction) | |
143 | } | |
144 | ) | |
145 | ) |