Commit | Line | Data |
---|---|---|
3d69ff21 BA |
1 | #' @include ShapeForecaster.R |
2 | #' | |
3 | #' @title Neighbors Shape Forecaster | |
4 | #' | |
5 | #' @description Predict tomorrow as a weighted combination of "futures of the past" days. | |
6 | #' Inherits \code{\link{ShapeForecaster}} | |
7 | NeighborsShapeForecaster = setRefClass( | |
8 | Class = "NeighborsShapeForecaster", | |
9 | contains = "ShapeForecaster", | |
10 | ||
11 | methods = list( | |
12 | initialize = function(...) | |
13 | { | |
14 | callSuper(...) | |
15 | }, | |
16 | predict = function(today, memory, horizon, ...) | |
17 | { | |
18 | # (re)initialize computed parameters | |
19 | params <<- list("weights"=NA, "indices"=NA, "window"=NA) | |
20 | ||
21 | first_day = max(today - memory, 1) | |
22 | # The first day is generally not complete: | |
23 | if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2))) | |
24 | first_day = 2 | |
25 | ||
26 | # Predict only on non-NAs days (TODO:) | |
27 | if (any(is.na(data$getSerie(today)))) | |
28 | return (NA) | |
29 | ||
30 | # Determine indices of no-NAs days followed by no-NAs tomorrows | |
31 | fdays_indices = c() | |
32 | for (i in first_day:(today-1)) | |
33 | { | |
34 | if ( !any(is.na(data$getSerie(i)) | is.na(data$getSerie(i+1))) ) | |
35 | fdays_indices = c(fdays_indices, i) | |
36 | } | |
37 | ||
38 | #GET OPTIONAL PARAMS | |
39 | # Similarity computed with exogenous variables ? endogenous ? both ? ("exo","endo","mix") | |
40 | simtype = ifelse(hasArg("simtype"), list(...)$simtype, "exo") | |
41 | simthresh = ifelse(hasArg("simthresh"), list(...)$simthresh, 0.) | |
42 | kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") | |
43 | mix_strategy = ifelse(hasArg("mix_strategy"), list(...)$mix_strategy, "neighb") #or "mult" | |
44 | same_season = ifelse(hasArg("same_season"), list(...)$same_season, TRUE) | |
45 | if (hasArg(h_window)) | |
46 | return (.predictAux(fdays_indices, today, horizon, list(...)$h_window, kernel, simtype, | |
47 | simthresh, mix_strategy, FALSE)) | |
48 | #END GET | |
49 | ||
50 | # Indices for cross-validation; TODO: 45 = magic number | |
51 | indices = getSimilarDaysIndices(today, limit=45, same_season=same_season) | |
52 | #indices = (end_index-45):(end_index-1) | |
53 | ||
54 | # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days | |
55 | errorOnLastNdays = function(h, kernel, simtype) | |
56 | { | |
57 | error = 0 | |
58 | nb_jours = 0 | |
59 | for (i in indices) | |
60 | { | |
61 | # NOTE: predict only on non-NAs days followed by non-NAs (TODO:) | |
62 | if (!any(is.na(data$getSerie(i)) | is.na(data$getSerie(i+1)))) | |
63 | { | |
64 | nb_jours = nb_jours + 1 | |
65 | # mix_strategy is never used here (simtype != "mix"), therefore left blank | |
66 | prediction = .predictAux(fdays_indices, i, horizon, h, kernel, simtype, simthresh, | |
67 | "", FALSE) | |
68 | if (!is.na(prediction[1])) | |
69 | error = error + mean((data$getCenteredSerie(i+1)[1:horizon] - prediction)^2) | |
70 | } | |
71 | } | |
72 | return (error / nb_jours) | |
73 | } | |
74 | ||
75 | h_best_exo = 1. | |
76 | if (simtype != "endo" && !(simtype=="mix" && mix_strategy=="neighb")) | |
77 | { | |
78 | h_best_exo = optimize(errorOnLastNdays, interval=c(0,10), kernel=kernel, | |
79 | simtype="exo")$minimum | |
80 | } | |
81 | if (simtype != "exo") | |
82 | { | |
83 | h_best_endo = optimize(errorOnLastNdays, interval=c(0,10), kernel=kernel, | |
84 | simtype="endo")$minimum | |
85 | } | |
86 | ||
87 | if (simtype == "endo") | |
88 | { | |
89 | return (.predictAux(fdays_indices, today, horizon, h_best_endo, kernel, "endo", | |
90 | simthresh, "", TRUE)) | |
91 | } | |
92 | if (simtype == "exo") | |
93 | { | |
94 | return (.predictAux(fdays_indices, today, horizon, h_best_exo, kernel, "exo", | |
95 | simthresh, "", TRUE)) | |
96 | } | |
97 | if (simtype == "mix") | |
98 | { | |
99 | return (.predictAux(fdays_indices, today, horizon, c(h_best_endo,h_best_exo), kernel, | |
100 | "mix", simthresh, mix_strategy, TRUE)) | |
101 | } | |
102 | }, | |
103 | # Precondition: "today" is full (no NAs) | |
104 | .predictAux = function(fdays_indices, today, horizon, h, kernel, simtype, simthresh, | |
105 | mix_strategy, final_call) | |
106 | { | |
107 | dat = data$data #HACK: faster this way... | |
108 | ||
109 | fdays_indices = fdays_indices[fdays_indices < today] | |
110 | # TODO: 3 = magic number | |
111 | if (length(fdays_indices) < 3) | |
112 | return (NA) | |
113 | ||
114 | if (simtype != "exo") | |
115 | { | |
116 | h_endo = ifelse(simtype=="mix", h[1], h) | |
117 | ||
118 | # Distances from last observed day to days in the past | |
119 | distances2 = rep(NA, length(fdays_indices)) | |
120 | for (i in seq_along(fdays_indices)) | |
121 | { | |
122 | delta = dat[[today]]$serie - dat[[ fdays_indices[i] ]]$serie | |
123 | # Require at least half of non-NA common values to compute the distance | |
124 | if (sum(is.na(delta)) <= 0) #length(delta)/2) | |
125 | distances2[i] = mean(delta^2) #, na.rm=TRUE) | |
126 | } | |
127 | ||
128 | sd_dist = sd(distances2) | |
129 | simils_endo = | |
130 | if (kernel=="Gauss") { | |
131 | exp(-distances2/(sd_dist*h_endo^2)) | |
132 | } else { #Epanechnikov | |
133 | u = 1 - distances2/(sd_dist*h_endo^2) | |
134 | u[abs(u)>1] = 0. | |
135 | u | |
136 | } | |
137 | } | |
138 | ||
139 | if (simtype != "endo") | |
140 | { | |
141 | h_exo = ifelse(simtype=="mix", h[2], h) | |
142 | ||
143 | # TODO: [rnormand] if predict_at == 0h then we should use measures from day minus 1 | |
144 | M = matrix( nrow=1+length(fdays_indices), ncol=1+length(dat[[today]]$exo_hat) ) | |
145 | M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo_hat) ) | |
146 | for (i in seq_along(fdays_indices)) | |
147 | { | |
148 | M[i+1,] = c( dat[[ fdays_indices[i] ]]$level, | |
149 | as.double(dat[[ fdays_indices[i] ]]$exo_hat) ) | |
150 | } | |
151 | ||
152 | sigma = cov(M) #NOTE: robust covariance is way too slow | |
153 | sigma_inv = qr.solve(sigma) | |
154 | ||
155 | # Distances from last observed day to days in the past | |
156 | distances2 = rep(NA, nrow(M)-1) | |
157 | for (i in 2:nrow(M)) | |
158 | { | |
159 | delta = M[1,] - M[i,] | |
160 | distances2[i-1] = delta %*% sigma_inv %*% delta | |
161 | } | |
162 | ||
163 | sd_dist = sd(distances2) | |
164 | simils_exo = | |
165 | if (kernel=="Gauss") { | |
166 | exp(-distances2/(sd_dist*h_exo^2)) | |
167 | } else { #Epanechnikov | |
168 | u = 1 - distances2/(sd_dist*h_exo^2) | |
169 | u[abs(u)>1] = 0. | |
170 | u | |
171 | } | |
172 | } | |
173 | ||
174 | if (simtype=="mix") | |
175 | { | |
176 | if (mix_strategy == "neighb") | |
177 | { | |
178 | #Only (60) most similar days according to exogen variables are kept into consideration | |
179 | #TODO: 60 = magic number | |
180 | keep_indices = sort(simils_exo, index.return=TRUE)$ix[1:(min(60,length(simils_exo)))] | |
181 | simils_endo[-keep_indices] = 0. | |
182 | } else #mix_strategy == "mult" | |
183 | { | |
184 | simils_endo = simils_endo * simils_exo | |
185 | } | |
186 | } | |
187 | ||
188 | similarities = | |
189 | if (simtype != "exo") { | |
190 | simils_endo | |
191 | } else { | |
192 | simils_exo | |
193 | } | |
194 | ||
195 | if (simthresh > 0.) | |
196 | { | |
197 | max_sim = max(similarities) | |
198 | # Set to 0 all similarities s where s / max_sim < simthresh, but keep at least 60 | |
199 | ordering = sort(similarities / max_sim, index.return=TRUE) | |
200 | if (ordering[60] < simthresh) | |
201 | { | |
202 | similarities[ ordering$ix[ - (1:60) ] ] = 0. | |
203 | } else | |
204 | { | |
205 | limit = 61 | |
206 | while (limit < length(similarities) && ordering[limit] >= simthresh) | |
207 | limit = limit + 1 | |
208 | similarities[ ordering$ix[ - 1:limit] ] = 0. | |
209 | } | |
210 | } | |
211 | ||
212 | prediction = rep(0, horizon) | |
213 | for (i in seq_along(fdays_indices)) | |
214 | prediction = prediction + similarities[i] * dat[[ fdays_indices[i]+1 ]]$serie[1:horizon] | |
215 | ||
216 | prediction = prediction / sum(similarities, na.rm=TRUE) | |
217 | if (final_call) | |
218 | { | |
219 | params$weights <<- similarities | |
220 | params$indices <<- fdays_indices | |
221 | params$window <<- | |
222 | if (simtype=="endo") { | |
223 | h_endo | |
224 | } else if (simtype=="exo") { | |
225 | h_exo | |
226 | } else { | |
227 | c(h_endo,h_exo) | |
228 | } | |
229 | } | |
230 | return (prediction) | |
231 | } | |
232 | ) | |
233 | ) |