Commit | Line | Data |
---|---|---|
5c49f6ce BA |
1 | #' @include Forecaster.R |
2 | #' | |
3 | #' Neighbors2 Forecaster | |
4 | #' | |
5 | #' Predict tomorrow as a weighted combination of "futures of the past" days. | |
6 | #' Inherits \code{\link{Forecaster}} | |
7 | #' | |
8 | Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", | |
9 | inherit = Forecaster, | |
10 | ||
11 | public = list( | |
12 | predictShape = function(data, today, memory, horizon, ...) | |
13 | { | |
14 | # (re)initialize computed parameters | |
15 | private$.params <- list("weights"=NA, "indices"=NA, "window"=NA) | |
16 | ||
17 | # Do not forecast on days with NAs (TODO: softer condition...) | |
18 | if (any(is.na(data$getCenteredSerie(today)))) | |
19 | return (NA) | |
20 | ||
21 | # Determine indices of no-NAs days followed by no-NAs tomorrows | |
2ae38266 BA |
22 | # Indices of similar days for cross-validation; TODO: 45 = magic number |
23 | fdays = intersect( | |
24 | getNoNA2(data, max(today-memory,1), today-1) | |
25 | getSimilarDaysIndices(today, limit=45, same_season=TRUE) ) | |
5c49f6ce BA |
26 | |
27 | # Get optional args | |
28 | kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan" | |
29 | if (hasArg(h_window)) | |
30 | { | |
31 | return ( private$.predictShapeAux(data, | |
32 | fdays, today, horizon, list(...)$h_window, kernel, TRUE) ) | |
33 | } | |
34 | ||
5c49f6ce BA |
35 | # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days |
36 | errorOnLastNdays = function(h, kernel) | |
37 | { | |
38 | error = 0 | |
39 | nb_jours = 0 | |
2ae38266 | 40 | for (day in fdays) |
5c49f6ce BA |
41 | { |
42 | # mix_strategy is never used here (simtype != "mix"), therefore left blank | |
2ae38266 | 43 | prediction = private$.predictShapeAux(data,fdays,day,horizon,h,kernel,FALSE) |
5c49f6ce BA |
44 | if (!is.na(prediction[1])) |
45 | { | |
46 | nb_jours = nb_jours + 1 | |
47 | error = error + | |
48 | mean((data$getSerie(i+1)[1:horizon] - prediction)^2) | |
49 | } | |
50 | } | |
51 | return (error / nb_jours) | |
52 | } | |
53 | ||
54 | # h :: only for endo in this variation | |
2ae38266 BA |
55 | h_best = optimize(errorOnLastNdays, c(0,10), kernel=kernel)$minimum |
56 | return (private$.predictShapeAux(data,fdays,today,horizon,h_best,kernel,TRUE)) | |
5c49f6ce BA |
57 | } |
58 | ), | |
59 | private = list( | |
60 | # Precondition: "today" is full (no NAs) | |
61 | .predictShapeAux = function(data, fdays, today, horizon, h, kernel, final_call) | |
62 | { | |
63 | fdays = fdays[ fdays < today ] | |
64 | # TODO: 3 = magic number | |
65 | if (length(fdays) < 3) | |
66 | return (NA) | |
67 | ||
68 | # ENDO:: Distances from last observed day to days in the past | |
69 | distances2 = rep(NA, length(fdays)) | |
70 | for (i in seq_along(fdays)) | |
71 | { | |
72 | delta = data$getSerie(today) - data$getSerie(fdays[i]) | |
73 | # Require at least half of non-NA common values to compute the distance | |
74 | if ( !any( is.na(delta) ) ) | |
75 | distances2[i] = mean(delta^2) | |
76 | } | |
77 | ||
78 | sd_dist = sd(distances2) | |
79 | if (sd_dist < .Machine$double.eps) | |
80 | { | |
81 | # warning("All computed distances are very close: stdev too small") | |
82 | sd_dist = 1 #mostly for tests... FIXME: | |
83 | } | |
84 | simils_endo = | |
85 | if (kernel=="Gauss") | |
86 | exp(-distances2/(sd_dist*h_endo^2)) | |
87 | else | |
88 | { | |
89 | # Epanechnikov | |
90 | u = 1 - distances2/(sd_dist*h_endo^2) | |
91 | u[abs(u)>1] = 0. | |
92 | u | |
93 | } | |
94 | ||
2ae38266 | 95 | |
5c49f6ce BA |
96 | # EXOGENS: distances computations are enough |
97 | # TODO: search among similar concentrations....... at this stage ?! | |
98 | M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) ) | |
99 | M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) ) | |
100 | for (i in seq_along(fdays)) | |
101 | M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) ) | |
102 | ||
103 | sigma = cov(M) #NOTE: robust covariance is way too slow | |
104 | sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed? | |
105 | ||
106 | # Distances from last observed day to days in the past | |
107 | distances2 = rep(NA, nrow(M)-1) | |
108 | for (i in 2:nrow(M)) | |
109 | { | |
110 | delta = M[1,] - M[i,] | |
111 | distances2[i-1] = delta %*% sigma_inv %*% delta | |
112 | } | |
113 | ||
114 | ppv <- sort(distances2, index.return=TRUE)$ix[1:10] #.............. | |
115 | #PPV pour endo ? | |
116 | ||
2ae38266 | 117 | |
5c49f6ce BA |
118 | similarities = |
119 | if (simtype == "exo") | |
120 | simils_exo | |
121 | else if (simtype == "endo") | |
122 | simils_endo | |
123 | else #mix | |
124 | simils_endo * simils_exo | |
125 | ||
126 | prediction = rep(0, horizon) | |
127 | for (i in seq_along(fdays)) | |
128 | prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon] | |
129 | prediction = prediction / sum(similarities, na.rm=TRUE) | |
130 | ||
131 | if (final_call) | |
132 | { | |
133 | private$.params$weights <- similarities | |
134 | private$.params$indices <- fdays | |
2ae38266 | 135 | private$.params$window <- h |
5c49f6ce BA |
136 | } |
137 | ||
138 | return (prediction) | |
139 | } | |
140 | ) | |
141 | ) |