Commit | Line | Data |
---|---|---|
5c49f6ce BA |
1 | #' @include Forecaster.R |
2 | #' | |
3 | #' Neighbors2 Forecaster | |
4 | #' | |
5 | #' Predict tomorrow as a weighted combination of "futures of the past" days. | |
6 | #' Inherits \code{\link{Forecaster}} | |
7 | #' | |
8 | Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", | |
9 | inherit = Forecaster, | |
10 | ||
11 | public = list( | |
12 | predictShape = function(data, today, memory, horizon, ...) | |
13 | { | |
14 | # (re)initialize computed parameters | |
15 | private$.params <- list("weights"=NA, "indices"=NA, "window"=NA) | |
16 | ||
17 | # Do not forecast on days with NAs (TODO: softer condition...) | |
18 | if (any(is.na(data$getCenteredSerie(today)))) | |
19 | return (NA) | |
20 | ||
21 | # Determine indices of no-NAs days followed by no-NAs tomorrows | |
22 | fdays = getNoNA2(data, max(today-memory,1), today-1) | |
23 | ||
24 | # Get optional args | |
25 | kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan" | |
26 | if (hasArg(h_window)) | |
27 | { | |
28 | return ( private$.predictShapeAux(data, | |
29 | fdays, today, horizon, list(...)$h_window, kernel, TRUE) ) | |
30 | } | |
31 | ||
32 | ||
33 | # Indices of similar days for cross-validation; TODO: 45 = magic number | |
34 | # TODO: ici faut une sorte de "same_season==TRUE" --> mois similaires epandage | |
35 | sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE) | |
36 | ||
37 | ||
38 | # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days | |
39 | errorOnLastNdays = function(h, kernel) | |
40 | { | |
41 | error = 0 | |
42 | nb_jours = 0 | |
43 | for (i in intersect(fdays,sdays)) | |
44 | { | |
45 | # mix_strategy is never used here (simtype != "mix"), therefore left blank | |
46 | prediction = private$.predictShapeAux(data, fdays, i, horizon, h, kernel, FALSE) | |
47 | if (!is.na(prediction[1])) | |
48 | { | |
49 | nb_jours = nb_jours + 1 | |
50 | error = error + | |
51 | mean((data$getSerie(i+1)[1:horizon] - prediction)^2) | |
52 | } | |
53 | } | |
54 | return (error / nb_jours) | |
55 | } | |
56 | ||
57 | # h :: only for endo in this variation | |
58 | h_best_endo = optimize(errorOnLastNdays, c(0,10), kernel=kernel)$minimum | |
59 | ||
60 | return (private$.predictShapeAux(data, fdays, today, horizon, h_best, kernel, TRUE)) | |
61 | } | |
62 | ), | |
63 | private = list( | |
64 | # Precondition: "today" is full (no NAs) | |
65 | .predictShapeAux = function(data, fdays, today, horizon, h, kernel, final_call) | |
66 | { | |
67 | fdays = fdays[ fdays < today ] | |
68 | # TODO: 3 = magic number | |
69 | if (length(fdays) < 3) | |
70 | return (NA) | |
71 | ||
72 | # ENDO:: Distances from last observed day to days in the past | |
73 | distances2 = rep(NA, length(fdays)) | |
74 | for (i in seq_along(fdays)) | |
75 | { | |
76 | delta = data$getSerie(today) - data$getSerie(fdays[i]) | |
77 | # Require at least half of non-NA common values to compute the distance | |
78 | if ( !any( is.na(delta) ) ) | |
79 | distances2[i] = mean(delta^2) | |
80 | } | |
81 | ||
82 | sd_dist = sd(distances2) | |
83 | if (sd_dist < .Machine$double.eps) | |
84 | { | |
85 | # warning("All computed distances are very close: stdev too small") | |
86 | sd_dist = 1 #mostly for tests... FIXME: | |
87 | } | |
88 | simils_endo = | |
89 | if (kernel=="Gauss") | |
90 | exp(-distances2/(sd_dist*h_endo^2)) | |
91 | else | |
92 | { | |
93 | # Epanechnikov | |
94 | u = 1 - distances2/(sd_dist*h_endo^2) | |
95 | u[abs(u)>1] = 0. | |
96 | u | |
97 | } | |
98 | ||
99 | # EXOGENS: distances computations are enough | |
100 | # TODO: search among similar concentrations....... at this stage ?! | |
101 | M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) ) | |
102 | M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) ) | |
103 | for (i in seq_along(fdays)) | |
104 | M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) ) | |
105 | ||
106 | sigma = cov(M) #NOTE: robust covariance is way too slow | |
107 | sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed? | |
108 | ||
109 | # Distances from last observed day to days in the past | |
110 | distances2 = rep(NA, nrow(M)-1) | |
111 | for (i in 2:nrow(M)) | |
112 | { | |
113 | delta = M[1,] - M[i,] | |
114 | distances2[i-1] = delta %*% sigma_inv %*% delta | |
115 | } | |
116 | ||
117 | ppv <- sort(distances2, index.return=TRUE)$ix[1:10] #.............. | |
118 | #PPV pour endo ? | |
119 | ||
120 | similarities = | |
121 | if (simtype == "exo") | |
122 | simils_exo | |
123 | else if (simtype == "endo") | |
124 | simils_endo | |
125 | else #mix | |
126 | simils_endo * simils_exo | |
127 | ||
128 | prediction = rep(0, horizon) | |
129 | for (i in seq_along(fdays)) | |
130 | prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon] | |
131 | prediction = prediction / sum(similarities, na.rm=TRUE) | |
132 | ||
133 | if (final_call) | |
134 | { | |
135 | private$.params$weights <- similarities | |
136 | private$.params$indices <- fdays | |
137 | private$.params$window <- | |
138 | if (simtype=="endo") | |
139 | h_endo | |
140 | else if (simtype=="exo") | |
141 | h_exo | |
142 | else #mix | |
143 | c(h_endo,h_exo) | |
144 | } | |
145 | ||
146 | return (prediction) | |
147 | } | |
148 | ) | |
149 | ) |