7a3fbe525352405837e55c1fe7b69350768257ed
[talweg.git] / pkg / R / F_Neighbors.R
1 #' @include Forecaster.R
2 #'
3 #' Neighbors Forecaster
4 #'
5 #' Predict tomorrow as a weighted combination of "futures of the past" days.
6 #' Inherits \code{\link{Forecaster}}
7 NeighborsForecaster = R6::R6Class("NeighborsForecaster",
8 inherit = Forecaster,
9
10 public = list(
11 predictShape = function(today, memory, horizon, ...)
12 {
13 # (re)initialize computed parameters
14 private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
15
16 # Determine indices of no-NAs days followed by no-NAs tomorrows
17 fdays = private$.data$getCoupleDays(max(today-memory,1), today-1)
18
19 # Get optional args
20 simtype = ifelse(hasArg("simtype"), list(...)$simtype, "mix") #or "endo", or "exo"
21 kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan"
22 if (hasArg(h_window))
23 {
24 return ( private$.predictShapeAux(
25 fdays, today, horizon, list(...)$h_window, kernel, simtype, TRUE) )
26 }
27
28 # Indices of similar days for cross-validation; TODO: 45 = magic number
29 sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
30
31 # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
32 errorOnLastNdays = function(h, kernel, simtype)
33 {
34 error = 0
35 nb_jours = 0
36 for (i in intersect(fdays,sdays))
37 {
38 # mix_strategy is never used here (simtype != "mix"), therefore left blank
39 prediction = private$.predictShapeAux(fdays, i, horizon, h, kernel, simtype, FALSE)
40 if (!is.na(prediction[1]))
41 {
42 nb_jours = nb_jours + 1
43 error = error +
44 mean((private$.data$getCenteredSerie(i+1)[1:horizon] - prediction)^2)
45 }
46 }
47 return (error / nb_jours)
48 }
49
50 if (simtype != "endo")
51 {
52 h_best_exo = optimize(
53 errorOnLastNdays, c(0,10), kernel=kernel, simtype="exo")$minimum
54 }
55 if (simtype != "exo")
56 {
57 h_best_endo = optimize(
58 errorOnLastNdays, c(0,10), kernel=kernel, simtype="endo")$minimum
59 }
60
61 if (simtype == "endo")
62 {
63 return (private$.predictShapeAux(
64 fdays, today, horizon, h_best_endo, kernel, "endo", TRUE))
65 }
66 if (simtype == "exo")
67 {
68 return (private$.predictShapeAux(
69 fdays, today, horizon, h_best_exo, kernel, "exo", TRUE))
70 }
71 if (simtype == "mix")
72 {
73 h_best_mix = c(h_best_endo,h_best_exo)
74 return(private$.predictShapeAux(
75 fdays, today, horizon, h_best_mix, kernel, "mix", TRUE))
76 }
77 }
78 ),
79 private = list(
80 # Precondition: "today" is full (no NAs)
81 .predictShapeAux = function(fdays, today, horizon, h, kernel, simtype, final_call)
82 {
83 fdays = fdays[ fdays < today ]
84 # TODO: 3 = magic number
85 if (length(fdays) < 3)
86 return (NA)
87
88 data = private$.data #shorthand
89
90 if (simtype != "exo")
91 {
92 h_endo = ifelse(simtype=="mix", h[1], h)
93
94 # Distances from last observed day to days in the past
95 distances2 = rep(NA, length(fdays))
96 for (i in seq_along(fdays))
97 {
98 delta = data$getCenteredSerie(today) - data$getCenteredSerie(fdays[i])
99 # Require at least half of non-NA common values to compute the distance
100 if (sum(is.na(delta)) <= 0) #length(delta)/2)
101 distances2[i] = mean(delta^2) #, na.rm=TRUE)
102 }
103
104 sd_dist = sd(distances2)
105 if (sd_dist < .Machine$double.eps)
106 sd_dist = 1 #mostly for tests... FIXME:
107 simils_endo =
108 if (kernel=="Gauss")
109 exp(-distances2/(sd_dist*h_endo^2))
110 else { #Epanechnikov
111 u = 1 - distances2/(sd_dist*h_endo^2)
112 u[abs(u)>1] = 0.
113 u
114 }
115 }
116
117 if (simtype != "endo")
118 {
119 h_exo = ifelse(simtype=="mix", h[2], h)
120
121 M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
122 M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
123 for (i in seq_along(fdays))
124 M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
125
126 sigma = cov(M) #NOTE: robust covariance is way too slow
127 sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
128
129 # Distances from last observed day to days in the past
130 distances2 = rep(NA, nrow(M)-1)
131 for (i in 2:nrow(M))
132 {
133 delta = M[1,] - M[i,]
134 distances2[i-1] = delta %*% sigma_inv %*% delta
135 }
136
137 sd_dist = sd(distances2)
138 simils_exo =
139 if (kernel=="Gauss")
140 exp(-distances2/(sd_dist*h_exo^2))
141 else { #Epanechnikov
142 u = 1 - distances2/(sd_dist*h_exo^2)
143 u[abs(u)>1] = 0.
144 u
145 }
146 }
147
148 similarities =
149 if (simtype == "exo")
150 simils_exo
151 else if (simtype == "endo")
152 simils_endo
153 else #mix
154 simils_endo * simils_exo
155
156 prediction = rep(0, horizon)
157 for (i in seq_along(fdays))
158 prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
159 prediction = prediction / sum(similarities, na.rm=TRUE)
160
161 if (final_call)
162 {
163 private$.params$weights <- similarities
164 private$.params$indices <- fdays
165 private$.params$window <-
166 if (simtype=="endo") {
167 h_endo
168 } else if (simtype=="exo") {
169 h_exo
170 } else { #mix
171 c(h_endo,h_exo)
172 }
173 }
174
175 return (prediction)
176 }
177 )
178 )