return (NA)
}
- # Determine indices of no-NAs days preceded by no-NAs yerstedays
- tdays = .getNoNA2(data, max(today-memory,2), today-1)
-
# Get optional args
local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season?
simtype = ifelse(hasArg("simtype"), list(...)$simtype, "none") #or "endo", or "exo"
+ opera = ifelse(hasArg("opera"), list(...)$opera, FALSE) #operational mode?
+
+ # Determine indices of no-NAs days preceded by no-NAs yerstedays
+ tdays = .getNoNA2(data, max(today-memory,2), ifelse(opera,today-1,data$getSize()))
+ if (!opera)
+ tdays = setdiff(tdays, today) #always exclude current day
+
+ # Shortcut if window is known
if (hasArg("window"))
{
- return ( private$.predictShapeAux(data,
- tdays, today, predict_from, horizon, local, list(...)$window, simtype, TRUE) )
+ return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon,
+ local, list(...)$window, simtype, opera, TRUE) )
}
# Indices of similar days for cross-validation; TODO: 20 = magic number
cv_days = getSimilarDaysIndices(today, data, limit=20, same_season=FALSE,
- days_in=tdays)
+ days_in=tdays, operational=opera)
# Optimize h : h |--> sum of prediction errors on last N "similar" days
errorOnLastNdays = function(window, simtype)
{
# mix_strategy is never used here (simtype != "mix"), therefore left blank
prediction = private$.predictShapeAux(data, tdays, cv_days[i], predict_from,
- horizon, local, window, simtype, FALSE)
+ horizon, local, window, simtype, opera, FALSE)
if (!is.na(prediction[1]))
{
nb_jours = nb_jours + 1
1
return( private$.predictShapeAux(data, tdays, today, predict_from, horizon, local,
- best_window, simtype, TRUE) )
+ best_window, simtype, opera, TRUE) )
}
),
private = list(
- # Precondition: "today" is full (no NAs)
+ # Precondition: "yersteday until predict_from-1" is full (no NAs)
.predictShapeAux = function(data, tdays, today, predict_from, horizon, local, window,
- simtype, final_call)
+ simtype, opera, final_call)
{
- tdays_cut = tdays[ tdays <= today-1 ]
- if (length(tdays_cut) <= 1)
+ tdays_cut = tdays[ tdays != today ]
+ if (length(tdays_cut) == 0)
return (NA)
if (local)
{
# TODO: 60 == magic number
tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
- days_in=tdays_cut)
- if (length(tdays) <= 1)
- return (NA)
- # TODO: 10, 12 == magic numbers
- tdays = .getConstrainedNeighbs(today,data,tdays,min_neighbs=10,max_neighbs=12)
+ days_in=tdays_cut, operational=opera)
+# if (length(tdays) <= 1)
+# return (NA)
+ # TODO: 10 == magic number
+ tdays = .getConstrainedNeighbs(today, data, tdays, min_neighbs=10)
if (length(tdays) == 1)
{
if (final_call)
# Compute endogen similarities using given window
window_endo = ifelse(simtype=="mix", window[1], window)
- # Distances from last observed day to days in the past
- lastSerie = c( data$getSerie(today-1),
- data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()] )
- distances2 = sapply(tdays, function(i) {
- delta = lastSerie - c(data$getSerie(i-1),
- data$getSerie(i)[if (predict_from>=2) 1:(predict_from-1) else c()])
- sqrt(mean(delta^2))
- })
+ # Distances from last observed day to selected days in the past
+ distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
+
+ if (local)
+ {
+ max_neighbs = 12 #TODO: 12 = arbitrary number
+ if (length(distances2) > max_neighbs)
+ {
+ ordering <- order(distances2)
+ tdays <- tdays[ ordering[1:max_neighbs] ]
+ distances2 <- distances2[ ordering[1:max_neighbs] ]
+ }
+ }
simils_endo <- .computeSimils(distances2, window_endo)
}
# Compute exogen similarities using given window
window_exo = ifelse(simtype=="mix", window[2], window)
- M = matrix( ncol=1+length(tdays), nrow=1+length(data$getExo(1)) )
- M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
- for (i in seq_along(tdays))
- M[,i+1] = c( data$getLevel(tdays[i]), as.double(data$getExo(tdays[i])) )
-
- sigma = cov(t(M)) #NOTE: robust covariance is way too slow
- # TODO: 10 == magic number; more robust way == det, or always ginv()
- sigma_inv =
- if (length(tdays) > 10)
- solve(sigma)
- else
- MASS::ginv(sigma)
-
- # Distances from last observed day to days in the past
- distances2 = sapply(seq_along(tdays), function(i) {
- delta = M[,1] - M[,i+1]
- delta %*% sigma_inv %*% delta
- })
+ distances2 <- .computeDistsExo(data, today, tdays)
simils_exo <- .computeSimils(distances2, window_exo)
}
# @param min_neighbs Minimum number of points in a neighborhood
# @param max_neighbs Maximum number of points in a neighborhood
#
-.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10, max_neighbs=12)
+.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10)
{
levelToday = data$getLevelHat(today)
- levelYersteday = data$getLevel(today-1)
+# levelYersteday = data$getLevel(today-1)
distances = sapply(tdays, function(i) {
- sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
+# sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
+ abs(data$getLevel(i)-levelToday)
})
#TODO: 1, +1, +3 : magic numbers
dist_thresh = 1
dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
tdays = tdays[same_pollution]
- max_neighbs = 12
- if (nb_neighbs > max_neighbs)
- {
- # Keep only max_neighbs closest neighbors
- tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
- }
+# max_neighbs = 12
+# if (nb_neighbs > max_neighbs)
+# {
+# # Keep only max_neighbs closest neighbors
+# tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
+# }
tdays
}
}
exp(-distances2/(sd_dist*window^2))
}
+
+.computeDistsEndo <- function(data, today, tdays, predict_from)
+{
+ lastSerie = c( data$getSerie(today-1),
+ data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()] )
+ sapply(tdays, function(i) {
+ delta = lastSerie - c(data$getSerie(i-1),
+ data$getSerie(i)[if (predict_from>=2) 1:(predict_from-1) else c()])
+ sqrt(mean(delta^2))
+ })
+}
+
+.computeDistsExo <- function(data, today, tdays)
+{
+ M = matrix( ncol=1+length(tdays), nrow=1+length(data$getExo(1)) )
+ M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
+ for (i in seq_along(tdays))
+ M[,i+1] = c( data$getLevel(tdays[i]), as.double(data$getExo(tdays[i])) )
+
+ sigma = cov(t(M)) #NOTE: robust covariance is way too slow
+ # TODO: 10 == magic number; more robust way == det, or always ginv()
+ sigma_inv =
+ if (length(tdays) > 10)
+ solve(sigma)
+ else
+ MASS::ginv(sigma)
+
+ # Distances from last observed day to days in the past
+ sapply(seq_along(tdays), function(i) {
+ delta = M[,1] - M[,i+1]
+ delta %*% sigma_inv %*% delta
+ })
+}
{
data = getDataTest(150)
-
-
-
-#TODO: debug
-
-
-
- # index 144-1 == 143 : serie type 2
- pred = computeForecast(data, 143, "Neighbors", "Zero", predict_from=8,
- horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1)
+ # index 144 : serie type 3, yersteday type 2
+ pred = computeForecast(data, 144, "Neighbors", "Zero", predict_from=1,
+ horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, window=1, opera=TRUE)
f = computeFilaments(data, pred, 1, 8, limit=60, plot=FALSE)
- # Expected output: 50-3-10 series of type 2, then 23 series of type 3 (closest next)
+ # Expected output: 50-3-10 series of type 2+1 = 3,
+ # then 23 series of type 3+1 %% 3 = 1 (3 = closest next)
expect_identical(length(f$neighb_indices), as.integer(60))
expect_identical(length(f$colors), as.integer(60))
- expect_equal(f$index, 143)
- expect_true(all(I(f$neighb_indices) >= 2))
+ expect_equal(f$index, 144)
+ expect_true(all(I(f$neighb_indices) != 2))
for (i in 1:37)
{
- expect_equal(I(f$neighb_indices[i]), 2)
+ expect_equal(I(f$neighb_indices[i]), 3)
expect_match(f$colors[i], f$colors[1])
}
for (i in 38:60)
{
- expect_equal(I(f$neighb_indices[i]), 3)
+ expect_equal(I(f$neighb_indices[i]), 1)
expect_match(f$colors[i], f$colors[38])
}
expect_match(f$colors[1], "#1*")
expect_match(f$colors[38], "#E*")
- # index 143-1 == 142 : serie type 1
- pred = computeForecast(data, 143, "Neighbors", "Zero", predict_from=8,
- horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1)
+ # index 143 : serie type 2
+ pred = computeForecast(data, 143, "Neighbors", "Zero", predict_from=1,
+ horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, window=1, opera=TRUE)
f = computeFilaments(data, pred, 1, 8, limit=50, plot=FALSE)
- # Expected output: 50-10-3 series of type 1, then 13 series of type 3 (closest next)
- # NOTE: -10 because only past days with no-NAs tomorrow => exclude type 1 in [60,90[
+ # Expected output: 50-10-3 series of type 1+1=2,
+ # then 13 series of type 3+1 %% 3 = 1 (closest next)
+ # NOTE: -10 because only past tomorrows with no-NAs yerstedays
+ # => exclude type 2 in [60,90[
expect_identical(length(f$neighb_indices), as.integer(50))
expect_identical(length(f$colors), as.integer(50))
expect_equal(f$index, 143)
- expect_true(all(I(f$neighb_indices) != 2))
+ expect_true(all(I(f$neighb_indices) <= 2))
for (i in 1:37)
{
- expect_equal(I(f$neighb_indices[i]), 1)
+ expect_equal(I(f$neighb_indices[i]), 2)
expect_match(f$colors[i], f$colors[1])
}
for (i in 38:50)
{
- expect_equal(I(f$neighb_indices[i]), 3)
+ expect_equal(I(f$neighb_indices[i]), 1)
expect_match(f$colors[i], f$colors[38])
}
expect_match(f$colors[1], "#1*")