return (NA)
# Determine indices of no-NAs days followed by no-NAs tomorrows
- # Indices of similar days for cross-validation; TODO: 45 = magic number
- fdays = intersect(
- getNoNA2(data, max(today-memory,1), today-1)
- getSimilarDaysIndices(today, limit=45, same_season=TRUE) )
+ fdays = getNoNA2(data, max(today-memory,1), today-1)
# Get optional args
kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan"
fdays, today, horizon, list(...)$h_window, kernel, TRUE) )
}
+ # Indices of similar days for cross-validation; TODO: 45 = magic number
+ sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
+
# Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
errorOnLastNdays = function(h, kernel)
{
error = 0
nb_jours = 0
- for (day in fdays)
+ for (day in intersect(fdays,sdays))
{
# mix_strategy is never used here (simtype != "mix"), therefore left blank
prediction = private$.predictShapeAux(data,fdays,day,horizon,h,kernel,FALSE)
}
# h :: only for endo in this variation
- h_best = optimize(errorOnLastNdays, c(0,10), kernel=kernel)$minimum
+ h_best = optimize(errorOnLastNdays, c(0,7), kernel=kernel)$minimum
return (private$.predictShapeAux(data,fdays,today,horizon,h_best,kernel,TRUE))
}
),
if (length(fdays) < 3)
return (NA)
- # ENDO:: Distances from last observed day to days in the past
- distances2 = rep(NA, length(fdays))
- for (i in seq_along(fdays))
+ # Neighbors: days in "same season"
+ sdays = getSimilarDaysIndices(today, limit=45, same_season=TRUE, data)
+ indices = intersect(fdays,sdays)
+ levelToday = data$getLevel(today)
+ distances = sapply(seq_along(indices), function(i) abs(data$getLevel(i)-levelToday))
+ same_pollution = (distances <= 2)
+ if (sum(same_pollution) < 3) #TODO: 3 == magic number
{
- delta = data$getSerie(today) - data$getSerie(fdays[i])
- # Require at least half of non-NA common values to compute the distance
- if ( !any( is.na(delta) ) )
- distances2[i] = mean(delta^2)
+ same_pollution = (distances <= 5)
+ if (sum(same_pollution) < 3)
+ return (NA)
}
+ indices = indices[same_pollution]
+
+ # Now OK: indices same season, same pollution level
+ # ...........
+
+
+ # ENDO:: Distances from last observed day to days in the past
+ serieToday = data$getSerie(today)
+ distances2 = sapply(indices, function(i) {
+ delta = serieToday - data$getSerie(i)
+ distances2[i] = mean(delta^2)
+ })
sd_dist = sd(distances2)
if (sd_dist < .Machine$double.eps)
u
}
-
- # EXOGENS: distances computations are enough
- # TODO: search among similar concentrations....... at this stage ?!
- M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
- M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
- for (i in seq_along(fdays))
- M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
-
- sigma = cov(M) #NOTE: robust covariance is way too slow
- sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
-
- # Distances from last observed day to days in the past
- distances2 = rep(NA, nrow(M)-1)
- for (i in 2:nrow(M))
- {
- delta = M[1,] - M[i,]
- distances2[i-1] = delta %*% sigma_inv %*% delta
- }
-
- ppv <- sort(distances2, index.return=TRUE)$ix[1:10] #..............
-#PPV pour endo ?
-
-
- similarities =
- if (simtype == "exo")
- simils_exo
- else if (simtype == "endo")
- simils_endo
- else #mix
- simils_endo * simils_exo
+# # EXOGENS: distances computations are enough
+# # TODO: search among similar concentrations....... at this stage ?!
+# M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
+# M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
+# for (i in seq_along(fdays))
+# M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
+#
+# sigma = cov(M) #NOTE: robust covariance is way too slow
+# sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
+#
+# # Distances from last observed day to days in the past
+# distances2 = rep(NA, nrow(M)-1)
+# for (i in 2:nrow(M))
+# {
+# delta = M[1,] - M[i,]
+# distances2[i-1] = delta %*% sigma_inv %*% delta
+# }
+
+ similarities = simils_endo
prediction = rep(0, horizon)
- for (i in seq_along(fdays))
- prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
+ for (i in seq_along(indices))
+ prediction = prediction + similarities[i] * data$getSerie(indices[i]+1)[1:horizon]
prediction = prediction / sum(similarities, na.rm=TRUE)
if (final_call)
{
private$.params$weights <- similarities
- private$.params$indices <- fdays
+ private$.params$indices <- indices
private$.params$window <- h
}