Neighbors2 no longer predicts directly: it uses "recollement" (re-gluing), as Neighbors does (better)
[talweg.git] / pkg / R / F_Neighbors.R
index 5b2c899..c8a3355 100644 (file)
@@ -30,12 +30,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                                        fdays, today, horizon, list(...)$h_window, kernel, simtype, TRUE) )
                        }
 
-                       # Indices of similar days for cross-validation; TODO: 45 = magic number
-                       sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
-
-                       cv_days = intersect(fdays,sdays)
-                       # Limit to 20 most recent matching days (TODO: 20 == magic number)
-                       cv_days = sort(cv_days,decreasing=TRUE)[1:min(20,length(cv_days))]
+                       # Indices of similar days for cross-validation; TODO: 20 = magic number
+                       cv_days = getSimilarDaysIndices(today, data, limit=20, same_season=FALSE, days_in=fdays)
 
                        # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
                        errorOnLastNdays = function(h, kernel, simtype)
@@ -134,7 +130,12 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                                        M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
 
                                sigma = cov(M) #NOTE: robust covariance is way too slow
-                               sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
+                               # TODO: 10 is a magic number; a more robust test would check det(sigma), or always use ginv()
+                               sigma_inv =
+                                       if (length(fdays) > 10)
+                                               solve(sigma)
+                                       else
+                                               MASS::ginv(sigma)
 
                                # Distances from last observed day to days in the past
                                distances2 = sapply(seq_along(fdays), function(i) {
@@ -143,7 +144,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                                })
 
                                sd_dist = sd(distances2)
-                               if (sd_dist < .Machine$double.eps)
+                               if (sd_dist < .25 * sqrt(.Machine$double.eps))
                                {
 #                                      warning("All computed distances are very close: stdev too small")
                                        sd_dist = 1 #mostly for tests... FIXME: