various fixes (especially in plotFbox and Neighbors shape predict
[talweg.git] / pkg / R / F_Neighbors.R
index f1aecb5..ffb6d37 100644 (file)
@@ -61,19 +61,20 @@ NeighborsForecaster = setRefClass(
 
                        #GET OPTIONAL PARAMS
                        # Similarity computed with exogenous variables ? endogenous ? both ? ("exo","endo","mix")
-                       simtype = ifelse(hasArg("simtype"), list(...)$simtype, "exo")
+                       simtype = ifelse(hasArg("simtype"), list(...)$simtype, "mix")
                        simthresh = ifelse(hasArg("simthresh"), list(...)$simthresh, 0.)
-                       kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss")
-                       mix_strategy = ifelse(hasArg("mix_strategy"), list(...)$mix_strategy, "neighb") #or "mult"
-                       same_season = ifelse(hasArg("same_season"), list(...)$same_season, TRUE)
+                       kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan"
+                       mix_strategy = ifelse(hasArg("mix_strategy"), list(...)$mix_strategy, "mult") #or "neighb"
+                       same_season = ifelse(hasArg("same_season"), list(...)$same_season, FALSE)
                        if (hasArg(h_window))
                                return (.predictShapeAux(fdays_indices, today, horizon, list(...)$h_window, kernel,
-                                       simtype, simthresh, mix_strategy, FALSE))
+                                       simtype, simthresh, mix_strategy, TRUE))
                        #END GET
 
                        # Indices for cross-validation; TODO: 45 = magic number
                        indices = getSimilarDaysIndices(today, limit=45, same_season=same_season)
-                       #indices = (end_index-45):(end_index-1)
+                       if (tail(indices,1) == 1)
+                               indices = head(indices,-1)
 
                        # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
                        errorOnLastNdays = function(h, kernel, simtype)
@@ -150,10 +151,12 @@ NeighborsForecaster = setRefClass(
                                }
 
                                sd_dist = sd(distances2)
+                               if (sd_dist < .Machine$double.eps)
+                                       sd_dist = 1 #mostly for tests... FIXME:
                                simils_endo =
-                                       if (kernel=="Gauss") {
+                                       if (kernel=="Gauss")
                                                exp(-distances2/(sd_dist*h_endo^2))
-                                       else { #Epanechnikov
+                                       else { #Epanechnikov
                                                u = 1 - distances2/(sd_dist*h_endo^2)
                                                u[abs(u)>1] = 0.
                                                u
@@ -164,17 +167,16 @@ NeighborsForecaster = setRefClass(
                        {
                                h_exo = ifelse(simtype=="mix", h[2], h)
 
-                               # TODO: [rnormand] if predict_at == 0h then we should use measures from day minus 1
-                               M = matrix( nrow=1+length(fdays_indices), ncol=1+length(dat[[today]]$exo_hat) )
-                               M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo_hat) )
+                               M = matrix( nrow=1+length(fdays_indices), ncol=1+length(dat[[today]]$exo) )
+                               M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo) )
                                for (i in seq_along(fdays_indices))
                                {
                                        M[i+1,] = c( dat[[ fdays_indices[i] ]]$level,
-                                               as.double(dat[[ fdays_indices[i] ]]$exo_hat) )
+                                               as.double(dat[[ fdays_indices[i] ]]$exo) )
                                }
 
                                sigma = cov(M) #NOTE: robust covariance is way too slow
-                               sigma_inv = qr.solve(sigma)
+                               sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
 
                                # Distances from last observed day to days in the past
                                distances2 = rep(NA, nrow(M)-1)
@@ -203,10 +205,9 @@ NeighborsForecaster = setRefClass(
                                        #TODO: 60 = magic number
                                        keep_indices = sort(simils_exo, index.return=TRUE)$ix[1:(min(60,length(simils_exo)))]
                                        simils_endo[-keep_indices] = 0.
-                               } else #mix_strategy == "mult"
-                               {
-                                       simils_endo = simils_endo * simils_exo
                                }
+                               else #mix_strategy == "mult"
+                                       simils_endo = simils_endo * simils_exo
                        }
 
                        similarities =
@@ -236,8 +237,8 @@ NeighborsForecaster = setRefClass(
                        prediction = rep(0, horizon)
                        for (i in seq_along(fdays_indices))
                                prediction = prediction + similarities[i] * dat[[ fdays_indices[i]+1 ]]$serie[1:horizon]
-
                        prediction = prediction / sum(similarities, na.rm=TRUE)
+
                        if (final_call)
                        {
                                params$weights <<- similarities
@@ -251,6 +252,7 @@ NeighborsForecaster = setRefClass(
                                                c(h_endo,h_exo)
                                        }
                        }
+
                        return (prediction)
                }
        )