X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=pkg%2FR%2FF_Neighbors.R;h=ffb6d371df57a910451f0a79ffeed340886132ff;hb=99f83c9af27492f6fb9b10f51fb8704ed588f5c1;hp=f1aecb5ccb637a581e1984f444564786ce561ff6;hpb=469529710f56c790ae932b45d13fed2e34bcabf2;p=talweg.git diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index f1aecb5..ffb6d37 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -61,19 +61,20 @@ NeighborsForecaster = setRefClass( #GET OPTIONAL PARAMS # Similarity computed with exogenous variables ? endogenous ? both ? ("exo","endo","mix") - simtype = ifelse(hasArg("simtype"), list(...)$simtype, "exo") + simtype = ifelse(hasArg("simtype"), list(...)$simtype, "mix") simthresh = ifelse(hasArg("simthresh"), list(...)$simthresh, 0.) - kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") - mix_strategy = ifelse(hasArg("mix_strategy"), list(...)$mix_strategy, "neighb") #or "mult" - same_season = ifelse(hasArg("same_season"), list(...)$same_season, TRUE) + kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan" + mix_strategy = ifelse(hasArg("mix_strategy"), list(...)$mix_strategy, "mult") #or "neighb" + same_season = ifelse(hasArg("same_season"), list(...)$same_season, FALSE) if (hasArg(h_window)) return (.predictShapeAux(fdays_indices, today, horizon, list(...)$h_window, kernel, - simtype, simthresh, mix_strategy, FALSE)) + simtype, simthresh, mix_strategy, TRUE)) #END GET # Indices for cross-validation; TODO: 45 = magic number indices = getSimilarDaysIndices(today, limit=45, same_season=same_season) - #indices = (end_index-45):(end_index-1) + if (tail(indices,1) == 1) + indices = head(indices,-1) # Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days errorOnLastNdays = function(h, kernel, simtype) @@ -150,10 +151,12 @@ NeighborsForecaster = setRefClass( } sd_dist = sd(distances2) + if (sd_dist < .Machine$double.eps) + sd_dist = 1 #mostly for tests... FIXME: simils_endo = - if (kernel=="Gauss") { + if (kernel=="Gauss") exp(-distances2/(sd_dist*h_endo^2)) - } else { #Epanechnikov + else { #Epanechnikov u = 1 - distances2/(sd_dist*h_endo^2) u[abs(u)>1] = 0. u @@ -164,17 +167,16 @@ NeighborsForecaster = setRefClass( { h_exo = ifelse(simtype=="mix", h[2], h) - # TODO: [rnormand] if predict_at == 0h then we should use measures from day minus 1 - M = matrix( nrow=1+length(fdays_indices), ncol=1+length(dat[[today]]$exo_hat) ) - M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo_hat) ) + M = matrix( nrow=1+length(fdays_indices), ncol=1+length(dat[[today]]$exo) ) + M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo) ) for (i in seq_along(fdays_indices)) { M[i+1,] = c( dat[[ fdays_indices[i] ]]$level, - as.double(dat[[ fdays_indices[i] ]]$exo_hat) ) + as.double(dat[[ fdays_indices[i] ]]$exo) ) } sigma = cov(M) #NOTE: robust covariance is way too slow - sigma_inv = qr.solve(sigma) + sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed? # Distances from last observed day to days in the past distances2 = rep(NA, nrow(M)-1) @@ -203,10 +205,9 @@ NeighborsForecaster = setRefClass( #TODO: 60 = magic number keep_indices = sort(simils_exo, index.return=TRUE)$ix[1:(min(60,length(simils_exo)))] simils_endo[-keep_indices] = 0. - } else #mix_strategy == "mult" - { - simils_endo = simils_endo * simils_exo } + else #mix_strategy == "mult" + simils_endo = simils_endo * simils_exo } similarities = @@ -236,8 +237,8 @@ NeighborsForecaster = setRefClass( prediction = rep(0, horizon) for (i in seq_along(fdays_indices)) prediction = prediction + similarities[i] * dat[[ fdays_indices[i]+1 ]]$serie[1:horizon] - prediction = prediction / sum(similarities, na.rm=TRUE) + if (final_call) { params$weights <<- similarities @@ -251,6 +252,7 @@ NeighborsForecaster = setRefClass( c(h_endo,h_exo) } } + return (prediction) } )