From 5c49f6cecd547358b327e9363e62bcc8219e9e33 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Mon, 27 Mar 2017 03:22:55 +0200
Subject: [PATCH] draft Neighbors2; fix bug in Neighbors1

---
 pkg/R/F_Neighbors.R  |   4 +-
 pkg/R/F_Neighbors2.R | 149 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 pkg/R/F_Neighbors2.R

diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R
index 5b1f826..600c5c8 100644
--- a/pkg/R/F_Neighbors.R
+++ b/pkg/R/F_Neighbors.R
@@ -103,7 +103,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 					# Require at least half of non-NA common values to compute the distance
 					if ( !any( is.na(delta) ) )
 						distances2[i] = mean(delta^2)
-				}
+				Centered}
 
 				sd_dist = sd(distances2)
 				if (sd_dist < .Machine$double.eps)
@@ -171,7 +171,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 
 			prediction = rep(0, horizon)
 			for (i in seq_along(fdays))
-				prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
+				prediction = prediction + similarities[i] * data$getCenteredSerie(fdays[i]+1)[1:horizon]
 			prediction = prediction / sum(similarities, na.rm=TRUE)
 
 			if (final_call)
diff --git a/pkg/R/F_Neighbors2.R b/pkg/R/F_Neighbors2.R
new file mode 100644
index 0000000..e6addde
--- /dev/null
+++ b/pkg/R/F_Neighbors2.R
@@ -0,0 +1,149 @@
+#' @include Forecaster.R
+#'
+#' Neighbors2 Forecaster
+#'
+#' Predict tomorrow as a weighted combination of "futures of the past" days.
+#' Inherits \code{\link{Forecaster}}
+#'
+Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
+	inherit = Forecaster,
+
+	public = list(
+		predictShape = function(data, today, memory, horizon, ...)
+		{
+			# (re)initialize computed parameters
+			private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
+
+			# Do not forecast on days with NAs (TODO: softer condition...)
+			if (any(is.na(data$getCenteredSerie(today))))
+				return (NA)
+
+			# Determine indices of no-NAs days followed by no-NAs tomorrows
+			fdays = getNoNA2(data, max(today-memory,1), today-1)
+
+			# Get optional args
+			kernel = ifelse(hasArg("kernel"), list(...)$kernel, "Gauss") #or "Epan"
+			if (hasArg(h_window))
+			{
+				return ( private$.predictShapeAux(data,
+					fdays, today, horizon, list(...)$h_window, kernel, TRUE) )
+			}
+
+
+			# Indices of similar days for cross-validation; TODO: 45 = magic number
+			# TODO: ici faut une sorte de "same_season==TRUE" --> mois similaires epandage
+			sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
+
+
+			# Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
+			errorOnLastNdays = function(h, kernel)
+			{
+				error = 0
+				nb_jours = 0
+				for (i in intersect(fdays,sdays))
+				{
+					# mix_strategy is never used here (simtype != "mix"), therefore left blank
+					prediction = private$.predictShapeAux(data, fdays, i, horizon, h, kernel, FALSE)
+					if (!is.na(prediction[1]))
+					{
+						nb_jours = nb_jours + 1
+						error = error +
+							mean((data$getSerie(i+1)[1:horizon] - prediction)^2)
+					}
+				}
+				return (error / nb_jours)
+			}
+
+			# h :: only for endo in this variation
+			h_best_endo = optimize(errorOnLastNdays, c(0,10), kernel=kernel)$minimum
+
+			return (private$.predictShapeAux(data, fdays, today, horizon, h_best, kernel, TRUE))
+		}
+	),
+	private = list(
+		# Precondition: "today" is full (no NAs)
+		.predictShapeAux = function(data, fdays, today, horizon, h, kernel, final_call)
+		{
+			fdays = fdays[ fdays < today ]
+			# TODO: 3 = magic number
+			if (length(fdays) < 3)
+				return (NA)
+
+			# ENDO:: Distances from last observed day to days in the past
+			distances2 = rep(NA, length(fdays))
+			for (i in seq_along(fdays))
+			{
+				delta = data$getSerie(today) - data$getSerie(fdays[i])
+				# Require at least half of non-NA common values to compute the distance
+				if ( !any( is.na(delta) ) )
+					distances2[i] = mean(delta^2)
+			}
+
+			sd_dist = sd(distances2)
+			if (sd_dist < .Machine$double.eps)
+			{
+#					warning("All computed distances are very close: stdev too small")
+				sd_dist = 1 #mostly for tests... FIXME:
+			}
+			simils_endo =
+				if (kernel=="Gauss")
+					exp(-distances2/(sd_dist*h_endo^2))
+				else
+				{
+					# Epanechnikov
+					u = 1 - distances2/(sd_dist*h_endo^2)
+					u[abs(u)>1] = 0.
+					u
+				}
+
+			# EXOGENS: distances computations are enough
+			# TODO: search among similar concentrations....... at this stage ?!
+			M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
+			M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
+			for (i in seq_along(fdays))
+				M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
+
+			sigma = cov(M) #NOTE: robust covariance is way too slow
+			sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
+
+			# Distances from last observed day to days in the past
+			distances2 = rep(NA, nrow(M)-1)
+			for (i in 2:nrow(M))
+			{
+				delta = M[1,] - M[i,]
+				distances2[i-1] = delta %*% sigma_inv %*% delta
+			}
+
+			ppv <- sort(distances2, index.return=TRUE)$ix[1:10] #..............
+#PPV pour endo ?
+
+			similarities =
+				if (simtype == "exo")
+					simils_exo
+				else if (simtype == "endo")
+					simils_endo
+				else #mix
+					simils_endo * simils_exo
+
+			prediction = rep(0, horizon)
+			for (i in seq_along(fdays))
+				prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
+			prediction = prediction / sum(similarities, na.rm=TRUE)
+
+			if (final_call)
+			{
+				private$.params$weights <- similarities
+				private$.params$indices <- fdays
+				private$.params$window <-
+					if (simtype=="endo")
+						h_endo
+					else if (simtype=="exo")
+						h_exo
+					else #mix
+						c(h_endo,h_exo)
+			}
+
+			return (prediction)
+		}
+	)
+)
-- 
2.44.0