From cf3bb00128ac8cb930996455faf7c99a3fc102fb Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 25 Apr 2017 10:06:00 +0200
Subject: [PATCH] fix mistake in yersteday/today computations

---
 pkg/R/F_Neighbors.R | 82 +++++++++++++++++++++++----------------------
 pkg/R/plot.R        |  6 ++--
 pkg/R/utils.R       |  4 +--
 3 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R
index ea18bb6..f140b0b 100644
--- a/pkg/R/F_Neighbors.R
+++ b/pkg/R/F_Neighbors.R
@@ -45,14 +45,14 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 			private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
 
 			# Do not forecast on days with NAs (TODO: softer condition...)
-			if (any(is.na(data$getSerie(today-1)))
-				|| any(is.na(data$getSerie(today)[1:(predict_from-1)])))
+			if (any(is.na(data$getSerie(today-1))) ||
+				(predict_from>=2 && any(is.na(data$getSerie(today)[1:(predict_from-1)]))))
 			{
 				return (NA)
 			}
 
-			# Determine indices of no-NAs days followed by no-NAs tomorrows
-			fdays = .getNoNA2(data, max(today-memory,1), today-2)
+			# Determine indices of no-NAs days preceded by no-NAs yerstedays
+			tdays = .getNoNA2(data, max(today-memory,2), today-1)
 
 			# Get optional args
 			local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season?
@@ -60,12 +60,12 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 			if (hasArg("window"))
 			{
 				return ( private$.predictShapeAux(data,
-					fdays, today, predict_from, horizon, local, list(...)$window, simtype, TRUE) )
+					tdays, today, predict_from, horizon, local, list(...)$window, simtype, TRUE) )
 			}
 
 			# Indices of similar days for cross-validation; TODO: 20 = magic number
 			cv_days = getSimilarDaysIndices(today, data, limit=20, same_season=FALSE,
-				days_in=fdays)
+				days_in=tdays)
 
 			# Optimize h : h |--> sum of prediction errors on last N "similar" days
 			errorOnLastNdays = function(window, simtype)
@@ -75,13 +75,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				for (i in seq_along(cv_days))
 				{
 					# mix_strategy is never used here (simtype != "mix"), therefore left blank
-					prediction = private$.predictShapeAux(data, fdays, cv_days[i], predict_from,
+					prediction = private$.predictShapeAux(data, tdays, cv_days[i], predict_from,
 						horizon, local, window, simtype, FALSE)
 					if (!is.na(prediction[1]))
 					{
 						nb_jours = nb_jours + 1
 						error = error +
-							mean((data$getSerie(cv_days[i]+1)[predict_from:horizon] - prediction)^2)
+							mean((data$getSerie(cv_days[i])[predict_from:horizon] - prediction)^2)
 					}
 				}
 				return (error / nb_jours)
@@ -109,41 +109,41 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				else #none: value doesn't matter
 					1
 
-			return( private$.predictShapeAux(data, fdays, today, predict_from, horizon, local,
+			return( private$.predictShapeAux(data, tdays, today, predict_from, horizon, local,
 				best_window, simtype, TRUE) )
 		}
 	),
 	private = list(
 		# Precondition: "today" is full (no NAs)
-		.predictShapeAux = function(data, fdays, today, predict_from, horizon, local, window,
+		.predictShapeAux = function(data, tdays, today, predict_from, horizon, local, window,
 			simtype, final_call)
 		{
-			fdays_cut = fdays[ fdays < today ]
-			if (length(fdays_cut) <= 1)
+			tdays_cut = tdays[ tdays <= today-1 ]
+			if (length(tdays_cut) <= 1)
 				return (NA)
 
 			if (local)
 			{
 				# TODO: 60 == magic number
-				fdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
-					days_in=fdays_cut)
-				if (length(fdays) <= 1)
+				tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
+					days_in=tdays_cut)
+				if (length(tdays) <= 1)
 					return (NA)
 				# TODO: 10, 12 == magic numbers
-				fdays = .getConstrainedNeighbs(today,data,fdays,min_neighbs=10,max_neighbs=12)
-				if (length(fdays) == 1)
+				tdays = .getConstrainedNeighbs(today,data,tdays,min_neighbs=10,max_neighbs=12)
+				if (length(tdays) == 1)
 				{
 					if (final_call)
 					{
 						private$.params$weights <- 1
-						private$.params$indices <- fdays
+						private$.params$indices <- tdays
 						private$.params$window <- 1
 					}
-					return ( data$getSerie(fdays[1]+1)[predict_from:horizon] )
+					return ( data$getSerie(tdays[1])[predict_from:horizon] )
 				}
 			}
 			else
-				fdays = fdays_cut #no conditioning
+				tdays = tdays_cut #no conditioning
 
 			if (simtype == "endo" || simtype == "mix")
 			{
@@ -151,9 +151,11 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				window_endo = ifelse(simtype=="mix", window[1], window)
 
 				# Distances from last observed day to days in the past
-				lastSerie = c( data$getSerie(today-1), data$getSerie(today)[1:(predict_from-1)] )
-				distances2 = sapply(fdays, function(i) {
-					delta = lastSerie - c(data$getSerie(i),data$getSerie(i+1)[1:(predict_from-1)])
+				lastSerie = c( data$getSerie(today-1),
+					data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()] )
+				distances2 = sapply(tdays, function(i) {
+					delta = lastSerie - c(data$getSerie(i-1),
+						data$getSerie(i)[if (predict_from>=2) 1:(predict_from-1) else c()])
 					sqrt(mean(delta^2))
 				})
 
@@ -165,21 +167,21 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				# Compute exogen similarities using given window
 				window_exo = ifelse(simtype=="mix", window[2], window)
 
-				M = matrix( ncol=1+length(fdays), nrow=1+length(data$getExo(1)) )
+				M = matrix( ncol=1+length(tdays), nrow=1+length(data$getExo(1)) )
 				M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
-				for (i in seq_along(fdays))
-					M[,i+1] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
+				for (i in seq_along(tdays))
+					M[,i+1] = c( data$getLevel(tdays[i]), as.double(data$getExo(tdays[i])) )
 
 				sigma = cov(t(M)) #NOTE: robust covariance is way too slow
 				# TODO: 10 == magic number; more robust way == det, or always ginv()
 				sigma_inv =
-					if (length(fdays) > 10)
+					if (length(tdays) > 10)
 						solve(sigma)
 					else
 						MASS::ginv(sigma)
 
 				# Distances from last observed day to days in the past
-				distances2 = sapply(seq_along(fdays), function(i) {
+				distances2 = sapply(seq_along(tdays), function(i) {
 					delta = M[,1] - M[,i+1]
 					delta %*% sigma_inv %*% delta
 				})
@@ -195,20 +197,20 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				else if (simtype == "mix")
 					simils_endo * simils_exo
 				else #none
-					rep(1, length(fdays))
+					rep(1, length(tdays))
 			similarities = similarities / sum(similarities)
 
 			prediction = rep(0, horizon-predict_from+1)
-			for (i in seq_along(fdays))
+			for (i in seq_along(tdays))
 			{
 				prediction = prediction +
-					similarities[i] * data$getSerie(fdays[i]+1)[predict_from:horizon]
+					similarities[i] * data$getSerie(tdays[i])[predict_from:horizon]
 			}
 
 			if (final_call)
 			{
 				private$.params$weights <- similarities
-				private$.params$indices <- fdays
+				private$.params$indices <- tdays
 				private$.params$window <-
 					if (simtype=="endo")
 						window_endo
@@ -231,20 +233,20 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 #
 # @param today Index of current day
 # @param data Object of class Data
-# @param fdays Current set of "first days" (no-NA pairs)
+# @param tdays Current set of "second days" (no-NA pairs)
 # @param min_neighbs Minimum number of points in a neighborhood
 # @param max_neighbs Maximum number of points in a neighborhood
 #
-.getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12)
+.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10, max_neighbs=12)
 {
 	levelToday = data$getLevelHat(today)
 	levelYersteday = data$getLevel(today-1)
-	distances = sapply(fdays, function(i) {
-		sqrt((data$getLevel(i)-levelYersteday)^2 + (data$getLevel(i+1)-levelToday)^2)
+	distances = sapply(tdays, function(i) {
+		sqrt((data$getLevel(i-1)-levelYersteday)^2 + (data$getLevel(i)-levelToday)^2)
 	})
 	#TODO: 1, +1, +3 : magic numbers
 	dist_thresh = 1
-	min_neighbs = min(min_neighbs,length(fdays))
+	min_neighbs = min(min_neighbs,length(tdays))
 	repeat
 	{
 		same_pollution = (distances <= dist_thresh)
@@ -253,14 +255,14 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 			break
 		dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
 	}
-	fdays = fdays[same_pollution]
+	tdays = tdays[same_pollution]
 	max_neighbs = 12
 	if (nb_neighbs > max_neighbs)
 	{
 		# Keep only max_neighbs closest neighbors
-		fdays = fdays[ order(distances[same_pollution])[1:max_neighbs] ]
+		tdays = tdays[ order(distances[same_pollution])[1:max_neighbs] ]
 	}
-	fdays
+	tdays
 }
 
 # compute similarities
diff --git a/pkg/R/plot.R b/pkg/R/plot.R
index 59a26a7..ad0ed4e 100644
--- a/pkg/R/plot.R
+++ b/pkg/R/plot.R
@@ -236,10 +236,10 @@ plotRelVar = function(data, fil, predict_from)
 {
 	ref_var = c( apply(data$getSeries(fil$neighb_indices),1,sd),
 		apply(data$getSeries(fil$neighb_indices+1),1,sd) )
-	fdays = .getNoNA2(data, 1, fil$index-1)
+	tdays = .getNoNA2(data, 2, fil$index)
 	global_var = c(
-		apply(data$getSeries(fdays),1,sd),
-		apply(data$getSeries(fdays+1),1,sd) )
+		apply(data$getSeries(tdays-1),1,sd),
+		apply(data$getSeries(tdays),1,sd) )
 
 	yrange = range(ref_var, global_var)
 	par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5)
diff --git a/pkg/R/utils.R b/pkg/R/utils.R
index 96ec601..a4efd61 100644
--- a/pkg/R/utils.R
+++ b/pkg/R/utils.R
@@ -118,7 +118,7 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL)
 
 # getNoNA2
 #
-# Get indices in data of no-NA series followed by no-NA, within [first,last] range.
+# Get indices in data of no-NA series preceded by no-NA, within [first,last] range.
 #
 # @inheritParams dateIndexToInteger
 # @param first First index (included)
@@ -127,6 +127,6 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL)
 .getNoNA2 = function(data, first, last)
 {
 	(first:last)[ sapply(first:last, function(i)
-		!any( is.na(data$getSerie(i)) | is.na(data$getSerie(i+1)) )
+		!any( is.na(data$getSerie(i-1)) | is.na(data$getSerie(i)) )
 	) ]
 }
-- 
2.44.0