From ea5c7e56ca05a51ce4f0535ffa08cda4c14bff4a Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Wed, 29 Mar 2017 11:40:08 +0200
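Subject: [PATCH] fix window bounds

Narrow the interval searched by optimize() in both Neighbors forecasters
from c(0,10) to c(0,7), and normalize the similarities before
accumulating the prediction instead of dividing the prediction afterwards.

Neighbors2: start the level-distance threshold at 2 and widen it by 3
until at least min(3, length(fdays)) neighbors are found, then keep only
the 10 closest ones.

Reports: comment out some FilBox plots and add a notebook cell that
displays f_np2. DESCRIPTION: RoxygenNote changes from 6.0.1 to 5.0.1.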

---
 pkg/DESCRIPTION      |  2 +-
 pkg/R/F_Neighbors.R  |  6 +++---
 pkg/R/F_Neighbors2.R | 23 ++++++++++++++++-------
 reports/report.gj    |  5 +++--
 reports/report.ipynb | 21 +++++++++++++++++----
 5 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION
index c90ce4f..849ff92 100644
--- a/pkg/DESCRIPTION
+++ b/pkg/DESCRIPTION
@@ -23,7 +23,7 @@ Suggests:
 LazyData: yes
 URL: http://git.auder.net/?p=talweg.git
 License: MIT + file LICENSE
-RoxygenNote: 6.0.1
+RoxygenNote: 5.0.1
 Collate:
     'A_NAMESPACE.R'
     'Data.R'
diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R
index c8a3355..27cd23a 100644
--- a/pkg/R/F_Neighbors.R
+++ b/pkg/R/F_Neighbors.R
@@ -56,12 +56,12 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 			if (simtype != "endo")
 			{
 				h_best_exo = optimize(
-					errorOnLastNdays, c(0,10), kernel=kernel, simtype="exo")$minimum
+					errorOnLastNdays, c(0,7), kernel=kernel, simtype="exo")$minimum
 			}
 			if (simtype != "exo")
 			{
 				h_best_endo = optimize(
-					errorOnLastNdays, c(0,10), kernel=kernel, simtype="endo")$minimum
+					errorOnLastNdays, c(0,7), kernel=kernel, simtype="endo")$minimum
 			}
 
 			if (simtype == "endo")
@@ -168,11 +168,11 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 					simils_endo
 				else #mix
 					simils_endo * simils_exo
+			similarities = similarities / sum(similarities)
 
 			prediction = rep(0, horizon)
 			for (i in seq_along(fdays))
 				prediction = prediction + similarities[i] * data$getCenteredSerie(fdays[i]+1)[1:horizon]
-			prediction = prediction / sum(similarities, na.rm=TRUE)
 
 			if (final_call)
 			{
diff --git a/pkg/R/F_Neighbors2.R b/pkg/R/F_Neighbors2.R
index 69e69dc..60916b4 100644
--- a/pkg/R/F_Neighbors2.R
+++ b/pkg/R/F_Neighbors2.R
@@ -56,12 +56,12 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 			if (simtype != "endo")
 			{
 				h_best_exo = optimize(
-					errorOnLastNdays, c(0,10), kernel=kernel, simtype="exo")$minimum
+					errorOnLastNdays, c(0,7), kernel=kernel, simtype="exo")$minimum
 			}
 			if (simtype != "exo")
 			{
 				h_best_endo = optimize(
-					errorOnLastNdays, c(0,10), kernel=kernel, simtype="endo")$minimum
+					errorOnLastNdays, c(0,7), kernel=kernel, simtype="endo")$minimum
 			}
 
 			if (simtype == "endo")
@@ -97,16 +97,25 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 				return (NA)
 			levelToday = data$getLevel(today)
 			distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday))
-			dist_thresh = 1
+			#TODO: 2, 3, 5, 10 magic numbers here...
+			dist_thresh = 2
+			min_neighbs = min(3,length(fdays))
 			repeat
 			{
 				same_pollution = (distances <= dist_thresh)
-				if (sum(same_pollution) >= 2) #will eventually happen
+				nb_neighbs = sum(same_pollution)
+				if (nb_neighbs >= min_neighbs) #will eventually happen
 					break
-				dist_thresh = dist_thresh + 1
+				dist_thresh = dist_thresh + 3
 			}
 			fdays = fdays[same_pollution]
-			if (length(fdays) == 1)
+			max_neighbs = 10
+			if (nb_neighbs > max_neighbs)
+			{
+				# Keep only max_neighbs closest neighbors
+				fdays = fdays[ sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ]
+			}
+			if (length(fdays) == 1) #the other extreme...
 			{
 				if (final_call)
 				{
@@ -194,11 +203,11 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 					simils_endo
 				else #mix
 					simils_endo * simils_exo
+			similarities = similarities / sum(similarities)
 
 			prediction = rep(0, horizon)
 			for (i in seq_along(fdays))
 				prediction = prediction + similarities[i] * data$getCenteredSerie(fdays[i]+1)[1:horizon]
-			prediction = prediction / sum(similarities, na.rm=TRUE)
 
 			if (final_call)
 			{
diff --git a/reports/report.gj b/reports/report.gj
index 657b1d7..1a6b8d9 100644
--- a/reports/report.gj
+++ b/reports/report.gj
@@ -82,8 +82,9 @@ par(mfrow=c(1,2))
 plotFilamentsBox(data, f_np); title(paste("FilBox nn day",i_np))
 plotFilamentsBox(data, f_p); title(paste("FilBox nn day",i_p))
 
-plotFilamentsBox(data, f_np2); title(paste("FilBox nn2 day",i_np))
-plotFilamentsBox(data, f_p2); title(paste("FilBox nn2 day",i_p))
+# Generally too few neighbors:
+#plotFilamentsBox(data, f_np2); title(paste("FilBox nn2 day",i_np))
+#plotFilamentsBox(data, f_p2); title(paste("FilBox nn2 day",i_p))
 -----r
 par(mfrow=c(1,2))
 plotRelVar(data, f_np); title(paste("StdDev nn day",i_np))
diff --git a/reports/report.ipynb b/reports/report.ipynb
index 8af4acd..c4656a9 100644
--- a/reports/report.ipynb
+++ b/reports/report.ipynb
@@ -87,7 +87,9 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "deletable": true,
+    "editable": true
    },
    "outputs": [],
    "source": [
@@ -171,11 +173,22 @@
    "outputs": [],
    "source": [
     "par(mfrow=c(1,2))\n",
-    "plotFilamentsBox(data, f_np); title(paste(\"FilBox nn day\",i_np))\n",
-    "plotFilamentsBox(data, f_p); title(paste(\"FilBox nn day\",i_p))\n",
+    "#plotFilamentsBox(data, f_np); title(paste(\"FilBox nn day\",i_np))\n",
+    "#plotFilamentsBox(data, f_p); title(paste(\"FilBox nn day\",i_p))\n",
     "\n",
     "plotFilamentsBox(data, f_np2); title(paste(\"FilBox nn2 day\",i_np))\n",
-    "plotFilamentsBox(data, f_p2); title(paste(\"FilBox nn2 day\",i_p))"
+    "#plotFilamentsBox(data, f_p2); title(paste(\"FilBox nn2 day\",i_p))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "f_np2"
    ]
   },
   {
-- 
2.44.0