From a866acb3c0ae138b22df9dae9ec576b866794417 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 28 Mar 2017 18:53:04 +0200
Subject: [PATCH] cleaning - fix getSimilarDaysIndices ; to finish

---
 pkg/R/F_Neighbors2.R    | 40 ++++++++++++++++++++++++-------------
 pkg/R/computeForecast.R | 28 ++++++++++++++++++--------
 pkg/R/utils.R           | 44 ++++++++++++++++++++---------------------
 reports/report.ipynb    | 44 +++++++++++++++++++++++++----------------
 4 files changed, 95 insertions(+), 61 deletions(-)

diff --git a/pkg/R/F_Neighbors2.R b/pkg/R/F_Neighbors2.R
index fb63e40..83ef453 100644
--- a/pkg/R/F_Neighbors2.R
+++ b/pkg/R/F_Neighbors2.R
@@ -9,15 +9,15 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 	inherit = Forecaster,
 
 	public = list(
-		predictSerie = function(data, today, memory, horizon, ...)
-		{
-			# Parameters (potentially) computed during shape prediction stage
-			predicted_shape = self$predictShape(data, today, memory, horizon, ...)
-#			predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...)
-			# Predicted shape is aligned it on the end of current day + jump
-#			predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta
-			predicted_shape
-		},
+#		predictSerie = function(data, today, memory, horizon, ...)
+#		{
+#			# Parameters (potentially) computed during shape prediction stage
+#			predicted_shape = self$predictShape(data, today, memory, horizon, ...)
+##			predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...)
+#			# Predicted shape is aligned it on the end of current day + jump
+##			predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta
+#			predicted_shape
+#		},
 		predictShape = function(data, today, memory, horizon, ...)
 		{
 			# (re)initialize computed parameters
@@ -101,7 +101,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 		{
 			fdays = fdays[ fdays < today ]
 			# TODO: 3 = magic number
-			if (length(fdays) < 3)
+			if (length(fdays) < 1)
 				return (NA)
 
 			# Neighbors: days in "same season"
@@ -110,14 +110,26 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 			levelToday = data$getLevel(today)
 			distances = sapply(indices, function(i) abs(data$getLevel(i)-levelToday))
 			same_pollution = (distances <= 2)
-			if (sum(same_pollution) < 3) #TODO: 3 == magic number
+			if (sum(same_pollution) < 1) #TODO: 3 == magic number
 			{
 				same_pollution = (distances <= 5)
-				if (sum(same_pollution) < 3)
+				if (sum(same_pollution) < 1)
 					return (NA)
 			}
 			indices = indices[same_pollution]
 
+			#TODO: we shouldn't need that block
+			if (length(indices) == 1)
+			{
+				if (final_call)
+				{
+					private$.params$weights <- 1
+					private$.params$indices <- indices
+					private$.params$window <- 1
+				}
+				return ( data$getSerie(indices[1])[1:horizon] ) #what else?!
+			}
+
 			if (simtype != "exo")
 			{
 				h_endo = ifelse(simtype=="mix", h[1], h)
@@ -159,7 +171,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 				sigma = cov(M) #NOTE: robust covariance is way too slow
 #				sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
 				sigma_inv = MASS::ginv(sigma)
-#if (final_call) browser()
+
 				# Distances from last observed day to days in the past
 				distances2 = sapply(seq_along(indices), function(i) {
 					delta = M[1,] - M[i+1,]
@@ -200,7 +212,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 			if (final_call)
 			{
 				private$.params$weights <- similarities
-				private$.params$indices <- fdays
+				private$.params$indices <- indices
 				private$.params$window <-
 					if (simtype=="endo")
 						h_endo
diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R
index 3537e8a..bd19b8d 100644
--- a/pkg/R/computeForecast.R
+++ b/pkg/R/computeForecast.R
@@ -59,14 +59,26 @@ computeForecast = function(data, indices, forecaster, pjump,
 #oo = forecaster$predictSerie(data, integer_indices[1], memory, horizon, ...)
 #browser()
 
-	library(parallel)
-	ppp <- parallel::mclapply(seq_along(integer_indices), function(i) {
-		list(
-			"forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
-			"params"= forecaster$getParameters(),
-			"index" = integer_indices[i] )
-		}, mc.cores=3)
-
+	parll=TRUE #FALSE
+	if (parll)
+	{
+		library(parallel)
+		ppp <- parallel::mclapply(seq_along(integer_indices), function(i) {
+			list(
+				"forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
+				"params"= forecaster$getParameters(),
+				"index" = integer_indices[i] )
+			}, mc.cores=3)
+	}
+	else
+	{
+		ppp <- lapply(seq_along(integer_indices), function(i) {
+			list(
+				"forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
+				"params"= forecaster$getParameters(),
+				"index" = integer_indices[i] )
+			})
+	}
 #browser()
 
 for (i in seq_along(integer_indices))
diff --git a/pkg/R/utils.R b/pkg/R/utils.R
index f79c300..20f396b 100644
--- a/pkg/R/utils.R
+++ b/pkg/R/utils.R
@@ -65,7 +65,7 @@ getSimilarDaysIndices = function(index, limit, same_season, data=NULL)
 {
 	index = dateIndexToInteger(index)
 
-	#TODO: mardi similaire à lundi mercredi jeudi aussi ...etc
+	#TODO: mardi similaire à lundi mercredi jeudi aussi ...etc ==> "isSimilarDay()..."
 	if (!same_season)
 	{
 		#take all similar days in recent past
@@ -73,36 +73,36 @@ getSimilarDaysIndices = function(index, limit, same_season, data=NULL)
 		return ( rep(index,nb_days) - 7*seq_len(nb_days) )
 	}
 
-
-	#TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP
 	#Look for similar days in similar season
+	nb_days = min( (index-1) %/% 7, limit)
+	i = index - 7
 	days = c()
-	i = index
+	month_ref = as.POSIXlt(data$getTime(index)[1])$mon + 1
 	while (i >= 1 && length(days) < limit)
 	{
-		if (i < index)
-		{
+		if (isSameSeason(as.POSIXlt(data$getTime(i)[1])$mon + 1, month_ref))
 			days = c(days, i)
-			#look in the "future of the past"
-			for (j in 1:4)
-				days = c(days, i+7*j)
-		}
-		#...and in the "past of the past"
-		for (j in 1:4)
-		{
-			if (i - 7*j >= 1)
-				days = c(days, i-7*j)
-		}
-		# TODO: exact computation instead of -364
-		# 364 = closest multiple of 7 to 365 - drift along the years... but not so many years so OK
-		i = i - 364
-
-
+		i = i-7
 	}
+	return ( days )
+}
 
-	return ( days[1:min(limit,length(days))] )
+#TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP
+isSameSeason = function(month, month_ref)
+{
+	if (month_ref %in% c(11,12,1,2))
+		return (month %in% c(11,12,1,2))
+	if (month_ref %in% c(3,4,9,10))
+		return (month %in% c(3,4,9,10))
+	return (month %in% c(5,6,7,8))
 }
 
+#TODO:
+#distinction lun-jeudi, puis ven, sam, dim
+#isSameDay = function(day, day_ref)
+#{
+#	if (day_ref == 
+
 #' getNoNA2
 #'
 #' Get indices in data of no-NA series followed by no-NA, within [first,last] range.
diff --git a/reports/report.ipynb b/reports/report.ipynb
index 899fbf6..3dac8ec 100644
--- a/reports/report.ipynb
+++ b/reports/report.ipynb
@@ -59,8 +59,6 @@
     "editable": true
    },
    "source": [
-    "\n",
-    "\n",
     "<h2 style=\"color:blue;font-size:2em\">Pollution par chauffage</h2>"
    ]
   },
@@ -75,12 +73,34 @@
    "outputs": [],
    "source": [
     "reload(\"../pkg\")\n",
-    "p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n",
-    "p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
+    "#p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n",
+    "#p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n",
+    "#p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
     "p4 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"exo\")\n",
     "p5 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")\n"
+    "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "getSimilarDaysIndices(1000,10,TRUE,data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "as.POSIXlt(data$getTime(1000)[1])"
    ]
   },
   {
@@ -110,17 +130,7 @@
    },
    "outputs": [],
    "source": [
-    "e_nn_exo = computeError(data, p_nn_exo, 3)\n",
-    "e_nn_mix = computeError(data, p_nn_mix, 3)\n",
-    "e_az = computeError(data, p_az, 3)\n",
-    "e_pz = computeError(data, p_pz, 3)\n",
-    "options(repr.plot.width=9, repr.plot.height=7)\n",
-    "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n",
-    "\n",
-    "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n",
-    "\n",
-    "i_np = which.min(e_nn_exo$abs$indices)\n",
-    "i_p = which.max(e_nn_exo$abs$indices)"
+    "plotError(list(e4,e1,e2,e3, e5,e6), cols=c(1,2,3,4,5,6))"
    ]
   },
   {
-- 
2.44.0