From: Benjamin Auder <benjamin.auder@somewhere>
Date: Thu, 16 Feb 2017 02:55:08 +0000 (+0100)
Subject: improvements, updated report
X-Git-Url: https://git.auder.net/doc/html/pieces/%7B%7B%20path%28%27fos_user_profile_show%27%29%20%7D%7D?a=commitdiff_plain;h=e5aa669afc0b71278d1a864fb0d4e2aff8032ef1;p=talweg.git

improvements, updated report
---

diff --git a/R/F_Level.R b/R/F_Level.R
index e0132ae..a186c1a 100644
--- a/R/F_Level.R
+++ b/R/F_Level.R
@@ -2,8 +2,7 @@
 #'
 #' @title Level Forecaster
 #'
-#' @description Return flat serie of last observed level (on similar day).
-#'   Inherits \code{\link{ShapeForecaster}}
+#' @description Return flat serie of last observed level. Inherits \code{\link{Forecaster}}
 LevelForecaster = setRefClass(
 	Class = "LevelForecaster",
 	contains = "Forecaster",
@@ -13,39 +12,23 @@ LevelForecaster = setRefClass(
 		{
 			callSuper(...)
 		},
-		predict = function(today, memory, horizon, all_memory=TRUE, ...)
+		predict = function(today, memory, horizon, ...)
 		{
-			#return last (similar) day level, or on all memory if all_memory==TRUE
+			#return last day level
 			first_day = max(1, today-memory)
-			index = today-7 + 1
-			if (all_memory)
-			{
-				sum_level = 0.
-				nb_series = 0
-			}
+			same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE)
+			index = today - ifelse(same_day,6,0)
 			repeat
 			{
 				{
-					last_similar_serie = data$getSerie(index)[1:horizon]
-					index = index - 7
+					last_serie = data$getSerie(index)[1:horizon]
+					index = index - ifelse(same_day,7,1)
 				};
 				#TODO: next test is too strict
-				if (!any(is.na(last_similar_serie)))
-				{
-					if (all_memory)
-					{
-						sum_level = sum_level + mean(last_similar_serie)
-						nb_series = nb_series + 1
-					}
-					else
-						return (rep(mean(last_similar_serie), horizon))
-				};
+				if (!any(is.na(last_serie)))
+					return (rep(mean(last_serie), horizon));
 				if (index < first_day)
-				{
-					if (all_memory)
-						return (rep(sum_level / nb_series, horizon))
 					return (NA)
-				}
 			}
 		}
 	)
diff --git a/R/F_Persistence.R b/R/F_Persistence.R
index f078484..19cf2e4 100644
--- a/R/F_Persistence.R
+++ b/R/F_Persistence.R
@@ -2,7 +2,7 @@
 #'
 #' @title Persistence Forecaster
 #'
-#' @description Return the last centered last (similar) day curve.
+#' @description Return the last centered (similar) day curve.
 #'   Inherits \code{\link{Forecaster}}
 PersistenceForecaster = setRefClass(
 	Class = "PersistenceForecaster",
@@ -15,17 +15,19 @@ PersistenceForecaster = setRefClass(
 		},
 		predictShape = function(today, memory, horizon, ...)
 		{
-			#return centered last (similar) day curve, avoiding NAs until memory is run
+			# Return centered last (similar) day curve, avoiding NAs until memory is run
 			first_day = max(1, today-memory)
-			index = today-7 + 1
+			same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE)
+			# If 'same_day', get the last known future of similar day: -7 + 1 == -6
+			index = today - ifelse(same_day,6,0)
 			repeat
 			{
 				{
-					last_similar_serie = data$getCenteredSerie(index)[1:horizon]
-					index = index - 7
+					last_serie = data$getCenteredSerie(index)[1:horizon]
+					index = index - ifelse(same_day,7,1)
 				};
-				if (!any(is.na(last_similar_serie)))
-					return (last_similar_serie);
+				if (!any(is.na(last_serie)))
+					return (last_serie);
 				if (index < first_day)
 					return (NA)
 			}
diff --git a/R/J_Persistence.R b/R/J_Persistence.R
index 744b42a..8ac597c 100644
--- a/R/J_Persistence.R
+++ b/R/J_Persistence.R
@@ -6,16 +6,17 @@ getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...)
 {
 	#return gap between end of similar day curve and first day of tomorrow (in the past)
 	first_day = max(1, today-memory)
-	index = today-7
+	same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE)
+	index = today - ifelse(same_day,7,1)
 	repeat
 	{
 		{
-			last_similar_serie_end = tail( data$getCenteredSerie(index), 1)
-			last_similar_tomorrow_begin = data$getCenteredSerie(index+1)[1]
-			index = index - 7
+			last_serie_end = tail( data$getSerie(index), 1)
+			last_tomorrow_begin = data$getSerie(index+1)[1]
+			index = index - ifelse(same_day,7,1)
 		};
-		if (!is.na(last_similar_serie_end) && !is.na(last_similar_tomorrow_begin))
-			return (last_similar_tomorrow_begin - last_similar_serie_end);
+		if (!is.na(last_serie_end) && !is.na(last_tomorrow_begin))
+			return (last_tomorrow_begin - last_serie_end);
 		if (index < first_day)
 			return (NA)
 	}
diff --git a/R/getForecast.R b/R/getForecast.R
index fde8e45..c07be7c 100644
--- a/R/getForecast.R
+++ b/R/getForecast.R
@@ -37,7 +37,7 @@
 #'   #do_something_with_pred
 #' }}
 #' @export
-getForecast = function(data, indices, forecaster, pjump,
+getForecast = function(data, indices, forecaster, pjump=NULL,
 	memory=Inf, horizon=data$getStdHorizon(), ...)
 {
 	# (basic) Arguments sanity checks
@@ -48,12 +48,16 @@ getForecast = function(data, indices, forecaster, pjump,
 	if (any(indices<=0 | indices>data$getSize()))
 		stop("Indices out of range")
 	indices = sapply(indices, dateIndexToInteger, data)
-	if (!is.character(forecaster) || !is.character(pjump))
-		stop("forecaster and pjump should be of class character")
+	if (!is.character(forecaster))
+		stop("forecaster (name) should be of class character") #pjump could be NULL
 
 	pred = list()
 	forecaster = new(paste(forecaster,"Forecaster",sep=""), data=data,
-		pjump = getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg"))
+		pjump =
+			if (is.null(pjump))
+				function() {}
+			else
+				getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg"))
 	for (today in indices)
 	{
 		pred[[length(pred)+1]] = list(
diff --git a/R/plot.R b/R/plot.R
index 9a0dbcd..b720e9a 100644
--- a/R/plot.R
+++ b/R/plot.R
@@ -71,7 +71,7 @@ plotFilaments <- function(data, index, limit=60)
 		index = i - first_day + 1
 		serie = c(data$getCenteredSerie(index), data$getCenteredSerie(index+1))
 		if (!all(is.na(serie)))
-			range(serie, na.rm=TRUE)
+			return (range(serie, na.rm=TRUE))
 		c()
 	}) )
 	grays = gray.colors(20, 0.1, 0.9) #TODO: 20 == magic number
diff --git a/reports/report_2017-03-01.ipynb b/reports/report_2017-03-01.ipynb
index 3e58706..26f4682 100644
--- a/reports/report_2017-03-01.ipynb
+++ b/reports/report_2017-03-01.ipynb
@@ -35,7 +35,7 @@
     " * same_season=FALSE : les indices pour la validation croisée ne tiennent pas compte des saisons\n",
     " * mix_strategy=\"mult\" : on multiplie les poids (au lieu d'en éteindre)\n",
     "\n",
-    "J'ai systématiquement comparé à deux autres approches : la persistence et la moyenne de tous les futurs des jours similaires du passé ; à chaque fois sans prédiction du saut (sauf pour Neighbors : prédiction basée sur les poids calculés).\n",
+    "J'ai systématiquement comparé à deux autres approches : la persistence et la répétition de la dernière valeur observée (sur tout l'horizon, donc \"zero\") ; à chaque fois sans prédiction du saut (sauf pour Neighbors : prédiction basée sur les poids calculés).\n",
     "\n",
     "Ensuite j'affiche les erreurs, quelques courbes prévues/mesurées, quelques filaments puis les histogrammes de quelques poids. Concernant les graphes de filaments, la moitié gauche du graphe correspond aux jours similaires au jour courant, tandis que la moitié droite affiche les lendemains : ce sont donc les voisinages tels qu'utilisés dans l'algorithme.\n",
     "\n",
@@ -52,9 +52,10 @@
    "source": [
     "indices = seq(as.Date(\"2015-01-18\"),as.Date(\"2015-01-24\"),\"days\")\n",
     "p_ch_nn = getForecast(data,indices,\"Neighbors\",\"Neighbors\",simtype=\"mix\",same_season=FALSE,mix_strategy=\"mult\")\n",
-    "p_ch_pz = getForecast(data, indices, \"Persistence\", \"Zero\")\n",
-    "p_ch_az = getForecast(data, indices, \"Average\", \"Zero\")\n",
-    "p_ch_zz = getForecast(data, indices, \"Zero\", \"Zero\")"
+    "p_ch_pz = getForecast(data, indices, \"Persistence\", \"Zero\", same_day=TRUE)\n",
+    "#p_ch_az = getForecast(data, indices, \"Average\", \"Zero\")\n",
+    "p_ch_zz = getForecast(data, indices, \"Zero\", \"Zero\")\n",
+    "#p_ch_l = getForecast(data, indices, \"Level\", same_day=FALSE)"
    ]
   },
   {
@@ -67,18 +68,20 @@
    "source": [
     "e_ch_nn = getError(data, p_ch_nn)\n",
     "e_ch_pz = getError(data, p_ch_pz)\n",
-    "e_ch_az = getError(data, p_ch_az)\n",
+    "#e_ch_az = getError(data, p_ch_az)\n",
+    "e_ch_zz = getError(data, p_ch_zz)\n",
+    "#e_ch_l = getError(data, p_ch_l)\n",
     "options(repr.plot.width=9, repr.plot.height=6)\n",
-    "plotError(list(e_ch_nn, e_ch_pz, e_ch_az), cols=c(1,2,colors()[258]))\n",
+    "plotError(list(e_ch_nn, e_ch_pz, e_ch_zz), cols=c(1,2,colors()[258]))\n",
     "\n",
-    "#Noir: neighbors, rouge: persistence, vert: moyenne"
+    "#Noir: neighbors, rouge: persistence, vert: zero"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "La méthode Neighbors fait assez nettement mieux qu'une simple moyenne dans ce cas."
+    "La méthode Neighbors fait assez nettement mieux que les autres dans ce cas."
    ]
   },
   {
@@ -177,10 +180,10 @@
    "source": [
     "indices = seq(as.Date(\"2015-03-15\"),as.Date(\"2015-03-21\"),\"days\")\n",
     "p_ep_nn = getForecast(data,indices,\"Neighbors\",\"Neighbors\",simtype=\"mix\",same_season=FALSE,mix_strategy=\"mult\")\n",
-    "p_ep_pz = getForecast(data, indices, \"Persistence\", \"Zero\")\n",
-    "p_ep_az = getForecast(data, indices, \"Average\", \"Zero\")\n",
+    "p_ep_pz = getForecast(data, indices, \"Persistence\", \"Zero\", same_day=TRUE)\n",
+    "#p_ep_az = getForecast(data, indices, \"Average\", \"Zero\")\n",
     "p_ep_zz = getForecast(data, indices, \"Zero\", \"Zero\")\n",
-    "p_ep_lz = getForecast(data, indices, \"Level\", \"Zero\")"
+    "#p_ep_l = getForecast(data, indices, \"Level\", same_day=TRUE)"
    ]
   },
   {
@@ -193,11 +196,13 @@
    "source": [
     "e_ep_nn = getError(data, p_ep_nn)\n",
     "e_ep_pz = getError(data, p_ep_pz)\n",
-    "e_ep_az = getError(data, p_ep_az)\n",
+    "#e_ep_az = getError(data, p_ep_az)\n",
+    "e_ep_zz = getError(data, p_ep_zz)\n",
+    "#e_ep_l = getError(data, p_ep_l)\n",
     "options(repr.plot.width=9, repr.plot.height=6)\n",
-    "plotError(list(e_ep_nn, e_ep_pz, e_ep_az), cols=c(1,2,colors()[258]))\n",
+    "plotError(list(e_ep_nn, e_ep_pz, e_ep_zz), cols=c(1,2,colors()[258]))\n",
     "\n",
-    "#Noir: neighbors, rouge: persistence, vert: moyenne"
+    "#Noir: neighbors, rouge: persistence, vert: zero"
    ]
   },
   {
@@ -278,10 +283,10 @@
    "source": [
     "indices = seq(as.Date(\"2015-04-26\"),as.Date(\"2015-05-02\"),\"days\")\n",
     "p_np_nn = getForecast(data,indices,\"Neighbors\",\"Neighbors\",simtype=\"mix\",same_season=FALSE,mix_strategy=\"mult\")\n",
-    "p_np_pz = getForecast(data, indices, \"Persistence\", \"Zero\")\n",
-    "p_np_az = getForecast(data, indices, \"Average\", \"Zero\")\n",
+    "p_np_pz = getForecast(data, indices, \"Persistence\", \"Zero\", same_day=FALSE)\n",
+    "#p_np_az = getForecast(data, indices, \"Average\", \"Zero\")\n",
     "p_np_zz = getForecast(data, indices, \"Zero\", \"Zero\")\n",
-    "p_np_lz = getForecast(data, indices, \"Level\", \"Zero\")"
+    "#p_np_l = getForecast(data, indices, \"Level\", same_day=FALSE)"
    ]
   },
   {
@@ -294,18 +299,20 @@
    "source": [
     "e_np_nn = getError(data, p_np_nn)\n",
     "e_np_pz = getError(data, p_np_pz)\n",
-    "e_np_az = getError(data, p_np_az)\n",
+    "#e_np_az = getError(data, p_np_az)\n",
+    "e_np_zz = getError(data, p_np_zz)\n",
+    "#e_np_l = getError(data, p_np_l)\n",
     "options(repr.plot.width=9, repr.plot.height=6)\n",
-    "plotError(list(e_np_nn, e_np_pz, e_np_az), cols=c(1,2,colors()[258]))\n",
+    "plotError(list(e_np_nn, e_np_pz, e_np_zz), cols=c(1,2,colors()[258]))\n",
     "\n",
-    "#Noir: neighbors, rouge: persistence, vert: moyenne"
+    "#Noir: neighbors, rouge: persistence, vert: zero"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Performances des méthodes \"Average\" et \"Neighbors\" comparables ; mauvais résultats pour la persistence."
+    "Performances des méthodes \"Zero\" et \"Neighbors\" comparables ; mauvais résultats pour la persistence."
    ]
   },
   {
@@ -376,7 +383,7 @@
    "source": [
     "## Bilan\n",
     "\n",
-    "Problème difficile : on ne fait guère mieux qu'une naïve moyenne des lendemains des jours similaires dans le passé.\n",
+    "Problème difficile : on ne fait guère mieux qu'une naïve moyenne des lendemains des jours similaires dans le passé, ce qui est à peu près équivalent à prédire une série constante égale à la dernière valeur observée (méthode \"zéro\"). La persistence donne parfois de bons résultats mais est trop instable (sensibilité à l'argument <code>same_day</code>).\n",
     "\n",
     "Comment amÃ©liorer la mÃ©thode ?"
    ]