From 841b7f5a10742afa28707ebf7dca273b38d8b8e4 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 21 Feb 2017 01:43:37 +0100
Subject: [PATCH] Fix yrange quantiles in plotCurves/computeFilaments; update report

---
 pkg/R/plot.R                               |  6 +--
 reports/report_2017-03-01.7h_average.ipynb | 56 +++++++++++++---------
 2 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/pkg/R/plot.R b/pkg/R/plot.R
index e414798..7b5f2b5 100644
--- a/pkg/R/plot.R
+++ b/pkg/R/plot.R
@@ -8,12 +8,12 @@
 #' @export
 plotCurves <- function(data, indices=seq_len(data$getSize()))
 {
-	yrange = quantile( range( sapply( indices, function(i) {
+	yrange = quantile( sapply( indices, function(i) {
 		serie = c(data$getCenteredSerie(i))
 		if (!all(is.na(serie)))
 			range(serie, na.rm=TRUE)
 		c()
-	}) ), probs=c(0.05,0.95) )
+	}), probs=c(0.05,0.95) )
 	par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5)
 	for (i in seq_along(indices))
 	{
@@ -75,7 +75,7 @@ computeFilaments <- function(data, index, limit=60, plot=TRUE)
 		sqrt( sum( (ref_serie - data$getCenteredSerie(i))^2 ) / L )
 	})
 	indices = sort(distances, index.return=TRUE)$ix[1:min(limit,length(distances))]
-	yrange = quantile( range( ref_serie, sapply( indices, function(i) {
+	yrange = quantile( c(ref_serie, sapply( indices, function(i) {
 		ii = fdays_indices[i]
 		serie = c(data$getCenteredSerie(ii), data$getCenteredSerie(ii+1))
 		if (!all(is.na(serie)))
diff --git a/reports/report_2017-03-01.7h_average.ipynb b/reports/report_2017-03-01.7h_average.ipynb
index eceb2d0..6296e29 100644
--- a/reports/report_2017-03-01.7h_average.ipynb
+++ b/reports/report_2017-03-01.7h_average.ipynb
@@ -54,8 +54,7 @@
     "p_ch_nn = getForecast(data,indices,\"Neighbors\",\"Neighbors\",simtype=\"mix\",same_season=FALSE,mix_strategy=\"mult\")\n",
     "p_ch_pz = getForecast(data, indices, \"Persistence\", \"Zero\", same_day=TRUE)\n",
     "p_ch_az = getForecast(data, indices, \"Average\", \"Zero\") #, memory=183)\n",
-    "#p_ch_zz = getForecast(data, indices, \"Zero\", \"Zero\")\n",
-    "#p_ch_l = getForecast(data, indices, \"Level\", same_day=FALSE)"
+    "#p_ch_zz = getForecast(data, indices, \"Zero\", \"Zero\")"
    ]
   },
   {
@@ -70,8 +69,7 @@
     "e_ch_pz = getError(data, p_ch_pz)\n",
     "e_ch_az = getError(data, p_ch_az)\n",
     "#e_ch_zz = getError(data, p_ch_zz)\n",
-    "#e_ch_l = getError(data, p_ch_l)\n",
-    "options(repr.plot.width=9, repr.plot.height=6)\n",
+    "options(repr.plot.width=9, repr.plot.height=7)\n",
     "plotError(list(e_ch_nn, e_ch_pz, e_ch_az), cols=c(1,2,colors()[258]))\n",
     "\n",
     "#Noir: neighbors, rouge: persistence, vert: moyenne"
@@ -149,18 +147,21 @@
    "outputs": [],
    "source": [
     "par(mfrow=c(2,2))\n",
-    "options(repr.plot.width=9, repr.plot.height=6)\n",
+    "options(repr.plot.width=9, repr.plot.height=7)\n",
     "plotFilamentsBox(data, f3$indices)\n",
     "plotFilamentsBox(data, f3$indices+1)\n",
     "plotFilamentsBox(data, f4$indices)\n",
-    "plotFilamentsBox(data, f4$indices+1)"
+    "plotFilamentsBox(data, f4$indices+1)\n",
+    "\n",
+    "#En haut : jour 3 + lendemain (4) ; en bas : jour 4 + lendemain (5)\n",
+    "#À gauche : premières 24h ; à droite : 24h suivantes"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Beaucoup de courbes similaires dans le cas peu pollué, très peu pour un jour pollué."
+    "Dans les deux cas, un voisinage \"raisonnable\" est trouvé ; mais grande variabilité le lendemain \"pollué\"."
    ]
   },
   {
@@ -173,8 +174,31 @@
    "source": [
     "par(mfrow=c(1,2))\n",
     "options(repr.plot.width=9, repr.plot.height=4)\n",
+    "plotRelativeVariability(data, f3$indices)\n",
+    "plotRelativeVariability(data, f4$indices)\n",
+    "\n",
+    "#Variabilité sur 60 courbes au hasard en rouge ; sur nos 60 voisins (+ lendemains) en noir"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Dans la situation idéale, il faudrait que la courbe noire soit nettement plus basse que la courbe rouge. Ce n'est pas très clair à J+1, surtout en soirée (de 21h à 0h)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "par(mfrow=c(1,3))\n",
     "plotSimils(p_ch_nn, 3)\n",
     "plotSimils(p_ch_nn, 4)\n",
+    "plotSimils(p_ch_nn, 5)\n",
     "\n",
     "#Non pollué à gauche, pollué au milieu, autre pollué à droite"
    ]
@@ -196,7 +220,7 @@
    "source": [
     "par(mfrow=c(1,2))\n",
     "plotPredReal(data, p_ch_nn, 5)\n",
-    "plotFilaments(data, p_ch_nn$getIndexInData(5))"
+    "ignored <- computeFilaments(data, p_ch_nn$getIndexInData(5), plot=TRUE)"
    ]
   },
   {
@@ -218,19 +242,7 @@
     "p_ep_nn = getForecast(data,indices,\"Neighbors\",\"Neighbors\",simtype=\"mix\",same_season=FALSE,mix_strategy=\"mult\")\n",
     "p_ep_pz = getForecast(data, indices, \"Persistence\", \"Zero\", same_day=TRUE)\n",
     "p_ep_az = getForecast(data, indices, \"Average\", \"Zero\") #, memory=183)\n",
-    "#p_ep_zz = getForecast(data, indices, \"Zero\", \"Zero\")\n",
-    "#p_ep_l = getForecast(data, indices, \"Level\", same_day=TRUE)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "class(p_ep_nn)"
+    "#p_ep_zz = getForecast(data, indices, \"Zero\", \"Zero\")"
    ]
   },
   {
@@ -246,7 +258,7 @@
     "e_ep_az = getError(data, p_ep_az)\n",
     "#e_ep_zz = getError(data, p_ep_zz)\n",
     "#e_ep_l = getError(data, p_ep_l)\n",
-    "options(repr.plot.width=9, repr.plot.height=6)\n",
+    "options(repr.plot.width=9, repr.plot.height=7)\n",
     "plotError(list(e_ep_nn, e_ep_pz, e_ep_az), cols=c(1,2,colors()[258]))\n",
     "\n",
     "#Noir: neighbors, rouge: persistence, vert: moyenne"
-- 
2.44.0