From d444b27afb654f1aa435a0b237b91f8e11c37a6b Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Thu, 16 Mar 2017 20:20:33 +0100 Subject: [PATCH] fix ipynb_generator.py --- .gitignore | 1 + reports/ipynb_generator.py | 6 +- reports/report_7h_H3.ipynb | 548 +++++++++++++++++++++++++++++++++++++ 3 files changed, 551 insertions(+), 4 deletions(-) create mode 100644 reports/report_7h_H3.ipynb diff --git a/.gitignore b/.gitignore index c4370d2..6bf4539 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ data/*.csv NAMESPACE /reports/*.html /pkg/vignettes/*.html +nohup.out diff --git a/reports/ipynb_generator.py b/reports/ipynb_generator.py index 339f96a..ce546ad 100755 --- a/reports/ipynb_generator.py +++ b/reports/ipynb_generator.py @@ -63,9 +63,7 @@ def read(text, argv=sys.argv[3:]): if shortname: # Check if code is to be typeset as static # Markdown code (e.g., shortname=py-t) - .format(shortname)) astext = shortname[-2:] == '-t' - .format(astext, shortname)) if astext: # Markdown shortname = shortname[:-2] @@ -123,7 +121,7 @@ def driver(): """Compile a document and its variables.""" try: inputfile = sys.argv[1] - with open(filename, 'r') as f: + with open(inputfile, 'r') as f: text = f.read() outputfile = '-' if len(sys.argv) <= 2 else sys.argv[2] except (IndexError, IOError) as e: @@ -134,7 +132,7 @@ def driver(): filestr = write(cells) # Assuming file extension .gj (generate Jupyter); TODO: less strict outputfile = inputfile[:-3]+'.ipynb' if outputfile == '-' else outputfile - with open(filename, 'w') as f: + with open(outputfile, 'w') as f: f.write(filestr) if __name__ == '__main__': diff --git a/reports/report_7h_H3.ipynb b/reports/report_7h_H3.ipynb new file mode 100644 index 0000000..5cb789a --- /dev/null +++ b/reports/report_7h_H3.ipynb @@ -0,0 +1,548 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "

Introduction

\n", + "\n", + "J'ai fait quelques essais dans différentes configurations pour la méthode \"Neighbors\"\n", + "(la seule dont on a parlé).
Il semble que le mieux soit\n", + "\n", + " * simtype=\"exo\" ou \"mix\" : similarités exogènes avec/sans endogènes (fenêtre optimisée par VC)\n", + " * same_season=FALSE : les indices pour la validation croisée ne tiennent pas compte des saisons\n", + " * mix_strategy=\"mult\" : on multiplie les poids (au lieu d'en éteindre)\n", + "\n", + "J'ai systématiquement comparé à une approche naïve : la moyenne des lendemains des jours\n", + "\"similaires\" dans tout le passé ; à chaque fois sans prédiction du saut (sauf pour Neighbors :\n", + "prédiction basée sur les poids calculés).\n", + "\n", + "Ensuite j'affiche les erreurs, quelques courbes prévues/mesurées, quelques filaments puis les\n", + "histogrammes de quelques poids. Concernant les graphes de filaments, la moitié gauche du graphe\n", + "correspond aux jours similaires au jour courant, tandis que la moitié droite affiche les\n", + "lendemains : ce sont donc les voisinages tels qu'utilisés dans l'algorithme.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "library(talweg)\n", + "\n", + "ts_data = read.csv(system.file(\"extdata\",\"pm10_mesures_H_loc_report.csv\",package=\"talweg\"))\n", + "exo_data = read.csv(system.file(\"extdata\",\"meteo_extra_noNAs.csv\",package=\"talweg\"))\n", + "data = getData(ts_data, exo_data, input_tz = \"Europe/Paris\", working_tz=\"Europe/Paris\",\n", + "\tpredict_at=7) #predict from P+1 to P+H included\n", + "\n", + "indices_ch = seq(as.Date(\"2015-01-18\"),as.Date(\"2015-01-24\"),\"days\")\n", + "indices_ep = seq(as.Date(\"2015-03-15\"),as.Date(\"2015-03-21\"),\"days\")\n", + "indices_np = seq(as.Date(\"2015-04-26\"),as.Date(\"2015-05-02\"),\"days\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "

Pollution par chauffage

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p_nn_exo = computeForecast(data, indices_ch, \"Neighbors\", \"Neighbors\",\n", + "\thorizon=3, simtype=\"exo\")\n", + "p_nn_mix = computeForecast(data, indices_ch, \"Neighbors\", \"Neighbors\",\n", + "\thorizon=3, simtype=\"mix\")\n", + "p_az = computeForecast(data, indices_ch, \"Average\", \"Zero\",\n", + "\thorizon=3)\n", + "p_pz = computeForecast(data, indices_ch, \"Persistence\", \"Zero\",\n", + "\thorizon=3, same_day=TRUE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e_nn_exo = computeError(data, p_nn_exo, 3)\n", + "e_nn_mix = computeError(data, p_nn_mix, 3)\n", + "e_az = computeError(data, p_az, 3)\n", + "e_pz = computeError(data, p_pz, 3)\n", + "options(repr.plot.width=9, repr.plot.height=7)\n", + "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n", + "\n", + "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n", + "\n", + "i_np = which.min(e_nn_exo$abs$indices)\n", + "i_p = which.max(e_nn_exo$abs$indices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "options(repr.plot.width=9, repr.plot.height=4)\n", + "par(mfrow=c(1,2))\n", + "\n", + "plotPredReal(data, p_nn_exo, i_np); title(paste(\"PredReal nn exo day\",i_np))\n", + "plotPredReal(data, p_nn_exo, i_p); title(paste(\"PredReal nn exo day\",i_p))\n", + "\n", + "plotPredReal(data, p_nn_mix, i_np); title(paste(\"PredReal nn mix day\",i_np))\n", + "plotPredReal(data, p_nn_mix, i_p); title(paste(\"PredReal nn mix day\",i_p))\n", + "\n", + "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n", + "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n", + "\n", + "# Bleu: prévue, noir: réalisée" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste(\"Filaments nn exo day\",i_np))\n", + "f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste(\"Filaments nn exo day\",i_p))\n", + "\n", + "f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste(\"Filaments nn mix day\",i_np))\n", + "f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste(\"Filaments nn mix day\",i_p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotFilamentsBox(data, f_np_exo); title(paste(\"FilBox nn exo day\",i_np))\n", + "plotFilamentsBox(data, f_p_exo); title(paste(\"FilBox nn exo day\",i_p))\n", + "\n", + "plotFilamentsBox(data, f_np_mix); title(paste(\"FilBox nn mix day\",i_np))\n", + "plotFilamentsBox(data, f_p_mix); title(paste(\"FilBox nn mix day\",i_p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotRelVar(data, f_np_exo); title(paste(\"StdDev nn exo day\",i_np))\n", + "plotRelVar(data, f_p_exo); title(paste(\"StdDev nn exo day\",i_p))\n", + "\n", + "plotRelVar(data, f_np_mix); title(paste(\"StdDev nn mix day\",i_np))\n", + "plotRelVar(data, f_p_mix); title(paste(\"StdDev nn mix day\",i_p))\n", + "\n", + "# Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotSimils(p_nn_exo, i_np); title(paste(\"Weights nn exo day\",i_np))\n", + "plotSimils(p_nn_exo, i_p); title(paste(\"Weights nn exo day\",i_p))\n", + "\n", + 
"plotSimils(p_nn_mix, i_np); title(paste(\"Weights nn mix day\",i_np))\n", + "plotSimils(p_nn_mix, i_p); title(paste(\"Weights nn mix day\",i_p))\n", + "\n", + "# - pollué à gauche, + pollué à droite" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite\n", + "p_nn_exo$getParams(i_np)$window\n", + "p_nn_exo$getParams(i_p)$window\n", + "\n", + "p_nn_mix$getParams(i_np)$window\n", + "p_nn_mix$getParams(i_p)$window" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "

Pollution par épandage

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p_nn_exo = computeForecast(data, indices_ep, \"Neighbors\", \"Neighbors\",\n", + "\thorizon=3, simtype=\"exo\")\n", + "p_nn_mix = computeForecast(data, indices_ep, \"Neighbors\", \"Neighbors\",\n", + "\thorizon=3, simtype=\"mix\")\n", + "p_az = computeForecast(data, indices_ep, \"Average\", \"Zero\",\n", + "\thorizon=3)\n", + "p_pz = computeForecast(data, indices_ep, \"Persistence\", \"Zero\",\n", + "\thorizon=3, same_day=TRUE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e_nn_exo = computeError(data, p_nn_exo, 3)\n", + "e_nn_mix = computeError(data, p_nn_mix, 3)\n", + "e_az = computeError(data, p_az, 3)\n", + "e_pz = computeError(data, p_pz, 3)\n", + "options(repr.plot.width=9, repr.plot.height=7)\n", + "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n", + "\n", + "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n", + "\n", + "i_np = which.min(e_nn_exo$abs$indices)\n", + "i_p = which.max(e_nn_exo$abs$indices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "options(repr.plot.width=9, repr.plot.height=4)\n", + "par(mfrow=c(1,2))\n", + "\n", + "plotPredReal(data, p_nn_exo, i_np); title(paste(\"PredReal nn exo day\",i_np))\n", + "plotPredReal(data, p_nn_exo, i_p); title(paste(\"PredReal nn exo day\",i_p))\n", + "\n", + "plotPredReal(data, p_nn_mix, i_np); title(paste(\"PredReal nn mix day\",i_np))\n", + "plotPredReal(data, p_nn_mix, i_p); title(paste(\"PredReal nn mix day\",i_p))\n", + "\n", + "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n", + "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n", + "\n", + "# Bleu: prévue, noir: réalisée" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste(\"Filaments nn exo day\",i_np))\n", + "f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste(\"Filaments nn exo day\",i_p))\n", + "\n", + "f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste(\"Filaments nn mix day\",i_np))\n", + "f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste(\"Filaments nn mix day\",i_p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotFilamentsBox(data, f_np_exo); title(paste(\"FilBox nn exo day\",i_np))\n", + "plotFilamentsBox(data, f_p_exo); title(paste(\"FilBox nn exo day\",i_p))\n", + "\n", + "plotFilamentsBox(data, f_np_mix); title(paste(\"FilBox nn mix day\",i_np))\n", + "plotFilamentsBox(data, f_p_mix); title(paste(\"FilBox nn mix day\",i_p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotRelVar(data, f_np_exo); title(paste(\"StdDev nn exo day\",i_np))\n", + "plotRelVar(data, f_p_exo); title(paste(\"StdDev nn exo day\",i_p))\n", + "\n", + "plotRelVar(data, f_np_mix); title(paste(\"StdDev nn mix day\",i_np))\n", + "plotRelVar(data, f_p_mix); title(paste(\"StdDev nn mix day\",i_p))\n", + "\n", + "# Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotSimils(p_nn_exo, i_np); title(paste(\"Weights nn exo day\",i_np))\n", + "plotSimils(p_nn_exo, i_p); title(paste(\"Weights nn exo day\",i_p))\n", + "\n", + 
"plotSimils(p_nn_mix, i_np); title(paste(\"Weights nn mix day\",i_np))\n", + "plotSimils(p_nn_mix, i_p); title(paste(\"Weights nn mix day\",i_p))\n", + "\n", + "# - pollué à gauche, + pollué à droite" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite\n", + "p_nn_exo$getParams(i_np)$window\n", + "p_nn_exo$getParams(i_p)$window\n", + "\n", + "p_nn_mix$getParams(i_np)$window\n", + "p_nn_mix$getParams(i_p)$window" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "

Semaine non polluée

" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "p_nn_exo = computeForecast(data, indices_np, \"Neighbors\", \"Neighbors\",\n", + "\thorizon=3, simtype=\"exo\")\n", + "p_nn_mix = computeForecast(data, indices_np, \"Neighbors\", \"Neighbors\",\n", + "\thorizon=3, simtype=\"mix\")\n", + "p_az = computeForecast(data, indices_np, \"Average\", \"Zero\",\n", + "\thorizon=3)\n", + "p_pz = computeForecast(data, indices_np, \"Persistence\", \"Zero\",\n", + "\thorizon=3, same_day=TRUE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e_nn_exo = computeError(data, p_nn_exo, 3)\n", + "e_nn_mix = computeError(data, p_nn_mix, 3)\n", + "e_az = computeError(data, p_az, 3)\n", + "e_pz = computeError(data, p_pz, 3)\n", + "options(repr.plot.width=9, repr.plot.height=7)\n", + "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n", + "\n", + "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n", + "\n", + "i_np = which.min(e_nn_exo$abs$indices)\n", + "i_p = which.max(e_nn_exo$abs$indices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "options(repr.plot.width=9, repr.plot.height=4)\n", + "par(mfrow=c(1,2))\n", + "\n", + "plotPredReal(data, p_nn_exo, i_np); title(paste(\"PredReal nn exo day\",i_np))\n", + "plotPredReal(data, p_nn_exo, i_p); title(paste(\"PredReal nn exo day\",i_p))\n", + "\n", + "plotPredReal(data, p_nn_mix, i_np); title(paste(\"PredReal nn mix day\",i_np))\n", + "plotPredReal(data, p_nn_mix, i_p); title(paste(\"PredReal nn mix day\",i_p))\n", + "\n", + "plotPredReal(data, p_az, i_np); title(paste(\"PredReal az day\",i_np))\n", + "plotPredReal(data, p_az, i_p); title(paste(\"PredReal az day\",i_p))\n", + "\n", + "# Bleu: prévue, noir: réalisée" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "f_np_exo = computeFilaments(data, p_nn_exo, i_np, plot=TRUE); title(paste(\"Filaments nn exo day\",i_np))\n", + "f_p_exo = computeFilaments(data, p_nn_exo, i_p, plot=TRUE); title(paste(\"Filaments nn exo day\",i_p))\n", + "\n", + "f_np_mix = computeFilaments(data, p_nn_mix, i_np, plot=TRUE); title(paste(\"Filaments nn mix day\",i_np))\n", + "f_p_mix = computeFilaments(data, p_nn_mix, i_p, plot=TRUE); title(paste(\"Filaments nn mix day\",i_p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotFilamentsBox(data, f_np_exo); title(paste(\"FilBox nn exo day\",i_np))\n", + "plotFilamentsBox(data, f_p_exo); title(paste(\"FilBox nn exo day\",i_p))\n", + "\n", + "plotFilamentsBox(data, f_np_mix); title(paste(\"FilBox nn mix day\",i_np))\n", + "plotFilamentsBox(data, f_p_mix); title(paste(\"FilBox nn mix day\",i_p))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotRelVar(data, f_np_exo); title(paste(\"StdDev nn exo day\",i_np))\n", + "plotRelVar(data, f_p_exo); title(paste(\"StdDev nn exo day\",i_p))\n", + "\n", + "plotRelVar(data, f_np_mix); title(paste(\"StdDev nn mix day\",i_np))\n", + "plotRelVar(data, f_p_mix); title(paste(\"StdDev nn mix day\",i_p))\n", + "\n", + "# Variabilité globale en rouge ; sur les 60 voisins (+ lendemains) en noir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "par(mfrow=c(1,2))\n", + "plotSimils(p_nn_exo, i_np); title(paste(\"Weights nn exo day\",i_np))\n", + "plotSimils(p_nn_exo, i_p); title(paste(\"Weights nn exo day\",i_p))\n", + "\n", + 
"plotSimils(p_nn_mix, i_np); title(paste(\"Weights nn mix day\",i_np))\n", + "plotSimils(p_nn_mix, i_p); title(paste(\"Weights nn mix day\",i_p))\n", + "\n", + "# - pollué à gauche, + pollué à droite" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Fenêtres sélectionnées dans ]0,10] / endo à gauche, exo à droite\n", + "p_nn_exo$getParams(i_np)$window\n", + "p_nn_exo$getParams(i_p)$window\n", + "\n", + "p_nn_mix$getParams(i_np)$window\n", + "p_nn_mix$getParams(i_p)$window" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "

Bilan

\n", + "\n", + "Problème difficile : on ne fait guère mieux qu'une naïve moyenne des lendemains des jours\n", + "similaires dans le passé, ce qui n'est pas loin de prédire une série constante égale à la\n", + "dernière valeur observée (méthode \"zéro\"). La persistance donne parfois de bons résultats\n", + "mais est trop instable (sensibilité à l'argument same_day).\n", + "\n", + "Comment améliorer la méthode ?" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- 2.44.0