From: Benjamin Auder Date: Mon, 30 Jan 2017 00:15:57 +0000 (+0100) Subject: add PDF files through git-fat filters X-Git-Url: https://git.auder.net/doc/current/%7B%7B%20asset%28%27mixstore/css/scripts/%3C?a=commitdiff_plain;h=67058ab80c383ec7369c89e2cbbf5ab38b36fcb2;p=epclust.git add PDF files through git-fat filters --- diff --git a/biblio/ours/ENERGYCON_leuven_GOUDE.pdf b/biblio/ours/ENERGYCON_leuven_GOUDE.pdf new file mode 100644 index 0000000..8943a9a --- /dev/null +++ b/biblio/ours/ENERGYCON_leuven_GOUDE.pdf @@ -0,0 +1 @@ +#$# git-fat 5b47ab0da075bababc52c7a140f2e791fb425306 1486805 diff --git a/biblio/ours/clust-publie.pdf b/biblio/ours/clust-publie.pdf new file mode 100644 index 0000000..5801a64 --- /dev/null +++ b/biblio/ours/clust-publie.pdf @@ -0,0 +1 @@ +#$# git-fat 7571c6c3b737bf2f57eab62fea99eb24a756eded 895937 diff --git a/biblio/ours/cugliari-mathamsud2016.pdf b/biblio/ours/cugliari-mathamsud2016.pdf new file mode 100644 index 0000000..b3a5e8a --- /dev/null +++ b/biblio/ours/cugliari-mathamsud2016.pdf @@ -0,0 +1 @@ +#$# git-fat 96fc0307b651a06a05f81c1f5d0f79a1fbd2bd45 2760474 diff --git a/slides/.gitignore b/slides/.gitignore deleted file mode 100644 index 53d89fc..0000000 --- a/slides/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -#ignore all but this file, and source file -* -!.gitignore -!*.ipynb diff --git a/slides/201701_IRSDIfollowup.pdf b/slides/201701_IRSDIfollowup.pdf new file mode 100644 index 0000000..a1120a0 --- /dev/null +++ b/slides/201701_IRSDIfollowup.pdf @@ -0,0 +1 @@ +#$# git-fat 84fb4f1732c0e17b4f3f2dfb4424a032f10a9000 912206 diff --git a/slides/201701_point.tex b/slides/201701_point.tex new file mode 100644 index 0000000..eebc740 --- /dev/null +++ b/slides/201701_point.tex @@ -0,0 +1,621 @@ +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% +% LOADING DOCUMENT +% +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +\documentclass[10pt]{beamer} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% LOADING PACKAGES 
+%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage[francais]{babel} +\usepackage{amsmath} +\usepackage{amsfonts} +\usepackage{amssymb} +\usepackage{graphicx} +\usepackage{booktabs} +\usetheme{default} +\usepackage{tikz} + +\colorlet{darkred}{red!80!black} +\colorlet{darkblue}{blue!80!black} +\colorlet{darkgreen}{green!60!black} + +\usetikzlibrary{calc,decorations.pathmorphing,patterns} +\pgfdeclaredecoration{penciline}{initial}{ + \state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance, + auto corner on length=1mm,]{ + \pgfpathcurveto% + {% From + \pgfqpoint{\pgfdecoratedinputsegmentremainingdistance} + {\pgfdecorationsegmentamplitude} + } + {% Control 1 + \pgfmathrand + \pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}} + {\pgfqpoint{-\pgfdecorationsegmentaspect + \pgfdecoratedinputsegmentremainingdistance}% + {\pgfmathresult\pgfdecorationsegmentamplitude} + } + } + {%TO + \pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}} + } + } + \state{final}{} +} +\tikzstyle{block} = [draw,rectangle,thick,minimum height=2em,minimum width=2em] + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% BEAMER OPTIONS +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +%\setbeameroption{show notes} +\setbeamertemplate{navigation symbols}{} +%\setbeamercovered{transparent} +%\AtBeginSection[]{ +%\begin{frame}{Outline} +% \tableofcontents[currentsection] +%\end{frame} +%} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% +% PRESENTATION INFORMATION +% +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\author{B. Auder \and + J. Cugliari \and + Y. Goude \and + J.-M. 
Poggi +} +\title{Disaggregated Electricity Forecasting using Clustering of + Individual Consumers} +\subtitle{Réunion mi parcours} +%\logo{} +\institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE} +\date{19 janvier 2017} +%\subject{tito} + +\begin{document} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% +% TITLE PAGE +% +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\frame[plain]{\maketitle} +%\maketitle + + +\section{IRSDI follow up meeting} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{The project in a nutshell} +\begin{block}{Context} +\begin{itemize} +\item +Industrial : Electricity load forecasting \& smart grids infrastructure +\item +Academic : curve's shape \& nonparametric function-valued forecast +\item +Past work : clustering with wavelets (RC, Wer), KWF, Enercon +\end{itemize} +\end{block} + +\begin{block}{Aims} +\begin{itemize} +\item evaluate the upscaling capacity of the Energycon strategy +\item adapt KWF to an exogenous variable (e.g. 
meteorological) +\end{itemize} +\end{block} +\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Clients hierarchical structure and prediction} + +\begin{columns} +\column{.6\textwidth} +\begin{figure}[!ht]\centering + \includegraphics[width = \textwidth]{pics/schema.png} +\caption{Hierarchical structure of $N$ individual clients among $K$ +groups.}\label{fig:schema-hier} +\end{figure} + +\column{.4\textwidth} +\begin{tikzpicture}[decoration=penciline, decorate] + \node[block, decorate] at (0, 0){$Z_t$} ; + \node[block, decorate] at (3, 0) {$Z_{t + 1}$} ; + + \node[block, decorate] at (0, -2.5) {$\begin{pmatrix} + Z_{t, 1} \\ Z_{t, 2} \\ \vdots \\ Z_{t, K} + \end{pmatrix}$ }; + + \node[block, decorate] at (3, -2.5) {$\begin{pmatrix} + Z_{t+1, 1} \\ Z_{t+1, 2} \\ \vdots \\ Z_{t+1, K} + \end{pmatrix} $}; + + \draw[decorate, darkblue, line width = 2mm, ->] (1, 0) -- (2, 0); + \draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5); + \draw[decorate, black, line width = 2mm, ->] (3, -1.3) -- (3, -0.4); + \draw[decorate, darkred, line width = 2mm, ->] (1, -1.5) -- (2, -0.75); + \end{tikzpicture} +\end{columns} + +\begin{itemize} + \item $Z_t$: aggregate demand at $t$ + \hfill $Z_{t, k}$: demand of group $k$ at moment $t$ + \item Groups can express tariffs, geographical dispersion, client class ... + \item Profiling vs Prediction + \item We follow Misiti \textit{et al}. (2010) to construct classes of customers to better predict the aggregate. 
+\end{itemize} +\end{frame} + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Expected result} + +\includegraphics[width = \textwidth]{pics/perf.pdf} +\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Energy decomposition of the DWT} + +%\begin{block}{ } +\begin{itemize} +\item Energy conservation of the signal +% + \begin{equation*}\label{eq:energy} + \| z\|^2_H \approx \| \widetilde{z_J} \|_2^2 + = c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k} ^2 = + c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2. + \end{equation*} +% \item characterization by the set of channel variances estimated at the output of the corresponding filter bank + \item For each $j=0,1,\ldots,J-1$, we compute the \textcolor{blue}{absolute} and + \textcolor{orange}{relative} contribution representations by +% + \[ \underbrace{\hbox{cont}_j = ||\mathbf{d_j}||^2}_{\fbox{\textcolor{blue}{AC}}} + \qquad \text{and} \qquad + \underbrace{\hbox{rel}_j = + \frac{||\mathbf{d_j}||^2} + {\sum_j ||\mathbf{d_j}||^2 }}_{\fbox{\textcolor{orange}{RC}}} .\] + %\item They quantify the relative importance of the scales to the global dynamic. +% \item Only the wavelet coefficients $\set{d_{j,k}}$ are used. +% \item RC normalizes the energy of each signal to 1. +\end{itemize} +%\end{block} +%\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +%\begin{frame} +% \frametitle{Schema of procedure} + \begin{center} + \includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png} + % Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260 + \end{center} + + \begin{footnotesize} + \begin{description} + \item [0. Data preprocessing.] 
Approximate sample paths of $z_1(t),\ldots,z_n(t)$ %by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$. + \item [1. Feature extraction.] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC). + \item [2. Feature selection.] Screen irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny} + %\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve. + %\begin{tiny} [Sugar \& James ('03)]\end{tiny} + %\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm. + \end{description} \end{footnotesize} + + \end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame} + \frametitle{A function-based distance} + +\begin{columns} +\column{0.6\textwidth} + \begin{itemize} + \item Distance based on wavelet-correlation between two time series + \item Can be used to measure the relationship between two functions + %variables, i.e. temperature and load. + \item The strength of the relation is hierarchically decomposed across + scales without loss of time location + \end{itemize} + + Drawback: needs more computation time and storage (complex values) +\column{0.4\textwidth} + \includegraphics[width = \textwidth]{pics/conso-week.png} + + \includegraphics[width = .96\textwidth]{pics/wsp-week.png} + +\end{columns} + \end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{A 2-stage strategy (Energycon)} + +\includegraphics[width = \textwidth]{pics/2-stage_strategy.png} + +\footnotetext[1]{ + J. Cugliari, Y. Goude and J. M. Poggi, "Disaggregated electricity forecasting using wavelet-based clustering of individual consumers," 2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp. 1-6. 
+ } + +\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Data description} + +\begin{columns} +\column{0.45\textwidth} +\begin{block}{Available} +\begin{itemize} +\item +EDF : 25K professional clients, sampled @ 30min, 5 semesters +\item +external open data +\end{itemize} +\end{block} + +\begin{block}{Accessible} + \begin{itemize} + \item simulated (very large) data + \end{itemize} +\end{block} +\column{0.55\textwidth} +\includegraphics[width = \columnwidth]{pics/indiv.jpg} +%\textcolor{red}{A picture here?} +\end{columns} + +\end{frame} + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Computing resources} +\begin{block}{2 academic testing architectures} +\begin{itemize} +\item Orsay's cluster (500Gb RAM, 80 cores) +\item \texttt{pulpito} : Lyon 2's box with 2 quadricores (HT x 2), 72Gb RAM +\end{itemize} +\end{block} + +\begin{block}{1 industrial real-scale architecture} +\begin{itemize} +\item mini cluster @ EDF labs +\end{itemize} +\end{block} + +\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Strategies for upscaling} + +\begin{itemize} +\item From 25K to 25M: in 1000 chunks of 25K +\item Reference values: +\begin{itemize} +\item $K'=200$ super consumers (SC) +\item $K^\ast=15$ final clusters +\end{itemize} +\end{itemize} + + + +\begin{block}{1st strategy} +\begin{itemize} +\item Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients + +\item With the $1000 \times K'$ SC perform a 2-step run + leading to $K^\ast$ clusters +\end{itemize} +\end{block} + +\begin{block}{2nd strategy} +\begin{itemize} +\item Do 1000 times Energycon's 2-step strategy on 25K clients + leading to $1000\times K^\ast$ intermediate clusters +\item Treat the intermediate clusters as individual curves and perform + a 
single 2-step run to get $K^\ast$ final clusters +\end{itemize} +\end{block} + +\end{frame} + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame} +\frametitle{Course + Workshop} + +\begin{block}{1-day IRSDI-ECAS Course} +\begin{itemize} +\item +GAM : from classical to distributed environments +\item +October 19, 2017 @ EDF Labs, Paris-Saclay, France +\item +Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK) +\end{itemize} +\end{block} + + +\begin{block}{1-day Workshop} +\begin{itemize} +\item +Individual Electricity Consumers, Data, Packages and Methods +\item +October 20, 2017 @ EDF Labs, Paris-Saclay, France +\item +5 keynote speakers +\begin{itemize} +\item +Souhaib Ben Taieb, Monash University, Melbourne, Australia +\item +Ram Rajagopal, Stanford Univ., USA +\item +Gavin Shaddick, University of Bath, UK +\item +Bei Chen, IBM Research, Ireland +\item +Jack Kelly, University of London, Imperial College of Science, UK +\end{itemize} +\end{itemize} +\end{block} +\end{frame} + + +\section{Point sur les codes} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Résumé point sur le code (BA, JC @ Lyon déc 2016)} + +Nous avons réussi à +\begin{itemize} +\item +faire une fresh installation du code de BA sur une nouvelle machine +(problèmes divers liés à la compilation, configuration, libraries exotiques) +\item +conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast: 30sec pour obtenir 500 groupes sur 4 procs) +\item +identifier des problèmes : manque un installateur et une interface de prétraitement indépendante du calcul +\end{itemize} + +A faire: +\begin{itemize} +\item +finir les expériences (sur nb de classes, nb de curves / chunk, nb de procs) et sur d'autres architectures +\item +interface matrice -> binaire +\item +obtenir les courbes synchrones 
+\end{itemize} + +Piste à explorer pour les comparaisons: \texttt{h2o} +\end{frame} + +\section{Expériences numériques} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame} +\frametitle{Code C/MPI} + +\begin{enumerate} +\item [0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en O(1) à n'importe quelle série) + +\item [1] Algorithme PAM appliqué en parallèle via la librairie MPI. + +\item [2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM. + +\end{enumerate} + +\begin{itemize} +\item + %Plusieurs astuces : sérialisation des données, calcul en parallèle +\item + Très rapide : environ 5 minutes from raw to 1st stage clustering + \item + Divergences par rapport à Energycon (moyennes au lieu d'agrégation) + \end{itemize} + + %\begin{verbatim} + %> time ./ppam.exe serialize 2009.csv 2009.bin 1 0 + %real 7m34.182s + %\end{verbatim} +\end{frame} + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame} +\frametitle{R Code} + +\begin{itemize} +\item Enercon's code update +\item \texttt{data.table} is used for reading and writing \footnote{\texttt{tidyverse} toolbox is much slower} +\item Disk spaces of the plain text associated object +\item Timings on \texttt{pulpito} (16 cores, 64Gb RAM, SSD) +\end{itemize} +\begin{center} +\begin{tabular}{lccc}\toprule +Task & Time & Memory & Disk \\ \midrule +Raw (15Gb) to matrix & 7 min & + 30 Gb\footnote{\texttt{ff} is a promising alternative if needed} & + 2.7 Gb \\ +Compute contributions & 7 min & <1Gb & 7 Mb \\ +1st stage clustering & 3 min & <1Gb & -- \\ +Aggregation & 1 min & 6Gb & 30 Mb \\ +Wer distance matrix & 40 min & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\ 
+Forecasts & 10 min & <1Gb & --\\ +\bottomrule +\end{tabular} +\end{center} +\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame} +\frametitle{Why is the WER distance so slow?} + + +\begin{block}{2nd step strategy (Enercon way)} +% We proceed as follows: + \begin{itemize} + \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$. + \item Compute the wer-based dissimilarity matrix + \item Obtain the PAM-based clustering. +\end{itemize} + +\begin{block}{Current choices on the computation} +\begin{itemize} +\item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet} +\item About 1 sec to compute \texttt{werd(x, y)} with current + filtering ($J \sim 52$ with 13 octaves, 4 voices ) +\item Need to compute $n (n - 1) / 2$ pairwise distances + (20K, 130K, 500K entries for $n = 200, 500, 1000$) +\item Need an efficient \texttt{werd} function (maybe in + RcppParallel ?) +\end{itemize} + +\end{block} + + +\end{block} + +\end{frame} + + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{CWT} + +\begin{block}{Continuous WT} +Starting with a mother wavelet $\psi$ consider $\psi_{a, \tau}(t) = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$. + +The CWT of a function $z\in L^2 (\mathbb{R})$ is, +$$ W_z(a, \tau) = \int_{-\infty}^{\infty} z(t) \psi_{a, \tau}^* (t) dt$$ + +As for the Fourier transform, a spectral approach is possible. 
+ + +\begin{eqnarray*} +S_z(a, \tau) &=& |W_z(a, \tau)|^2 \qquad\qquad \hbox{wavelet spectrum} \\ +\mathcal{W}_{z, x}(a, \tau) &=& W_z(a, \tau)W_x^*(a, \tau) \qquad \hbox{cross-wavelet transform} +\end{eqnarray*} + +%$$ S_z(a, \tau) = |W_z(a, \tau)|^2 \qquad \hbox{wavelet spectrum}$$ + +%$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$ +\end{block} + +\end{frame} + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame}{Wavelet coherence} +\begin{block}{ } +\begin{equation*} \label{coherence} +R_{z,x}^2(a,\tau) = \frac{ |\tilde{\mathcal{W}}_{z,x}(a, \tau)|^2 }{|\tilde{\mathcal{W}}_{z,z}(a, \tau)| |\tilde{\mathcal{W}}_{x,x}(a, \tau) | }, +\end{equation*} + +Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra + +\begin{equation*}\label{eq:wer} + WER_{z, x}^2 = \frac{ + \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, x}(a, \tau)| d\tau \right)^2 da} { \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, z}(a, \tau)| d\tau \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{x, x}(a, \tau)| d\tau\right) da}. + \end{equation*} + +And obtain a dissimilarity based on it + +\begin{equation*}\label{eq:dist-wer} + d(z, x) = \sqrt{ JN(1 - \widehat{WER}_{z, x}^2)} +\end{equation*} +\end{block} +\end{frame} + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +%\begin{frame} \frametitle{Wavelet coherence} +%\begin{block}{ } +% We proceed as follows: +% \begin{itemize} +% \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$. +% \item Compute a dissimilarity matrix with the coherency based dissimilarity. +% \item Using PAM obtain clusters $k=8$ clusters. 
+% \end{itemize} +% +% Rand Index (AC, WER) = 0.26 +% +%\end{block} +% +%\end{frame} + + +\end{document} + + +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +% FRAME: +%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +\begin{frame} +\frametitle{Misc jc} + +\begin{itemize} +\item simulated dataset : howto ? +\item temperature +\item Rcpp +\end{itemize} + + + diff --git a/slides/201701_reunion.pdf b/slides/201701_reunion.pdf new file mode 100644 index 0000000..c0e3067 --- /dev/null +++ b/slides/201701_reunion.pdf @@ -0,0 +1 @@ +#$# git-fat 9ee0bf244bef3ab34c2a737d94c8f48a9d1f3f86 95206 diff --git a/slides/presentation.ipynb b/slides/presentation.ipynb deleted file mode 100644 index 2340782..0000000 --- a/slides/presentation.ipynb +++ /dev/null @@ -1,44 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "ename": "ERROR", - "evalue": "Error in library(epclust): there is no package called ‘epclust’\n", - "output_type": "error", - "traceback": [ - "Error in library(epclust): there is no package called ‘epclust’\nTraceback:\n", - "1. library(epclust)", - "2. stop(txt, domain = NA)" - ] - } - ], - "source": [ - "library(epclust)\n", - "#TODO..." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "R", - "language": "R", - "name": "ir" - }, - "language_info": { - "codemirror_mode": "r", - "file_extension": ".r", - "mimetype": "text/x-r-source", - "name": "R", - "pygments_lexer": "r", - "version": "3.3.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}