% [epclust.git] / reports / 2017-01 / 201701_point.tex

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%
%        LOADING DOCUMENT
%
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
\documentclass[10pt]{beamer}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%        LOADING PACKAGES
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[francais]{babel}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{booktabs}
\usetheme{default}
\usepackage{tikz}

\colorlet{darkred}{red!80!black}
\colorlet{darkblue}{blue!80!black}
\colorlet{darkgreen}{green!60!black}

\usetikzlibrary{calc,decorations.pathmorphing,patterns}
% `penciline' decoration: replaces each input segment of the decorated path
% by a single cubic Bezier curve whose second control point is displaced
% vertically by a random multiple of the decoration amplitude.
% It reads \pgfdecorationsegmentamplitude and \pgfdecorationsegmentaspect
% (presumably set through the usual `amplitude' / `aspect' decoration keys;
% see the PGF manual).
\pgfdeclaredecoration{penciline}{initial}{
  % Single working state: its width is the distance remaining on the
  % current input segment, so one curve is emitted per state execution.
  \state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance,
  auto corner on length=1mm,]{
    % Argument order of \pgfpathcurveto: {control 1}{control 2}{end point}.
    \pgfpathcurveto%
     {% Control 1: x = remaining distance on the segment, y = amplitude
       \pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
                 {\pgfdecorationsegmentamplitude}
     }
     {% Control 2: (remaining distance, 0) shifted by
      % (-aspect * remaining distance, random * amplitude).
      % \pgfmathrand stores a pseudo-random number in \pgfmathresult
      % (in [-1,1] according to the PGF manual).
      \pgfmathrand
      \pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}}
                  {\pgfqpoint{-\pgfdecorationsegmentaspect
                   \pgfdecoratedinputsegmentremainingdistance}%
                             {\pgfmathresult\pgfdecorationsegmentamplitude}
                 }
      }
      {% End point: last point of the input segment plus a fixed (1pt, 1pt) offset
      \pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}}
      }
  }
  % Final state: adds nothing to the path.
  \state{final}{}
}
\tikzstyle{block} = [draw,rectangle,thick,minimum height=2em,minimum width=2em]


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%        BEAMER OPTIONS
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%\setbeameroption{show notes}
\setbeamertemplate{navigation symbols}{}
%\setbeamercovered{transparent}
%\AtBeginSection[]{
%\begin{frame}{Outline}
%	\tableofcontents[currentsection]               
%\end{frame}
%}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%
%	PRESENTATION INFORMATION
%
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\author{B. Auder \and
	    J. Cugliari \and
	    Y. Goude   \and
    	J.-M. Poggi
}
\title{Disaggregated Electricity Forecasting using Clustering of 
	   Individual Consumers}
\subtitle{Réunion mi parcours}
%\logo{}
\institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE}
\date{19 janvier 2017}
%\subject{tito}

\begin{document}
	
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%
%	TITLE PAGE
%
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\frame[plain]{\maketitle} 
%\maketitle


\section{IRSDI follow up meeting}	

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{The project in a nutshell}
\begin{block}{Context}
\begin{itemize}
\item 
Industrial : Electricity load forecasting \& smart grids infrastructure 
\item 
Academic : curve's shape \& nonparametric function-valued forecast 
\item 
Past work : clustering with wavelets (RC, Wer), KWF, Energycon 
\end{itemize}
\end{block}

\begin{block}{Aims}
\begin{itemize}
\item evaluate the upscaling capacity of the Energycon strategy 
\item adapt KWF to an exogenous variable (e.g. meteorological)
\end{itemize}
\end{block}
\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Clients hierarchical structure and prediction}

\begin{columns}
\column{.6\textwidth}
\begin{figure}[!ht]\centering
 \includegraphics[width = \textwidth]{pics/schema.png} 
\caption{Hierarchical structure of $N$ individual clients among $K$ 
groups.}\label{fig:schema-hier}
\end{figure}
 
\column{.4\textwidth}
\begin{tikzpicture}[decoration=penciline, decorate]
  \node[block, decorate] at (0, 0){$Z_t$} ;
  \node[block, decorate] at (3, 0) {$Z_{t + 1}$} ;

  \node[block, decorate] at (0, -2.5) {$\begin{pmatrix}
                              Z_{t, 1} \\ Z_{t, 2} \\ \vdots \\ Z_{t, K}
                               \end{pmatrix}$ };

  \node[block, decorate] at (3, -2.5) {$\begin{pmatrix}
                           Z_{t+1, 1} \\ Z_{t+1, 2} \\ \vdots \\ Z_{t+1, K}
                               \end{pmatrix} $};

  \draw[decorate, darkblue,  line width = 2mm, ->] (1, 0) -- (2, 0);
  \draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5);
  \draw[decorate, black,     line width = 2mm, ->] (3, -1.3) -- (3, -0.4);
  \draw[decorate, darkred,   line width = 2mm, ->] (1, -1.5) -- (2, -0.75);
 \end{tikzpicture}
\end{columns}

\begin{itemize}
 \item $Z_t$: aggregate demand at $t$
 \hfill $Z_{t, k}$: demand of group $k$ at moment $t$
 \item Groups can express tariffs, geographical dispersion, client class, \dots
 \item Profiling vs Prediction 
 \item We follow Misiti \textit{et al}. (2010) to construct classes of customers to better predict the aggregate.
\end{itemize}
\end{frame}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Expected result}

\includegraphics[width = \textwidth]{pics/perf.pdf}
\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Energy decomposition of the DWT}

%\begin{block}{ }
\begin{itemize}
\item Energy conservation of the signal
%
  \begin{equation*}\label{eq:energy}  
     \| z\|^2_H    \approx     \| \widetilde{z_J} \|_2^2 
        = c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k} ^2  = 
                     c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2.
  \end{equation*}
%  \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
 \item For each $j=0,1,\ldots,J-1$, we compute the \textcolor{blue}{absolute} and 
 \textcolor{orange}{relative} contribution representations by
%      
   \[ \underbrace{\text{cont}_j = \|\mathbf{d}_j\|_2^2}_{\fbox{\textcolor{blue}{AC}}}  
      \qquad  \text{and}  \qquad
       \underbrace{\text{rel}_j  = 
     \frac{\|\mathbf{d}_j\|_2^2}
          {\sum_{l=0}^{J-1} \|\mathbf{d}_l\|_2^2 }}_{\fbox{\textcolor{orange}{RC}}} .\]
 %\item They quantify the relative importance of the scales to the global dynamic.
% \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
% \item RC normalizes the energy of each signal to 1.
\end{itemize}
%\end{block}
%\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

%\begin{frame} 
%  \frametitle{Schema of procedure}
  \begin{center}
   \includegraphics[width = 7cm, height = 2cm, keepaspectratio]{./pics/Diagramme1.png}
   % Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
  \end{center}
      
        \begin{footnotesize}
       \begin{description}
 \item [0. Data preprocessing.] Approximate sample paths of $z_1(t),\ldots,z_n(t)$ %by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
 \item [1. Feature extraction.] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC).
 \item [2. Feature selection.] Screen irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny}
 %\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve.
 %\begin{tiny} [Sugar \& James ('03)]\end{tiny}
 %\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm.
      \end{description}       \end{footnotesize}
    
 \end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame} 
  \frametitle{A function-based distance}

\begin{columns}
\column{0.6\textwidth}
  \begin{itemize}
    \item Distance based on wavelet-correlation between two time series
    \item Can be used to measure relationship between two functions
    %variables, i.e. temperature and load.
    \item The strength of the relation is hierarchically decomposed across
          scales without losing time location
   \end{itemize}    

   Drawback: needs more computation time and storage (complex values) 
\column{0.4\textwidth}
  \includegraphics[width  = \textwidth]{pics/conso-week.png} 
                   
  \includegraphics[width  = .96\textwidth]{pics/wsp-week.png} 

\end{columns} 
 \end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{A 2-stage strategy (Energycon)}

\includegraphics[width = \textwidth]{pics/2-stage_strategy.png}

\footnotetext[1]{
	J. Cugliari, Y. Goude and J.-M. Poggi, ``Disaggregated electricity forecasting using wavelet-based clustering of individual consumers,'' 2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp.~1--6.
	}

\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Data description}

\begin{columns}
\column{0.45\textwidth}
\begin{block}{Available}
\begin{itemize}
\item 
EDF : 25K professional clients, sampled @ 30min, 5 semesters 
\item 
external open data 
\end{itemize}
\end{block}

\begin{block}{Accessible}
	\begin{itemize}
		\item simulated (very large) data
	\end{itemize}
\end{block}
\column{0.55\textwidth}
\includegraphics[width = \columnwidth]{pics/indiv.jpg}
%\textcolor{red}{A picture here?}
\end{columns}

\end{frame}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Computing resources}
\begin{block}{2 academic testing architectures}
\begin{itemize}
\item Orsay's cluster (500Gb RAM, 80 cores)
\item \texttt{pulpito} : Lyon 2's box with 2 quadricores (HT x 2), 72Gb RAM
\end{itemize}
\end{block}

\begin{block}{1 industrial real-scale architecture}
\begin{itemize}
\item mini cluster @ EDF labs
\end{itemize}
\end{block}

\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Strategies for upscaling}

\begin{itemize}
\item From 25K to 25M: in 1000 chunks of 25K
\item Reference values: 
\begin{itemize}
\item $K'=200$ super consumers (SC)
\item  $K^\ast=15$ final clusters
\end{itemize}
\end{itemize}


\begin{block}{1st strategy}
\begin{itemize}
\item Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients 

\item With the $1000 \times K'$ SC perform a 2-step run 
      leading to $K^\ast$ clusters
\end{itemize}
\end{block}

\begin{block}{2nd strategy}
\begin{itemize}
\item Do 1000 times Energycon's 2-step strategy on 25K clients
      leading to $1000\times K^\ast$ intermediate clusters
\item Treat the intermediate clusters as individual curves and perform
      a single 2-step run to get $K^\ast$ final clusters
\end{itemize}
\end{block}

\end{frame}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}
\frametitle{Course + Workshop}
	
\begin{block}{1-day IRSDI-ECAS Course}
\begin{itemize}
\item 
GAM : from classical to distributed environments
\item
October 19, 2017 @ EDF Labs, Paris-Saclay, France
\item 
Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK)
\end{itemize}
\end{block}


\begin{block}{1-day Workshop}
\begin{itemize}
\item 
Individual Electricity Consumers, Data, Packages and Methods
\item
October 20, 2017 @ EDF Labs, Paris-Saclay, France
\item 
5 keynote speakers 
\begin{itemize}
\item
Souhaib Ben Taieb, Monash University, Melbourne, Australia
\item
Ram Rajagopal, Stanford Univ., USA
\item
Gavin Shaddick, University of Bath, UK
\item
Bei Chen, IBM Research, Ireland
\item
Jack Kelly, University of London, Imperial College of Science, UK
\end{itemize}
\end{itemize}
\end{block}
\end{frame}


\section{Point sur les codes}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Résumé point sur le code (BA, JC @ Lyon déc 2016)}
	
Nous avons réussi à 
\begin{itemize}
\item
faire une fresh installation du code de BA sur une nouvelle machine
(problèmes divers liés à la compilation, configuration, bibliothèques exotiques)
\item 
conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast: 30sec pour obtenir 500 groupes sur 4 procs)
\item
identifier des problèmes : il manque un installateur et une interface de prétraitement indépendant du calcul
\end{itemize}
	
À faire :
\begin{itemize}
\item
finir les expériences (sur nb de classes, nb de curves / chunk, nb de procs) et sur d'autres architectures
\item
interface matrice $\rightarrow$ binaire
\item 
obtenir les courbes synchrones
\end{itemize}
	
Piste à explorer pour les comparaisons: \texttt{h2o} 
\end{frame}

\section{Expériences numériques}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}
\frametitle{Code C/MPI}
	
\begin{enumerate}
\item[0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en O(1) à n'importe quelle série)

\item[1] Algorithme PAM appliqué en parallèle via la bibliothèque MPI.

\item[2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM.

\end{enumerate}

\begin{itemize}
%\item 
		%Plusieurs astuces : sérialisation des données, calcul en parallèle
\item 
		Très rapide : environ 5 minutes from raw to 1st stage clustering
		\item 
		Divergences par rapport à Energycon (moyennes au lieu d'agrégation)
	\end{itemize}
	
	%\begin{verbatim}
	%> time ./ppam.exe serialize 2009.csv 2009.bin 1 0
	%real	7m34.182s
	%\end{verbatim}
\end{frame}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}
\frametitle{R Code}

\begin{itemize}
\item Energycon's code update 
\item \texttt{data.table} is used for reading and writing\footnote{The \texttt{tidyverse} toolbox is much slower}
\item Disk space of the associated plain-text object
\item Timings on \texttt{pulpito}  (16 cores, 64Gb RAM, SSD) 
\end{itemize}
\begin{center}
\begin{tabular}{lccc}\toprule
Task & Time & Memory & Disk \\ \midrule
Raw (15Gb) to matrix  &   7 min  &
  30 Gb\footnote{\texttt{ff} is a promising alternative if needed} &
  2.7 Gb \\
Compute contributions  &   7 min  & <1Gb & 7 Mb \\
1st stage clustering   &   3 min  & <1Gb & -- \\
Aggregation            &   1 min  &  6Gb & 30 Mb \\
Wer distance matrix    &  40 min  & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\
Forecasts              &  10 min  & <1Gb & --\\ 
\bottomrule
\end{tabular}
\end{center}
\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}
\frametitle{Why is the WER distance so slow?}


\begin{block}{2nd step strategy (Energycon way)}
% We proceed as follows:
 \begin{itemize}
    \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
    \item Compute the WER-based dissimilarity matrix.
    \item Obtain the PAM-based clustering.
\end{itemize}
\end{block}

\begin{block}{Current choices on the computation}
\begin{itemize}
\item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet}
\item About 1 sec to compute \texttt{werd(x, y)} with current 
      filtering ($J \sim 52$ with 13 octaves, 4 voices)
\item Need to compute $n (n - 1) / 2$ pairwise distances 
     (20K, 125K, 500K entries for $n = 200, 500, 1000$)
\item Need an efficient \texttt{werd} function (maybe in 
      RcppParallel?)
\end{itemize}
\end{block}

\end{frame}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{CWT}

\begin{block}{Continuous WT}
Starting with a mother wavelet $\psi$, consider $\psi_{a, \tau}(t) = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$.

The CWT of a function $z\in L^2 (\mathbb{R})$ is
\[ W_z(a, \tau) = \int_{-\infty}^{\infty} z(t) \psi_{a, \tau}^* (t) \, dt. \]

As for Fourier transform, a spectral approach is possible.


\begin{align*}
S_z(a, \tau)  &= |W_z(a, \tau)|^2 && \text{wavelet spectrum} \\
\mathcal{W}_{z, x}(a, \tau)  &= W_z(a, \tau)W_x^*(a, \tau) && \text{cross-wavelet transform}
\end{align*}

%$$ S_z(a, \tau) = |W_z(a, \tau)|^2 \qquad \hbox{wavelet spectrum}$$

%$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$
\end{block}

\end{frame}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}{Wavelet coherence}
\begin{block}{ }
\begin{equation*} \label{coherence}
R_{z,x}^2(a,\tau) = \frac{ |\tilde{\mathcal{W}}_{z,x}(a, \tau)|^2 }{|\tilde{\mathcal{W}}_{z,z}(a, \tau)| |\tilde{\mathcal{W}}_{x,x}(a, \tau) | }.
\end{equation*}

Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra
 
\begin{equation*}\label{eq:wer}
  WER_{z, x}^2 = \frac{ 
 \int_0^\infty  \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, x}(a, \tau)|  d\tau \right)^2 da} { \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, z}(a, \tau)| d\tau \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{x, x}(a, \tau)| d\tau\right) da}.
 \end{equation*}

And obtain a dissimilarity based on it
 
\begin{equation*}\label{eq:dist-wer}
    d(z, x) = \sqrt{ JN(1 - \widehat{WER}_{z, x}^2)} 
\end{equation*}
\end{block}
\end{frame}

%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

%\begin{frame} \frametitle{Wavelet coherence}
%\begin{block}{ }
% We proceed as follows:
% \begin{itemize}
%    \item Transform data $z_1(t), \ldots, z_n(t)$ using the  CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
%    \item Compute a dissimilarity matrix with the coherency based dissimilarity.
%    \item Using PAM obtain clusters $k=8$ clusters.
%   \end{itemize}
%
%  Rand Index (AC, WER) = 0.26
%  
%\end{block}
%
%\end{frame}


\end{document}


%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%	FRAME:
%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

\begin{frame}
\frametitle{Misc jc}
	
\begin{itemize}
\item simulated dataset : howto ?
\item temperature
\item Rcpp
\end{itemize}
Commit	Line	Data
	1	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	2	%
	3	% LOADING DOCUMENT
	4	%
	5	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	6	\documentclass[10pt]{beamer}
	7
	8	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	9	% LOADING PACKAGES
	10	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	11	\usepackage[utf8]{inputenc}
	12	\usepackage[T1]{fontenc}
	13	\usepackage[francais]{babel}
	14	\usepackage{amsmath}
	15	\usepackage{amsfonts}
	16	\usepackage{amssymb}
	17	\usepackage{graphicx}
	18	\usepackage{booktabs}
	19	\usetheme{default}
	20	\usepackage{tikz}
	21
	22	\colorlet{darkred}{red!80!black}
	23	\colorlet{darkblue}{blue!80!black}
	24	\colorlet{darkgreen}{green!60!black}
	25
	26	\usetikzlibrary{calc,decorations.pathmorphing,patterns}
	27	\pgfdeclaredecoration{penciline}{initial}{
	28	\state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance,
	29	auto corner on length=1mm,]{
	30	\pgfpathcurveto%
	31	{% From
	32	\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
	33	{\pgfdecorationsegmentamplitude}
	34	}
	35	{% Control 1
	36	\pgfmathrand
	37	\pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}}
	38	{\pgfqpoint{-\pgfdecorationsegmentaspect
	39	\pgfdecoratedinputsegmentremainingdistance}%
	40	{\pgfmathresult\pgfdecorationsegmentamplitude}
	41	}
	42	}
	43	{%TO
	44	\pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}}
	45	}
	46	}
	47	\state{final}{}
	48	}
	49	\tikzstyle{block} = [draw,rectangle,thick,minimum height=2em,minimum width=2em]
	50
	51
	52	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	53	% BEAMER OPTIONS
	54	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	55	%\setbeameroption{show notes}
	56	\setbeamertemplate{navigation symbols}{}
	57	%\setbeamercovered{transparent}
	58	%\AtBeginSection[]{
	59	%\begin{frame}{Outline}
	60	% \tableofcontents[currentsection]
	61	%\end{frame}
	62	%}
	63
	64	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	65	%
	66	% PRESENTATION INFORMATION
	67	%
	68	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	69
	70	\author{B. Auder \and
	71	J. Cugliari \and
	72	Y. Goude \and
	73	J.-M. Poggi
	74	}
	75	\title{Disaggregated Electricity Forecasting using Clustering of
	76	Individual Consumers}
	77	\subtitle{Réunion mi parcours}
	78	%\logo{}
	79	\institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE}
	80	\date{19 janvier 2017}
	81	%\subject{tito}
	82
	83	\begin{document}
	84
	85	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	86	%
	87	% TITLE PAGE
	88	%
	89	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	90
	91	\frame[plain]{\maketitle}
	92	%\maketitle
	93
	94
	95	\section{IRSDI follow up meeting}
	96
	97	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	98	% FRAME:
	99	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	100
	101	\begin{frame}{The project in a nutshell}
	102	\begin{block}{Context}
	103	\begin{itemize}
	104	\item
	105	Industrial : Electricity load forecasting \& smart grids infrastructure
	106	\item
	107	Academic : curve's shape \& nonparametric function-valued forecast
	108	\item
	109	Past work : clustering with wavelets (RC, Wer), KWF, Enercon
	110	\end{itemize}
	111	\end{block}
	112
	113	\begin{block}{Aims}
	114	\begin{itemize}
	115	\item evaluate the upscaling capacity of the Energycon strategy
	116	\item adapt KWF to an exogenous variable (e.g. meteorological)
	117	\end{itemize}
	118	\end{block}
	119	\end{frame}
	120
	121	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	122	% FRAME:
	123	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	124
	125	\begin{frame}{Clients hierarchical structure and prediction}
	126
	127	\begin{columns}
	128	\column{.6\textwidth}
	129	\begin{figure}[!ht]\centering
	130	\includegraphics[width = \textwidth]{pics/schema.png}
	131	\caption{Hierarchical structure of $N$ individual clients among $K$
	132	groups.}\label{fig:schema-hier}
	133	\end{figure}
	134
	135	\column{.4\textwidth}
	136	\begin{tikzpicture}[decoration=penciline, decorate]
	137	\node[block, decorate] at (0, 0){$Z_t$} ;
	138	\node[block, decorate] at (3, 0) {$Z_{t + 1}$} ;
	139
	140	\node[block, decorate] at (0, -2.5) {$\begin{pmatrix}
	141	Z_{t, 1} \\ Z_{t, 2} \\ \vdots \\ Z_{t, K}
	142	\end{pmatrix}$ };
	143
	144	\node[block, decorate] at (3, -2.5) {$\begin{pmatrix}
	145	Z_{t+1, 1} \\ Z_{t+1, 2} \\ \vdots \\ Z_{t+1, k}
	146	\end{pmatrix} $};
	147
	148	\draw[decorate, darkblue, line width = 2mm, ->] (1, 0) -- (2, 0);
	149	\draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5);
	150	\draw[decorate, black, line width = 2mm, ->] (3, -1.3) -- (3, -0.4);
	151	\draw[decorate, darkred, line width = 2mm, ->] (1, -1.5) -- (2, -0.75);
	152	\end{tikzpicture}
	153	\end{columns}
	154
	155	\begin{itemize}
	156	\item $Z_t$: aggregate demand at $t$
	157	\hfill $Z_{t, k}$:demand of group $k$ at moment $t$
	158	\item Groups can express tariffs, geographical dispersion, client class ...
	159	\item Profiling vs Prediction
	160	\item We follow Misiti \textit{et al}. (2010) to construct classes of customers to better predict the aggregate.
	161	\end{itemize}
	162	\end{frame}
	163
	164
	165	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	166	% FRAME:
	167	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	168
	169	\begin{frame}{Expected result}
	170
	171	\includegraphics[width = \textwidth]{pics/perf.pdf}
	172	\end{frame}
	173
	174	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	175	% FRAME:
	176	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	177
	178	\begin{frame}{Energy decomposition of the DWT}
	179
	180	%\begin{block}{ }
	181	\begin{itemize}
	182	\item Energy conservation of the signal
	183	%
	184	\begin{equation*}\label{eq:energy}
	185	\\| z\\|^2_H \approx \\| \widetilde{z_J} \\|_2^2
	186	= c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k} ^2 =
	187	c_{0,0}^2 + \sum_{j=0}^{J-1} \\| \mathbf{d}_{j} \\|_2^2.
	188	\end{equation*}
	189	% \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
	190	\item For each $j=0,1,\ldots,J-1$, we compute the \textcolor{blue}{absolute} and
	191	\textcolor{orange}{relative} contribution representations by
	192	%
	193	\[ \underbrace{\hbox{cont}_j = \|\|\mathbf{d_j}\|\|^2}_{\fbox{\textcolor{blue}{AC}}}
	194	\qquad \text{and} \qquad
	195	\underbrace{\hbox{rel}_j =
	196	\frac{\|\|\mathbf{d_j}\|\|^2}
	197	{\sum_j \|\|\mathbf{d_j}\|\|^2 }}_{\fbox{\textcolor{orange}{RC}}} .\]
	198	%\item They quantify the relative importance of the scales to the global dynamic.
	199	% \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
	200	% \item RC normalizes the energy of each signal to 1.
	201	\end{itemize}
	202	%\end{block}
	203	%\end{frame}
	204
	205	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	206	% FRAME:
	207	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	208
	209	%\begin{frame}
	210	% \frametitle{Schema of procedure}
	211	\begin{center}
	212	\includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png}
	213	% Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
	214	\end{center}
	215
	216	\begin{footnotesize}
	217	\begin{description}
	218	\item [0. Data preprocessing.] Approximate sample paths of $z_1(t),\ldots,z_n(t)$ %by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
	219	\item [1. Feature extraction.] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC).
	220	\item [2. Feature selection.] Screen irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny}
	221	%\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve.
	222	%\begin{tiny} [Sugar \& James ('03)]\end{tiny}
	223	%\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm.
	224	\end{description} \end{footnotesize}
	225
	226	\end{frame}
	227
	228	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	229	% FRAME:
	230	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	231
	232	\begin{frame}
	233	\frametitle{A function-based distance}
	234
	235	\begin{columns}
	236	\column{0.6\textwidth}
	237	\begin{itemize}
	238	\item Distance based on wavelet-correlation between two time series
	239	\item Can be used to measure relationship between two functions
	240	%variables, i.e. temperature and load.
	241	\item The strength of the relation is hierarchically decomposed across
	242	scales without losing of time location
	243	\end{itemize}
	244
	245	Drawback: needs more computation time and storage (complex values)
	246	\column{0.4\textwidth}
	247	\includegraphics[width = \textwidth]{pics/conso-week.png}
	248
	249	\includegraphics[width = .96\textwidth]{pics/wsp-week.png}
	250
	251	\end{columns}
	252	\end{frame}
	253
	254	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	255	% FRAME:
	256	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	257
	258	\begin{frame}{A 2-stages strategy (Energycon)}
	259
	260	\includegraphics[width = \textwidth]{pics/2-stage_strategy.png}
	261
	262	\footnotetext[1]{
	263	J. Cugliari, Y. Goude and J. M. Poggi, "Disaggregated electricity forecasting using wavelet-based clustering of individual consumers," 2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp. 1-6.
	264	}
	265
	266	\end{frame}
	267
	268	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	269	% FRAME:
	270	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	271
	272	\begin{frame}{Data description}
	273
	274	\begin{columns}
	275	\column{0.45\textwidth}
	276	\begin{block}{Available}
	277	\begin{itemize}
	278	\item
	279	EDF : 25K professional clients, sampled @ 30min, 5 semesters
	280	\item
	281	external open data
	282	\end{itemize}
	283	\end{block}
	284
	285	\begin{block}{Accesible}
	286	\begin{itemize}
	287	\item simulated (very large) data
	288	\end{itemize}
	289	\end{block}
	290	\column{0.55\textwidth}
	291	\includegraphics[width = \columnwidth]{pics/indiv.jpg}
	292	%\textcolor{red}{A picture here?}
	293	\end{columns}
	294
	295	\end{frame}
	296
	297
	298	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	299	% FRAME:
	300	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	301
	302	\begin{frame}{Computing resources}
	303	\begin{block}{2 academic testing architectures}
	304	\begin{itemize}
	305	\item Orsay's cluster (500Gb RAM, 80 cores)
	306	\item \texttt{pulpito} : Lyon 2's box with 2 quadricores (HT x 2), 72Gb RAM
	307	\end{itemize}
	308	\end{block}
	309
	310	\begin{block}{1 industrial real-scale architecture}
	311	\begin{itemize}
	312	\item mini cluster @ EDF labs
	313	\end{itemize}
	314	\end{block}
	315
	316	\end{frame}
	317
	318	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	319	% FRAME:
	320	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	321
	322	\begin{frame}{Strategies for upscaling}
	323
	324	\begin{itemize}
	325	\item From 25K to 25M: in 1000 chunks of 25K
	326	\item Reference values:
	327	\begin{itemize}
	328	\item $K'=200$ super consumers (SC)
	329	\item $K\ast=15$ final clusters
	330	\end{itemize}
	331	\end{itemize}
	332
	333
	334
	335	\begin{block}{1st strategy}
	336	\begin{itemize}
	337	\item Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients
	338
	339	\item With the $1000 \times K'$ SC perform a 2-step run
	340	leading to $K^\ast$ clusters
	341	\end{itemize}
	342	\end{block}
	343
	344	\begin{block}{2nd strategy}
	345	\begin{itemize}
	346	\item Do 1000 times Energycon's 2-step strategy on 25K clients
	347	leading to $1000\times K^\ast$ intermediate clusters
	348	\item Treat the intermediate clusters as individual curves and perform
	349	a single 2-step run to get $K^\ast$ final clusters
	350	\end{itemize}
	351	\end{block}
	352
	353	\end{frame}
	354
	355
	356	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	357	% FRAME:
	358	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	359
	360	\begin{frame}
	361	\frametitle{Course + Workshop}
	362
	363	\begin{block}{1-day IRSDI-ECAS Course}
	364	\begin{itemize}
	365	\item
	366	GAM : from classical to distributed environments
	367	\item
	368	October 19, 2017 @ EDF Labs, Paris-Saclay, France
	369	\item
	370	Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK)
	371	\end{itemize}
	372	\end{block}
	373
	374
	375	\begin{block}{1-day Worshop}
	376	\begin{itemize}
	377	\item
	378	Individual Electricity Consumers, Data, Packages and Methods
	379	\item
	380	October 20, 2017 @ EDF Labs, Paris-Saclay, France
	381	\item
	382	5 keynote speakers
	383	\begin{itemize}
	384	\item
	385	Souhaib Ben Taieb, Monash University, Melbourne, Australia
	386	\item
	387	Ram Rajagopal, Stanford Univ., USA
	388	\item
	389	Gavin Shaddick, University of Bath, UK
	390	\item
	391	Bei Chen, IBM Research, Ireland
	392	\item
	393	Jack Kelly, University of London, Imperial College of Science, UK
	394	\end{itemize}
	395	\end{itemize}
	396	\end{block}
	397	\end{frame}
	398
	399
	400	\section{Point sur les codes}
	401
	402	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	403	% FRAME:
	404	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	405
	406	\begin{frame}{Résumé point sur le code (BA, JC @ Lyon déc 2016)}
	407
	408	Nous avons réussi à
	409	\begin{itemize}
	410	\item
	411	faire une fresh installation du code de BA sur une nouvelle machine
	412	(problèmes divers liés à la compilation, configuration, libraries exotiques)
	413	\item
	414	conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast: 30sec pour obtenir 500 groupes sur 4 procs)
	415	\item
	416	identifier de problèmes : manque un installateur et une interface de pretraitement indépendant du calcul
	417	\end{itemize}
	418
	419	A faire:
	420	\begin{itemize}
	421	\item
	422	finir les expériences (sur nb de classes, nb de curves / chunk, nb de procs) et sur d'autres architectures
	423	\item
	424	interface matrice -> binaire
	425	\item
	426	obtenir les courbes synchrones
	427	\end{itemize}
	428
	429	Piste à explorer pour les comparaisons: \texttt{h2o}
	430	\end{frame}
	431
	432	\section{Expériences numériques}
	433
	434	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	435	% FRAME:
	436	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	437
	438	\begin{frame}
	439	\frametitle{Code C/MPI}
	440
	441	\begin{enumerate}
	442	\item[0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en O(1) à n'importe quelle série)
	443
	444	\item[1] Algorithme PAM appliqué en parallèle via la librairie MPI.
	445
	446	\item[2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM.
	447
	448	\end{enumerate}
	449
	450	\begin{itemize}
	451	%\item
	452	%Plusieurs astuces : sérialisation des données, calcul en parallèle
	453	\item
	454	Très rapide : environ 5 minutes from raw to 1st stage clustering
	455	\item
	456	Divergences par rapport à Energycon (moyennes au lieu d'agrégation)
	457	\end{itemize}
	458
	459	%\begin{verbatim}
	460	%> time ./ppam.exe serialize 2009.csv 2009.bin 1 0
	461	%real 7m34.182s
	462	%\end{verbatim}
	463	\end{frame}
	464
	465
	466	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	467	% FRAME:
	468	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	469
	470	\begin{frame}
	471	\frametitle{R Code}
	472
	473	\begin{itemize}
	474	\item Enercon's code update
	475	\item \texttt{data.table} is used for reading and writing\footnote{\texttt{tidyverse} toolbox is much slower}
	476	\item Disk spaces of the plain text associated object
	477	\item Timings on \texttt{pulpito} (16 cores, 64Gb RAM, SSD)
	478	\end{itemize}
	479	\begin{center}
	480	\begin{tabular}{lccc}\toprule
	481	Task & Time & Memory & Disk \\ \midrule
	482	Raw (15Gb) to matrix & 7 min &
	483	30 Gb\footnote{\texttt{ff} is a promising alternative if needed} &
	484	2.7 Gb \\
	485	Compute contributions & 7 min & <1Gb & 7 Mb \\
	486	1st stage clustering & 3 min & <1Gb & -- \\
	487	Aggregation & 1 min & 6Gb & 30 Mb \\
	488	Wer distance matrix & 40 min & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\
	489	Forecasts & 10 min & <1Gb & --\\
	490	\bottomrule
	491	\end{tabular}
	492	\end{center}
	493	\end{frame}
	494
	495	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	496	% FRAME:
	497	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	498
	499	\begin{frame}
	500	\frametitle{Why is the WER distance so slow?}
	501
	502
	503	\begin{block}{2nd step strategy (Enercon way)}
	504	% We proceed as follows:
	505	\begin{itemize}
	506	\item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
	507	\item Compute the wer-based dissimilarity matrix
	508	\item Obtain the PAM-based clustering.
	509	\end{itemize}
	510
	511	\begin{block}{Current choices on the computation}
	512	\begin{itemize}
	513	\item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet}
	514	\item About 1 sec to compute \texttt{werd(x, y)} with current
	515	filtering ($J \sim 52$ with 13 octaves, 4 voices)
	516	\item Need to compute $n (n - 1) / 2$ pairwise distances
	517	(20K, 125K, 500K entries for $n = 200, 500, 1000$)
	518	\item Need an efficient \texttt{werd} function (maybe in
	519	RcppParallel ?)
	520	\end{itemize}
	521
	522	\end{block}
	523
	524
	525	\end{block}
	526
	527	\end{frame}
	528
	529
	530
	531	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	532	% FRAME:
	533	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	534
	535	\begin{frame}{CWT}
	536
	537	\begin{block}{Continuous WT}
	538	Starting with a mother wavelet $\psi$, consider $\psi_{a, \tau}(t) = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$.
	539
	540	The CWT of a function $z\in L^2 (\mathbb{R})$ is,
	541	$$ W_z(a, \tau) = \int_{-\infty}^{\infty} z(t) \psi_{a, \tau}^* (t) dt$$
	542
	543	As for the Fourier transform, a spectral approach is possible.
	544
	545
	546	\begin{eqnarray*}
	547	S_z(a, \tau) &=& \|W_z(a, \tau)\|^2 \qquad\qquad \hbox{wavelet spectrum} \\
	548	\mathcal{W}_{z, x}(a, \tau) &=& W_z(a, \tau)W_x^*(a, \tau) \qquad \hbox{cross-wavelet transform}
	549	\end{eqnarray*}
	550
	551	%$$ S_z(a, \tau) = \|W_z(a, \tau)\|^2 \qquad \hbox{wavelet spectrum}$$
	552
	553	%$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$
	554	\end{block}
	555
	556	\end{frame}
	557
	558
	559	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	560	% FRAME:
	561	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	562
	563	\begin{frame}{Wavelet coherence}
	564	\begin{block}{ }
	565	\begin{equation*} \label{coherence}
	566	R_{z,x}^2(a,\tau) = \frac{ \|\tilde{\mathcal{W}}_{z,x}(a, \tau)\|^2 }{\|\tilde{\mathcal{W}}_{z,z}(a, \tau)\| \|\tilde{\mathcal{W}}_{x,x}(a, \tau) \| },
	567	\end{equation*}
	568
	569	Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra
	570
	571	\begin{equation*}\label{eq:wer}
	572	WER_{z, x}^2 = \frac{
	573	\int_0^\infty \left( \int_{-\infty}^\infty \|\tilde{\mathcal{W}}_{z, x}(a, \tau)\| d\tau \right)^2 da} { \int_0^\infty \left( \int_{-\infty}^\infty \|\tilde{\mathcal{W}}_{z, z}(a, \tau)\| d\tau \int_{-\infty}^\infty \|\tilde{\mathcal{W}}_{x, x}(a, \tau)\| d\tau\right) da}.
	574	\end{equation*}
	575
	576	And obtain a dissimilarity based on it
	577
	578	\begin{equation*}\label{eq:dist-wer}
	579	d(z, x) = \sqrt{ JN(1 - \widehat{WER}_{z, x}^2)}
	580	\end{equation*}
	581	\end{block}
	582	\end{frame}
	583
	584	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	585	% FRAME:
	586	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	587
	588	%\begin{frame} \frametitle{Wavelet coherence}
	589	%\begin{block}{ }
	590	% We proceed as follows:
	591	% \begin{itemize}
	592	% \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
	593	% \item Compute a dissimilarity matrix with the coherency based dissimilarity.
	594	% \item Using PAM obtain clusters $k=8$ clusters.
	595	% \end{itemize}
	596	%
	597	% Rand Index (AC, WER) = 0.26
	598	%
	599	%\end{block}
	600	%
	601	%\end{frame}
	602
	603
	604	\end{document}
	605
	606
	607	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	608	% FRAME:
	609	%-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
	610
	611	\begin{frame}
	612	\frametitle{Misc jc}
	613
	614	\begin{itemize}
	615	\item simulated dataset : howto ?
	616	\item temperature
	617	\item Rcpp
	618	\end{itemize}
	619
	620
	621