reports/2017-01/201701_point.tex

   1 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
   2 %
   3 %        LOADING DOCUMENT
   4 %
   5 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
   6 \documentclass[10pt]{beamer}
   7
   8 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
   9 %        LOADING PACKAGES
  10 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  11 \usepackage[utf8]{inputenc}
  12 \usepackage[T1]{fontenc}
  13 \usepackage[francais]{babel}
  14 \usepackage{amsmath}
  15 \usepackage{amsfonts}
  16 \usepackage{amssymb}
  17 \usepackage{graphicx}
  18 \usepackage{booktabs}
  19 \usetheme{default}
  20 \usepackage{tikz}
  21
  22 \colorlet{darkred}{red!80!black}
  23 \colorlet{darkblue}{blue!80!black}
  24 \colorlet{darkgreen}{green!60!black}
  25
  26 \usetikzlibrary{calc,decorations.pathmorphing,patterns}
  27 \pgfdeclaredecoration{penciline}{initial}{ % "penciline" decoration: replaces each input segment by one slightly wobbly Bezier curve
  28   \state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance,
  29   auto corner on length=1mm,]{ % single state whose width is the remaining distance of the current input segment
  30     \pgfpathcurveto% the three groups below are, in order: {support point 1}{support point 2}{end point}
  31      {% Support point 1 (not a start point): far end of the segment, raised by the segment amplitude
  32        \pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
  33                  {\pgfdecorationsegmentamplitude}
  34      }
  35      {% Support point 2: far end of the segment, pulled back by aspect * remaining distance, with a random vertical offset
  36       \pgfmathrand % stores a pseudo-random number in \pgfmathresult, used below as the factor on the amplitude
  37       \pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}}
  38                   {\pgfqpoint{-\pgfdecorationsegmentaspect
  39                    \pgfdecoratedinputsegmentremainingdistance}%
  40                              {\pgfmathresult\pgfdecorationsegmentamplitude}
  41                  }
  42       }
  43       {% End point: last point of the input segment, shifted by (1pt, 1pt)
  44       \pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}}
  45       }
  46   }
  47   \state{final}{} % the final state adds nothing to the path
  48 }
  49 \tikzset{block/.style={draw,rectangle,thick,minimum height=2em,minimum width=2em}} % node style "block": thick drawn rectangle of at least 2em x 2em; \tikzset replaces the deprecated \tikzstyle
  50
  51
  52 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  53 %        BEAMER OPTIONS
  54 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  55 %\setbeameroption{show notes}
  56 \setbeamertemplate{navigation symbols}{}
  57 %\setbeamercovered{transparent}
  58 %\AtBeginSection[]{
  59 %\begin{frame}{Outline}
  60 %   \tableofcontents[currentsection]
  61 %\end{frame}
  62 %}
  63
  64 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  65 %
  66 %   PRESENTATION INFORMATION
  67 %
  68 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  69
  70 \author{B. Auder \and
  71         J. Cugliari \and
  72         Y. Goude   \and
  73         J.-M. Poggi
  74 }
  75 \title{Disaggregated Electricity Forecasting using Clustering of
  76        Individual Consumers}
  77 \subtitle{Réunion mi parcours}
  78 %\logo{}
  79 \institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE}
  80 \date{19 janvier 2017}
  81 %\subject{tito}
  82
  83 \begin{document}
  84
  85 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  86 %
  87 %   TITLE PAGE
  88 %
  89 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  90
  91 \frame[plain]{\maketitle}
  92 %\maketitle
  93
  94
  95 \section{IRSDI follow up meeting}
  96
  97 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  98 %   FRAME:
  99 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 100
 101 \begin{frame}{The project in a nutshell}
 102 \begin{block}{Context}
 103 \begin{itemize}
 104 \item
 105 Industrial : Electricity load forecasting \& smart grids infrastructure
 106 \item
 107 Academic : curve's shape \& nonparametric function-valued forecast
 108 \item
 109 Past work : clustering with wavelets (RC, Wer), KWF, Energycon
 110 \end{itemize}
 111 \end{block}
 112
 113 \begin{block}{Aims}
 114 \begin{itemize}
 115 \item evaluate the upscaling capacity of the Energycon strategy
 116 \item adapt KWF to an exogenous variable (e.g. meteorological)
 117 \end{itemize}
 118 \end{block}
 119 \end{frame}
 120
 121 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 122 %   FRAME:
 123 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 124
 125 \begin{frame}{Clients hierarchical structure and prediction}
 126
 127 \begin{columns}
 128 \column{.6\textwidth}
 129 \begin{figure}[!ht]\centering
 130  \includegraphics[width = \textwidth]{pics/schema.png}
 131 \caption{Hierarchical structure of $N$ individual clients among $K$
 132 groups.}\label{fig:schema-hier}
 133 \end{figure}
 134
 135 \column{.4\textwidth}
 136 \begin{tikzpicture}[decoration=penciline, decorate]
 137   \node[block, decorate] at (0, 0){$Z_t$} ;
 138   \node[block, decorate] at (3, 0) {$Z_{t + 1}$} ;
 139
 140   \node[block, decorate] at (0, -2.5) {$\begin{pmatrix}
 141                               Z_{t, 1} \\ Z_{t, 2} \\ \vdots \\ Z_{t, K}
 142                                \end{pmatrix}$ };
 143
 144   \node[block, decorate] at (3, -2.5) {$\begin{pmatrix}
 145                            Z_{t+1, 1} \\ Z_{t+1, 2} \\ \vdots \\ Z_{t+1, K}
 146                                \end{pmatrix} $};
 147
 148   \draw[decorate, darkblue,  line width = 2mm, ->] (1, 0) -- (2, 0);
 149   \draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5);
 150   \draw[decorate, black,     line width = 2mm, ->] (3, -1.3) -- (3, -0.4);
 151   \draw[decorate, darkred,   line width = 2mm, ->] (1, -1.5) -- (2, -0.75);
 152  \end{tikzpicture}
 153 \end{columns}
 154
 155 \begin{itemize}
 156  \item $Z_t$: aggregate demand at $t$
 157  \hfill $Z_{t, k}$: demand of group $k$ at moment $t$
 158  \item Groups can express tariffs, geographical dispersion, client class, \ldots
 159  \item Profiling vs Prediction
 160  \item We follow Misiti \textit{et al}. (2010) to construct classes of customers to better predict the aggregate.
 161 \end{itemize}
 162 \end{frame}
 163
 164
 165 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 166 %   FRAME:
 167 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 168
 169 \begin{frame}{Expected result}
 170
 171 \includegraphics[width = \textwidth]{pics/perf.pdf}
 172 \end{frame}
 173
 174 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 175 %   FRAME:
 176 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 177
 178 \begin{frame}{Energy decomposition of the DWT}
 179
 180 %\begin{block}{ }
 181 \begin{itemize}
 182 \item Energy conservation of the signal
 183 %
 184   \begin{equation*}\label{eq:energy}
 185      \| z\|^2_H    \approx     \| \widetilde{z_J} \|_2^2
 186         = c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k} ^2  =
 187                      c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2.
 188   \end{equation*}
 189 %  \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
 190  \item For each $j=0,1,\ldots,J-1$, we compute the \textcolor{blue}{absolute} and
 191  \textcolor{orange}{relative} contribution representations by
 192 %
 193    \[ \underbrace{\text{cont}_j = \|\mathbf{d}_j\|_2^2}_{\fbox{\textcolor{blue}{AC}}}
 194       \qquad  \text{and}  \qquad
 195        \underbrace{\text{rel}_j  =
 196      \frac{\|\mathbf{d}_j\|_2^2}
 197           {\sum_{l=0}^{J-1} \|\mathbf{d}_l\|_2^2 }}_{\fbox{\textcolor{orange}{RC}}} .\]
 198  %\item They quantify the relative importance of the scales to the global dynamic.
 199 % \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
 200 % \item RC normalizes the energy of each signal to 1.
 201 \end{itemize}
 202 %\end{block}
 203 %\end{frame}
 204
 205 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 206 %   FRAME:
 207 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 208
 209 %\begin{frame}
 210 %  \frametitle{Schema of procedure}
 211   \begin{center}
 212    \includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png}
 213    % Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
 214   \end{center}
 215
 216         \begin{footnotesize}
 217        \begin{description}
 218  \item [0. Data preprocessing.] Approximate sample paths of $z_1(t),\ldots,z_n(t)$ %by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
 219  \item [1. Feature extraction.] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC).
 220  \item [2. Feature selection.] Screen irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny}
 221  %\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve.
 222  %\begin{tiny} [Sugar \& James ('03)]\end{tiny}
 223  %\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm.
 224       \end{description}       \end{footnotesize}
 225
 226  \end{frame}
 227
 228 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 229 %   FRAME:
 230 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 231
 232 \begin{frame}
 233   \frametitle{A function-based distance}
 234
 235 \begin{columns}
 236 \column{0.6\textwidth}
 237   \begin{itemize}
 238     \item Distance based on wavelet-correlation between two time series
 239     \item Can be used to measure relationship between two functions
 240     %variables, i.e. temperature and load.
 241     \item The strength of the relation is hierarchically decomposed across
 242           scales without loss of time location
 243    \end{itemize}
 244
 245    Drawback: needs more computation time and storage (complex values)
 246 \column{0.4\textwidth}
 247   \includegraphics[width  = \textwidth]{pics/conso-week.png}
 248
 249   \includegraphics[width  = .96\textwidth]{pics/wsp-week.png}
 250
 251 \end{columns}
 252  \end{frame}
 253
 254 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 255 %   FRAME:
 256 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 257
 258 \begin{frame}{A 2-stage strategy (Energycon)}
 259
 260 \includegraphics[width = \textwidth]{pics/2-stage_strategy.png}
 261
 262 \footnotetext[1]{
 263     J. Cugliari, Y. Goude and J.-M. Poggi, ``Disaggregated electricity forecasting using wavelet-based clustering of individual consumers,'' 2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp.~1--6.
 264     }
 265
 266 \end{frame}
 267
 268 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 269 %   FRAME:
 270 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 271
 272 \begin{frame}{Data description}
 273
 274 \begin{columns}
 275 \column{0.45\textwidth}
 276 \begin{block}{Available}
 277 \begin{itemize}
 278 \item
 279 EDF : 25K professional clients, sampled @ 30min, 5 semesters
 280 \item
 281 external open data
 282 \end{itemize}
 283 \end{block}
 284
 285 \begin{block}{Accessible}
 286     \begin{itemize}
 287         \item simulated (very large) data
 288     \end{itemize}
 289 \end{block}
 290 \column{0.55\textwidth}
 291 \includegraphics[width = \columnwidth]{pics/indiv.jpg}
 292 %\textcolor{red}{A picture here?}
 293 \end{columns}
 294
 295 \end{frame}
 296
 297
 298 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 299 %   FRAME:
 300 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 301
 302 \begin{frame}{Computing resources}
 303 \begin{block}{2 academic testing architectures}
 304 \begin{itemize}
 305 \item Orsay's cluster (500Gb RAM, 80 cores)
 306 \item \texttt{pulpito} : Lyon 2's box with 2 quadricores (HT x 2), 72Gb RAM
 307 \end{itemize}
 308 \end{block}
 309
 310 \begin{block}{1 industrial real-scale architecture}
 311 \begin{itemize}
 312 \item mini cluster @ EDF labs
 313 \end{itemize}
 314 \end{block}
 315
 316 \end{frame}
 317
 318 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 319 %   FRAME:
 320 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 321
 322 \begin{frame}{Strategies for upscaling}
 323
 324 \begin{itemize}
 325 \item From 25K to 25M: in 1000 chunks of 25K
 326 \item Reference values:
 327 \begin{itemize}
 328 \item $K'=200$ super consumers (SC)
 329 \item  $K^\ast=15$ final clusters
 330 \end{itemize}
 331 \end{itemize}
 332
 333
 334
 335 \begin{block}{1st strategy}
 336 \begin{itemize}
 337 \item Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients
 338
 339 \item With the $1000 \times K'$ SC perform a 2-step run
 340       leading to $K^\ast$ clusters
 341 \end{itemize}
 342 \end{block}
 343
 344 \begin{block}{2nd strategy}
 345 \begin{itemize}
 346 \item Do 1000 times Energycon's 2-step strategy on 25K clients
 347       leading to $1000\times K^\ast$ intermediate clusters
 348 \item Treat the intermediate clusters as individual curves and perform
 349       a single 2-step run to get $K^\ast$ final clusters
 350 \end{itemize}
 351 \end{block}
 352
 353 \end{frame}
 354
 355
 356 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 357 %   FRAME:
 358 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 359
 360 \begin{frame}
 361 \frametitle{Course + Workshop}
 362
 363 \begin{block}{1-day IRSDI-ECAS Course}
 364 \begin{itemize}
 365 \item
 366 GAM : from classical to distributed environments
 367 \item
 368 October 19, 2017 @ EDF Labs, Paris-Saclay, France
 369 \item
 370 Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK)
 371 \end{itemize}
 372 \end{block}
 373
 374
 375 \begin{block}{1-day Workshop}
 376 \begin{itemize}
 377 \item
 378 Individual Electricity Consumers, Data, Packages and Methods
 379 \item
 380 October 20, 2017 @ EDF Labs, Paris-Saclay, France
 381 \item
 382 5 keynote speakers
 383 \begin{itemize}
 384 \item
 385 Souhaib Ben Taieb, Monash University, Melbourne, Australia
 386 \item
 387 Ram Rajagopal, Stanford Univ., USA
 388 \item
 389 Gavin Shaddick, University of Bath, UK
 390 \item
 391 Bei Chen, IBM Research, Ireland
 392 \item
 393 Jack Kelly, University of London, Imperial College of Science, UK
 394 \end{itemize}
 395 \end{itemize}
 396 \end{block}
 397 \end{frame}
 398
 399
 400 \section{Point sur les codes}
 401
 402 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 403 %   FRAME:
 404 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 405
 406 \begin{frame}{Résumé point sur le code (BA, JC @ Lyon déc 2016)}
 407
 408 Nous avons réussi à
 409 \begin{itemize}
 410 \item
 411 faire une fresh installation du code de BA sur une nouvelle machine
 412 (problèmes divers liés à la compilation, configuration, libraries exotiques)
 413 \item
 414 conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast: 30sec pour obtenir 500 groupes sur 4 procs)
 415 \item
 416 identifier des problèmes : manque un installateur et une interface de prétraitement indépendant du calcul
 417 \end{itemize}
 418
 419 À faire :
 420 \begin{itemize}
 421 \item
 422 finir les expériences (sur nb de classes, nb de curves / chunk, nb de procs) et sur d'autres architectures
 423 \item
 424 interface matrice -> binaire
 425 \item
 426 obtenir les courbes synchrones
 427 \end{itemize}
 428
 429 Piste à explorer pour les comparaisons: \texttt{h2o}
 430 \end{frame}
 431
 432 \section{Expériences numériques}
 433
 434 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 435 %   FRAME:
 436 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 437
 438 \begin{frame}
 439 \frametitle{Code C/MPI}
 440
 441 \begin{enumerate}
 442 \item[0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en $O(1)$ à n'importe quelle série)
 443
 444 \item[1] Algorithme PAM appliqué en parallèle via la librairie MPI.
 445
 446 \item[2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM.
 447
 448 \end{enumerate}
 449
 450 \begin{itemize}
 451 %\item
 452         %Plusieurs astuces : sérialisation des données, calcul en parallèle
 453 \item
 454         Très rapide : environ 5 minutes from raw to 1st stage clustering
 455         \item
 456         Divergences par rapport à Energycon (moyennes au lieu d'agrégation)
 457     \end{itemize}
 458
 459     %\begin{verbatim}
 460     %> time ./ppam.exe serialize 2009.csv 2009.bin 1 0
 461     %real   7m34.182s
 462     %\end{verbatim}
 463 \end{frame}
 464
 465
 466 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 467 %   FRAME:
 468 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 469
 470 \begin{frame}
 471 \frametitle{R Code}
 472
 473 \begin{itemize}
 474 \item Energycon's code update
 475 \item \texttt{data.table} is used for reading and writing\footnote{\texttt{tidyverse} toolbox is much slower}
 476 \item Disk spaces of the plain text associated object
 477 \item Timings on \texttt{pulpito}  (16 cores, 64Gb RAM, SSD)
 478 \end{itemize}
 479 \begin{center}
 480 \begin{tabular}{lccc}\toprule
 481 Task & Time & Memory & Disk \\ \midrule
 482 Raw (15Gb) to matrix  &   7 min  &
 483   30 Gb\footnote{\texttt{ff} is a promising alternative if needed} &
 484   2.7 Gb \\
 485 Compute contributions  &   7 min  & <1Gb & 7 Mb \\
 486 1st stage clustering   &   3 min  & <1Gb & -- \\
 487 Aggregation            &   1 min  &  6Gb & 30 Mb \\
 488 Wer distance matrix    &  40 min  & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\
 489 Forecasts              &  10 min  & <1Gb & --\\
 490 \bottomrule
 491 \end{tabular}
 492 \end{center}
 493 \end{frame}
 494
 495 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 496 %   FRAME:
 497 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 498
 499 \begin{frame}
 500 \frametitle{Why is the WER distance so slow?}
 501
 502
 503 \begin{block}{2nd step strategy (Energycon way)}
 504 % We proceed as follows:
 505  \begin{itemize}
 506     \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
 507     \item Compute the wer-based dissimilarity matrix
 508     \item Obtain the PAM-based clustering.
 509 \end{itemize}
 510
 511 \begin{block}{Current choices on the computation}
 512 \begin{itemize}
 513 \item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet}
 514 \item About 1 sec to compute \texttt{werd(x, y)} with current
 515       filtering ($J \sim 52$ with 13 octaves, 4 voices )
 516 \item Need to compute $n (n - 1) / 2$ pairwise distances
 517      (20K, 130K, 500K entries for $n = 200, 500, 1000$)
 518 \item Need an efficient \texttt{werd} function (maybe in
 519       RcppParallel ?)
 520 \end{itemize}
 521
 522 \end{block}
 523
 524
 525 \end{block}
 526
 527 \end{frame}
 528
 529
 530
 531 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 532 %   FRAME:
 533 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 534
 535 \begin{frame}{CWT}
 536
 537 \begin{block}{Continuous WT}
 538 Starting with a mother wavelet $\psi$ consider $\psi_{a, \tau}(t) = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$.
 539
 540 The CWT of a function $z\in L^2 (\mathbb{R})$ is,
 541 \[ W_z(a, \tau) = \int_{-\infty}^{\infty} z(t) \psi_{a, \tau}^* (t) \, dt . \]
 542
 543 As for Fourier transform, a spectral approach is possible.
 544
 545
 546 \begin{align*}
 547 S_z(a, \tau)  &= |W_z(a, \tau)|^2 && \text{wavelet spectrum} \\
 548 \mathcal{W}_{z, x}(a, \tau)  &= W_z(a, \tau)W_x^*(a, \tau) && \text{cross-wavelet transform}
 549 \end{align*}
 550
 551 %$$ S_z(a, \tau) = |W_z(a, \tau)|^2 \qquad \hbox{wavelet spectrum}$$
 552
 553 %$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$
 554 \end{block}
 555
 556 \end{frame}
 557
 558
 559 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 560 %   FRAME:
 561 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 562
 563 \begin{frame}{Wavelet coherence}
 564 \begin{block}{ }
 565 \begin{equation*} \label{coherence}
 566 R_{z,x}^2(a,\tau) = \frac{ |\tilde{\mathcal{W}}_{z,x}(a, \tau)|^2 }{|\tilde{\mathcal{W}}_{z,z}(a, \tau)| |\tilde{\mathcal{W}}_{x,x}(a, \tau) | }.
 567 \end{equation*}
 568
 569 Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra
 570
 571 \begin{equation*}\label{eq:wer}
 572   WER_{z, x}^2 = \frac{
 573  \int_0^\infty  \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, x}(a, \tau)|  d\tau \right)^2 da} { \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, z}(a, \tau)| d\tau \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{x, x}(a, \tau)| d\tau\right) da}.
 574  \end{equation*}
 575
 576 And obtain a dissimilarity based on it
 577
 578 \begin{equation*}\label{eq:dist-wer}
 579     d(z, x) = \sqrt{ JN(1 - \widehat{WER}_{z, x}^2)}
 580 \end{equation*}
 581 \end{block}
 582 \end{frame}
 583
 584 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 585 %   FRAME:
 586 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 587
 588 %\begin{frame} \frametitle{Wavelet coherence}
 589 %\begin{block}{ }
 590 % We proceed as follows:
 591 % \begin{itemize}
 592 %    \item Transform data $z_1(t), \ldots, z_n(t)$ using the  CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
 593 %    \item Compute a dissimilarity matrix with the coherency based dissimilarity.
 594 %    \item Using PAM obtain clusters $k=8$ clusters.
 595 %   \end{itemize}
 596 %
 597 %  Rand Index (AC, WER) = 0.26
 598 %
 599 %\end{block}
 600 %
 601 %\end{frame}
 602
 603
 604 \end{document}
 605
 606
 607 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 608 %   FRAME:
 609 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 610
 611 \begin{frame}
 612 \frametitle{Misc jc}
 613
 614 \begin{itemize}
 615 \item simulated dataset : howto ?
 616 \item temperature
 617 \item Rcpp
 618 \end{itemize}
 619
 620
 621