set final draft for package
[epclust.git] / reports / 2017-01 / 201701_point.tex
1 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
2 %
3 % LOADING DOCUMENT
4 %
5 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
6 \documentclass[10pt]{beamer}
7
8 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
9 % LOADING PACKAGES
10 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
11 \usepackage[utf8]{inputenc}
12 \usepackage[T1]{fontenc}
13 \usepackage[francais]{babel}
14 \usepackage{amsmath}
15 \usepackage{amsfonts}
16 \usepackage{amssymb}
17 \usepackage{graphicx}
18 \usepackage{booktabs}
19 \usetheme{default}
20 \usepackage{tikz}
21
22 \colorlet{darkred}{red!80!black}
23 \colorlet{darkblue}{blue!80!black}
24 \colorlet{darkgreen}{green!60!black}
25
26 \usetikzlibrary{calc,decorations.pathmorphing,patterns}
27 \pgfdeclaredecoration{penciline}{initial}{ % "penciline": redraws each input segment as a slightly irregular curve
28 \state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance,
29 auto corner on length=1mm,]{
30 \pgfpathcurveto% three arguments follow: {support point 1}{support point 2}{end point}
31 {% Support point 1: at the remaining segment distance, raised by the segment amplitude
32 \pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
33 {\pgfdecorationsegmentamplitude}
34 }
35 {% Support point 2: pulled back by aspect * remaining distance, with a random (\pgfmathrand) multiple of the amplitude as height
36 \pgfmathrand
37 \pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}}
38 {\pgfqpoint{-\pgfdecorationsegmentaspect
39 \pgfdecoratedinputsegmentremainingdistance}%
40 {\pgfmathresult\pgfdecorationsegmentamplitude}
41 }
42 }
43 {% End point: last point of the input segment, shifted by (1pt, 1pt)
44 \pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}}
45 }
46 }
47 \state{final}{} % the final state adds nothing to the path
48 }
49 \tikzstyle{block} = [draw,rectangle,thick,minimum height=2em,minimum width=2em]
50
51
52 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
53 % BEAMER OPTIONS
54 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
55 %\setbeameroption{show notes}
56 \setbeamertemplate{navigation symbols}{}
57 %\setbeamercovered{transparent}
58 %\AtBeginSection[]{
59 %\begin{frame}{Outline}
60 % \tableofcontents[currentsection]
61 %\end{frame}
62 %}
63
64 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
65 %
66 % PRESENTATION INFORMATION
67 %
68 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
69
70 \author{B. Auder \and
71 J. Cugliari \and
72 Y. Goude \and
73 J.-M. Poggi
74 }
75 \title{Disaggregated Electricity Forecasting using Clustering of
76 Individual Consumers}
77 \subtitle{Réunion à mi-parcours}
78 %\logo{}
79 \institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE}
80 \date{19 janvier 2017}
81 %\subject{tito}
82
83 \begin{document}
84
85 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
86 %
87 % TITLE PAGE
88 %
89 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
90
91 \frame[plain]{\maketitle}
92 %\maketitle
93
94
95 \section{IRSDI follow up meeting}
96
97 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
98 % FRAME:
99 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
100
101 \begin{frame}{The project in a nutshell}
102 \begin{block}{Context}
103 \begin{itemize}
104 \item
105 Industrial : Electricity load forecasting \& smart grids infrastructure
106 \item
107 Academic : curve's shape \& nonparametric function-valued forecast
108 \item
109 Past work : clustering with wavelets (RC, Wer), KWF, Energycon
110 \end{itemize}
111 \end{block}
112
113 \begin{block}{Aims}
114 \begin{itemize}
115 \item evaluate the upscaling capacity of the Energycon strategy
116 \item adapt KWF to an exogenous variable (e.g. meteorological)
117 \end{itemize}
118 \end{block}
119 \end{frame}
120
121 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
122 % FRAME:
123 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
124
125 \begin{frame}{Clients hierarchical structure and prediction}
126
127 \begin{columns}
128 \column{.6\textwidth}
129 \begin{figure}[!ht]\centering
130 \includegraphics[width = \textwidth]{pics/schema.png}
131 \caption{Hierarchical structure of $N$ individual clients among $K$
132 groups.}\label{fig:schema-hier}
133 \end{figure}
134
135 \column{.4\textwidth}
136 \begin{tikzpicture}[decoration=penciline, decorate]
137 \node[block, decorate] at (0, 0){$Z_t$} ;
138 \node[block, decorate] at (3, 0) {$Z_{t + 1}$} ;
139
140 \node[block, decorate] at (0, -2.5) {$\begin{pmatrix}
141 Z_{t, 1} \\ Z_{t, 2} \\ \vdots \\ Z_{t, K}
142 \end{pmatrix}$ };
143
144 \node[block, decorate] at (3, -2.5) {$\begin{pmatrix}
145 Z_{t+1, 1} \\ Z_{t+1, 2} \\ \vdots \\ Z_{t+1, K}
146 \end{pmatrix} $};
147
148 \draw[decorate, darkblue, line width = 2mm, ->] (1, 0) -- (2, 0);
149 \draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5);
150 \draw[decorate, black, line width = 2mm, ->] (3, -1.3) -- (3, -0.4);
151 \draw[decorate, darkred, line width = 2mm, ->] (1, -1.5) -- (2, -0.75);
152 \end{tikzpicture}
153 \end{columns}
154
155 \begin{itemize}
156 \item $Z_t$: aggregate demand at $t$
157 \hfill $Z_{t, k}$: demand of group $k$ at moment $t$
158 \item Groups can express tariffs, geographical dispersion, client class ...
159 \item Profiling vs Prediction
160 \item We follow Misiti \textit{et al}. (2010) to construct classes of customers to better predict the aggregate.
161 \end{itemize}
162 \end{frame}
163
164
165 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
166 % FRAME:
167 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
168
169 \begin{frame}{Expected result}
170
171 \includegraphics[width = \textwidth]{pics/perf.pdf}
172 \end{frame}
173
174 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
175 % FRAME:
176 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
177
178 \begin{frame}{Energy decomposition of the DWT}
179
180 %\begin{block}{ }
181 \begin{itemize}
182 \item Energy conservation of the signal
183 %
184 \begin{equation*}\label{eq:energy}
185 \| z\|^2_H \approx \| \widetilde{z_J} \|_2^2
186 = c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k} ^2 =
187 c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2.
188 \end{equation*}
189 % \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
190 \item For each $j=0,1,\ldots,J-1$, we compute the \textcolor{blue}{absolute} and
191 \textcolor{orange}{relative} contribution representations by
192 %
193 \[ \underbrace{\hbox{cont}_j = \|\mathbf{d}_j\|_2^2}_{\fbox{\textcolor{blue}{AC}}}
194 \qquad \text{and} \qquad
195 \underbrace{\hbox{rel}_j =
196 \frac{\|\mathbf{d}_j\|_2^2}
197 {\sum_{l=0}^{J-1} \|\mathbf{d}_l\|_2^2 }}_{\fbox{\textcolor{orange}{RC}}} .\]
198 %\item They quantify the relative importance of the scales to the global dynamic.
199 % \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
200 % \item RC normalizes the energy of each signal to 1.
201 \end{itemize}
202 %\end{block}
203 %\end{frame}
204
205 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
206 % FRAME:
207 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
208
209 %\begin{frame}
210 % \frametitle{Schema of procedure}
211 \begin{center}
212 \includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png}
213 % Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
214 \end{center}
215
216 \begin{footnotesize}
217 \begin{description}
218 \item [0. Data preprocessing.] Approximate sample paths of $z_1(t),\ldots,z_n(t)$ %by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
219 \item [1. Feature extraction.] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC).
220 \item [2. Feature selection.] Screen irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny}
221 %\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve.
222 %\begin{tiny} [Sugar \& James ('03)]\end{tiny}
223 %\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm.
224 \end{description} \end{footnotesize}
225
226 \end{frame}
227
228 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
229 % FRAME:
230 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
231
232 \begin{frame}
233 \frametitle{A function-based distance}
234
235 \begin{columns}
236 \column{0.6\textwidth}
237 \begin{itemize}
238 \item Distance based on wavelet-correlation between two time series
239 \item Can be used to measure relationship between two functions
240 %variables, i.e. temperature and load.
241 \item The strength of the relation is hierarchically decomposed across
242 scales without loss of time location
243 \end{itemize}
244
245 Drawback: needs more computation time and storage (complex values)
246 \column{0.4\textwidth}
247 \includegraphics[width = \textwidth]{pics/conso-week.png}
248
249 \includegraphics[width = .96\textwidth]{pics/wsp-week.png}
250
251 \end{columns}
252 \end{frame}
253
254 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
255 % FRAME:
256 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
257
258 \begin{frame}{A 2-stages strategy (Energycon)}
259
260 \includegraphics[width = \textwidth]{pics/2-stage_strategy.png}
261
262 \footnotetext[1]{
263 J. Cugliari, Y. Goude and J.-M. Poggi, ``Disaggregated electricity forecasting using wavelet-based clustering of individual consumers,'' 2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp.~1--6.
264 }
265
266 \end{frame}
267
268 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
269 % FRAME:
270 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
271
272 \begin{frame}{Data description}
273
274 \begin{columns}
275 \column{0.45\textwidth}
276 \begin{block}{Available}
277 \begin{itemize}
278 \item
279 EDF : 25K professional clients, sampled @ 30min, 5 semesters
280 \item
281 external open data
282 \end{itemize}
283 \end{block}
284
285 \begin{block}{Accessible}
286 \begin{itemize}
287 \item simulated (very large) data
288 \end{itemize}
289 \end{block}
290 \column{0.55\textwidth}
291 \includegraphics[width = \columnwidth]{pics/indiv.jpg}
292 %\textcolor{red}{A picture here?}
293 \end{columns}
294
295 \end{frame}
296
297
298 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
299 % FRAME:
300 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
301
302 \begin{frame}{Computing resources}
303 \begin{block}{2 academic testing architectures}
304 \begin{itemize}
305 \item Orsay's cluster (500Gb RAM, 80 cores)
306 \item \texttt{pulpito} : Lyon 2's box with 2 quadricores (HT x 2), 72Gb RAM
307 \end{itemize}
308 \end{block}
309
310 \begin{block}{1 industrial real-scale architecture}
311 \begin{itemize}
312 \item mini cluster @ EDF labs
313 \end{itemize}
314 \end{block}
315
316 \end{frame}
317
318 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
319 % FRAME:
320 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
321
322 \begin{frame}{Strategies for upscaling}
323
324 \begin{itemize}
325 \item From 25K to 25M: in 1000 chunks of 25K
326 \item Reference values:
327 \begin{itemize}
328 \item $K'=200$ super consumers (SC)
329 \item $K^\ast=15$ final clusters
330 \end{itemize}
331 \end{itemize}
332
333
334
335 \begin{block}{1st strategy}
336 \begin{itemize}
337 \item Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients
338
339 \item With the $1000 \times K'$ SC perform a 2-step run
340 leading to $K^\ast$ clusters
341 \end{itemize}
342 \end{block}
343
344 \begin{block}{2nd strategy}
345 \begin{itemize}
346 \item Do 1000 times Energycon's 2-step strategy on 25K clients
347 leading to $1000\times K^\ast$ intermediate clusters
348 \item Treat the intermediate clusters as individual curves and perform
349 a single 2-step run to get $K^\ast$ final clusters
350 \end{itemize}
351 \end{block}
352
353 \end{frame}
354
355
356 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
357 % FRAME:
358 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
359
360 \begin{frame}
361 \frametitle{Course + Workshop}
362
363 \begin{block}{1-day IRSDI-ECAS Course}
364 \begin{itemize}
365 \item
366 GAM : from classical to distributed environments
367 \item
368 October 19, 2017 @ EDF Labs, Paris-Saclay, France
369 \item
370 Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK)
371 \end{itemize}
372 \end{block}
373
374
375 \begin{block}{1-day Workshop}
376 \begin{itemize}
377 \item
378 Individual Electricity Consumers, Data, Packages and Methods
379 \item
380 October 20, 2017 @ EDF Labs, Paris-Saclay, France
381 \item
382 5 keynote speakers
383 \begin{itemize}
384 \item
385 Souhaib Ben Taieb, Monash University, Melbourne, Australia
386 \item
387 Ram Rajagopal, Stanford Univ., USA
388 \item
389 Gavin Shaddick, University of Bath, UK
390 \item
391 Bei Chen, IBM Research, Ireland
392 \item
393 Jack Kelly, University of London, Imperial College of Science, UK
394 \end{itemize}
395 \end{itemize}
396 \end{block}
397 \end{frame}
398
399
400 \section{Point sur les codes}
401
402 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
403 % FRAME:
404 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
405
406 \begin{frame}{Résumé point sur le code (BA, JC @ Lyon déc 2016)}
407
408 Nous avons réussi à
409 \begin{itemize}
410 \item
411 faire une fresh installation du code de BA sur une nouvelle machine
412 (problèmes divers liés à la compilation, configuration, libraries exotiques)
413 \item
414 conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast: 30sec pour obtenir 500 groupes sur 4 procs)
415 \item
416 identifier des problèmes : il manque un installateur et une interface de prétraitement indépendante du calcul
417 \end{itemize}
418
419 A faire:
420 \begin{itemize}
421 \item
422 finir les expériences (sur nb de classes, nb de curves / chunk, nb de procs) et sur d'autres architectures
423 \item
424 interface matrice -> binaire
425 \item
426 obtenir les courbes synchrones
427 \end{itemize}
428
429 Piste à explorer pour les comparaisons: \texttt{h2o}
430 \end{frame}
431
432 \section{Expériences numériques}
433
434 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
435 % FRAME:
436 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
437
438 \begin{frame}
439 \frametitle{Code C/MPI}
440
441 \begin{enumerate}
442 \item[0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en $O(1)$ à n'importe quelle série)
443
444 \item[1] Algorithme PAM appliqué en parallèle via la librairie MPI.
445
446 \item[2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM.
447
448 \end{enumerate}
449
450 \begin{itemize}
451 %\item
452 %Plusieurs astuces : sérialisation des données, calcul en parallèle
453 \item
454 Très rapide : environ 5 minutes from raw to 1st stage clustering
455 \item
456 Divergences par rapport à Energycon (moyennes au lieu d'agrégation)
457 \end{itemize}
458
459 %\begin{verbatim}
460 %> time ./ppam.exe serialize 2009.csv 2009.bin 1 0
461 %real 7m34.182s
462 %\end{verbatim}
463 \end{frame}
464
465
466 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
467 % FRAME:
468 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
469
470 \begin{frame}
471 \frametitle{R Code}
472
473 \begin{itemize}
474 \item Energycon's code update
475 \item \texttt{data.table} is used for reading and writing\footnote{\texttt{tidyverse} toolbox is much slower}
476 \item Disk space of the associated plain-text objects
477 \item Timings on \texttt{pulpito} (16 cores, 64Gb RAM, SSD)
478 \end{itemize}
479 \begin{center}
480 \begin{tabular}{lccc}\toprule
481 Task & Time & Memory & Disk \\ \midrule
482 Raw (15Gb) to matrix & 7 min &
483 30 Gb\footnote{\texttt{ff} is a promising alternative if needed} &
484 2.7 Gb \\
485 Compute contributions & 7 min & <1Gb & 7 Mb \\
486 1st stage clustering & 3 min & <1Gb & -- \\
487 Aggregation & 1 min & 6Gb & 30 Mb \\
488 Wer distance matrix & 40 min & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\
489 Forecasts & 10 min & <1Gb & --\\
490 \bottomrule
491 \end{tabular}
492 \end{center}
493 \end{frame}
494
495 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
496 % FRAME:
497 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
498
499 \begin{frame}
500 \frametitle{Why is the WER distance so slow?}
501
502
503 \begin{block}{2nd step strategy (Energycon way)}
504 % We proceed as follows:
505 \begin{itemize}
506 \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
507 \item Compute the wer-based dissimilarity matrix
508 \item Obtain the PAM-based clustering.
509 \end{itemize}
510 \end{block}
511 \begin{block}{Current choices on the computation}
512 \begin{itemize}
513 \item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet}
514 \item About 1 sec to compute \texttt{werd(x, y)} with current
515 filtering ($J \sim 52$ with 13 octaves, 4 voices)
516 \item Need to compute $n (n - 1) / 2$ pairwise distances
517 (20K, 130K, 500K entries for $n = 200, 500, 1000$)
518 \item Need an efficient \texttt{werd} function (maybe in
519 RcppParallel ?)
520 \end{itemize}
521
522 \end{block}
523
524
525
526
527 \end{frame}
528
529
530
531 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
532 % FRAME:
533 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
534
535 \begin{frame}{CWT}
536
537 \begin{block}{Continuous WT}
538 Starting with a mother wavelet $\psi$ consider $\psi_{a, \tau} = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$.
539
540 The CWT of a function $z\in L^2 (\mathbb{R})$ is,
541 $$ W_z(a, \tau) = \int_{-\infty}^{\infty} z(t) \psi_{a, \tau}^* (t) dt$$
542
543 As for Fourier transform, a spectral approach is possible.
544
545
546 \begin{eqnarray*}
547 S_z(a, \tau) &=& |W_z(a, \tau)|^2 \qquad\qquad \hbox{wavelet spectrum} \\
548 \mathcal{W}_{z, x}(a, \tau) &=& W_z(a, \tau)W_x^*(a, \tau) \qquad \hbox{cross-wavelet transform}
549 \end{eqnarray*}
550
551 %$$ S_z(a, \tau) = |W_z(a, \tau)|^2 \qquad \hbox{wavelet spectrum}$$
552
553 %$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$
554 \end{block}
555
556 \end{frame}
557
558
559 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
560 % FRAME:
561 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
562
563 \begin{frame}{Wavelet coherence}
564 \begin{block}{ }
565 \begin{equation*} \label{coherence}
566 R_{z,x}^2(a,\tau) = \frac{ |\tilde{\mathcal{W}}_{z,x}(a, \tau)|^2 }{|\tilde{\mathcal{W}}_{z,z}(a, \tau)| |\tilde{\mathcal{W}}_{x,x}(a, \tau) | }.
567 \end{equation*}
568
569 Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra
570
571 \begin{equation*}\label{eq:wer}
572 WER_{z, x}^2 = \frac{
573 \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, x}(a, \tau)| d\tau \right)^2 da} { \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, z}(a, \tau)| d\tau \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{x, x}(a, \tau)| d\tau\right) da}.
574 \end{equation*}
575
576 And obtain a dissimilarity based on it
577
578 \begin{equation*}\label{eq:dist-wer}
579 d(z, x) = \sqrt{ JN(1 - \widehat{WER}_{z, x}^2)}
580 \end{equation*}
581 \end{block}
582 \end{frame}
583
584 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
585 % FRAME:
586 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
587
588 %\begin{frame} \frametitle{Wavelet coherence}
589 %\begin{block}{ }
590 % We proceed as follows:
591 % \begin{itemize}
592 % \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
593 % \item Compute a dissimilarity matrix with the coherency based dissimilarity.
594 % \item Using PAM obtain clusters $k=8$ clusters.
595 % \end{itemize}
596 %
597 % Rand Index (AC, WER) = 0.26
598 %
599 %\end{block}
600 %
601 %\end{frame}
602
603
604 \end{document}
605
606
607 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
608 % FRAME:
609 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
610
611 \begin{frame}
612 \frametitle{Misc jc}
613
614 \begin{itemize}
615 \item simulated dataset : howto ?
616 \item temperature
617 \item Rcpp
618 \end{itemize}
619
620
621