| 1 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 2 | % |
| 3 | % LOADING DOCUMENT |
| 4 | % |
| 5 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 6 | \documentclass[10pt]{beamer} |
| 7 | |
| 8 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 9 | % LOADING PACKAGES |
| 10 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 11 | \usepackage[utf8]{inputenc} |
| 12 | \usepackage[T1]{fontenc} |
| 13 | \usepackage[francais]{babel} |
| 14 | \usepackage{amsmath} |
| 15 | \usepackage{amsfonts} |
| 16 | \usepackage{amssymb} |
| 17 | \usepackage{graphicx} |
| 18 | \usepackage{booktabs} |
| 19 | \usetheme{default} |
| 20 | \usepackage{tikz} |
| 21 | |
| 22 | \colorlet{darkred}{red!80!black} |
| 23 | \colorlet{darkblue}{blue!80!black} |
| 24 | \colorlet{darkgreen}{green!60!black} |
| 25 | |
| 26 | \usetikzlibrary{calc,decorations.pathmorphing,patterns} |
| 27 | \pgfdeclaredecoration{penciline}{initial}{ |
| 28 | \state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance, |
| 29 | auto corner on length=1mm,]{ |
| 30 | \pgfpathcurveto% Hand-drawn look: one Bezier curve per input segment; arguments are {support 1}{support 2}{end point} |
| 31 | {% Support point 1: (remaining segment length, segment amplitude) |
| 32 | \pgfqpoint{\pgfdecoratedinputsegmentremainingdistance} |
| 33 | {\pgfdecorationsegmentamplitude} |
| 34 | } |
| 35 | {% Support point 2: x pulled back by aspect * remaining length, y = (\pgfmathrand result) * amplitude |
| 36 | \pgfmathrand |
| 37 | \pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}} |
| 38 | {\pgfqpoint{-\pgfdecorationsegmentaspect |
| 39 | \pgfdecoratedinputsegmentremainingdistance}% |
| 40 | {\pgfmathresult\pgfdecorationsegmentamplitude} |
| 41 | } |
| 42 | } |
| 43 | {% End point: last point of the input segment shifted by (1pt, 1pt) |
| 44 | \pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}} |
| 45 | } |
| 46 | } |
| 47 | \state{final}{} |
| 48 | } |
| 49 | \tikzset{block/.style={draw,rectangle,thick,minimum height=2em,minimum width=2em}} % boxed-node style for the diagram nodes; \tikzset replaces the deprecated \tikzstyle |
| 50 | |
| 51 | |
| 52 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 53 | % BEAMER OPTIONS |
| 54 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 55 | %\setbeameroption{show notes} |
| 56 | \setbeamertemplate{navigation symbols}{} |
| 57 | %\setbeamercovered{transparent} |
| 58 | %\AtBeginSection[]{ |
| 59 | %\begin{frame}{Outline} |
| 60 | % \tableofcontents[currentsection] |
| 61 | %\end{frame} |
| 62 | %} |
| 63 | |
| 64 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 65 | % |
| 66 | % PRESENTATION INFORMATION |
| 67 | % |
| 68 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 69 | |
| 70 | \author{B. Auder \and |
| 71 | J. Cugliari \and |
| 72 | Y. Goude \and |
| 73 | J.-M. Poggi |
| 74 | } |
| 75 | \title{Disaggregated Electricity Forecasting using Clustering of |
| 76 | Individual Consumers} |
| 77 | \subtitle{Réunion mi-parcours} |
| 78 | %\logo{} |
| 79 | \institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE} |
| 80 | \date{19 janvier 2017} |
| 81 | %\subject{tito} |
| 82 | |
| 83 | \begin{document} |
| 84 | |
| 85 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 86 | % |
| 87 | % TITLE PAGE |
| 88 | % |
| 89 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 90 | |
| 91 | \frame[plain]{\maketitle} |
| 92 | %\maketitle |
| 93 | |
| 94 | |
| 95 | \section{IRSDI follow up meeting} |
| 96 | |
| 97 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 98 | % FRAME: |
| 99 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 100 | |
| 101 | \begin{frame}{The project in a nutshell} |
| 102 | \begin{block}{Context} |
| 103 | \begin{itemize} |
| 104 | \item |
| 105 | Industrial : Electricity load forecasting \& smart grids infrastructure |
| 106 | \item |
| 107 | Academic : curve's shape \& nonparametric function-valued forecast |
| 108 | \item |
| 109 | Past work : clustering with wavelets (RC, Wer), KWF, Energycon |
| 110 | \end{itemize} |
| 111 | \end{block} |
| 112 | |
| 113 | \begin{block}{Aims} |
| 114 | \begin{itemize} |
| 115 | \item evaluate the upscaling capacity of the Energycon strategy |
| 116 | \item adapt KWF to an exogenous variable (e.g. meteorological) |
| 117 | \end{itemize} |
| 118 | \end{block} |
| 119 | \end{frame} |
| 120 | |
| 121 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 122 | % FRAME: |
| 123 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 124 | |
| 125 | \begin{frame}{Clients hierarchical structure and prediction} |
| 126 | |
| 127 | \begin{columns} |
| 128 | \column{.6\textwidth} |
| 129 | \begin{figure}[!ht]\centering |
| 130 | \includegraphics[width = \textwidth]{pics/schema.png} |
| 131 | \caption{Hierarchical structure of $N$ individual clients among $K$ |
| 132 | groups.}\label{fig:schema-hier} |
| 133 | \end{figure} |
| 134 | |
| 135 | \column{.4\textwidth} |
| 136 | \begin{tikzpicture}[decoration=penciline, decorate] |
| 137 | \node[block, decorate] at (0, 0){$Z_t$} ; |
| 138 | \node[block, decorate] at (3, 0) {$Z_{t + 1}$} ; |
| 139 | |
| 140 | \node[block, decorate] at (0, -2.5) {$\begin{pmatrix} |
| 141 | Z_{t, 1} \\ Z_{t, 2} \\ \vdots \\ Z_{t, K} |
| 142 | \end{pmatrix}$ }; |
| 143 | |
| 144 | \node[block, decorate] at (3, -2.5) {$\begin{pmatrix} |
| 145 | Z_{t+1, 1} \\ Z_{t+1, 2} \\ \vdots \\ Z_{t+1, K} |
| 146 | \end{pmatrix} $}; |
| 147 | |
| 148 | \draw[decorate, darkblue, line width = 2mm, ->] (1, 0) -- (2, 0); |
| 149 | \draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5); |
| 150 | \draw[decorate, black, line width = 2mm, ->] (3, -1.3) -- (3, -0.4); |
| 151 | \draw[decorate, darkred, line width = 2mm, ->] (1, -1.5) -- (2, -0.75); |
| 152 | \end{tikzpicture} |
| 153 | \end{columns} |
| 154 | |
| 155 | \begin{itemize} |
| 156 | \item $Z_t$: aggregate demand at $t$ |
| 157 | \hfill $Z_{t, k}$: demand of group $k$ at time $t$ |
| 158 | \item Groups can express tariffs, geographical dispersion, client class, \ldots |
| 159 | \item Profiling vs Prediction |
| 160 | \item We follow Misiti \textit{et al}. (2010) to construct classes of customers to better predict the aggregate. |
| 161 | \end{itemize} |
| 162 | \end{frame} |
| 163 | |
| 164 | |
| 165 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 166 | % FRAME: |
| 167 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 168 | |
| 169 | \begin{frame}{Expected result} |
| 170 | |
| 171 | \includegraphics[width = \textwidth]{pics/perf.pdf} |
| 172 | \end{frame} |
| 173 | |
| 174 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 175 | % FRAME: |
| 176 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 177 | |
| 178 | \begin{frame}{Energy decomposition of the DWT} |
| 179 | |
| 180 | %\begin{block}{ } |
| 181 | \begin{itemize} |
| 182 | \item Energy conservation of the signal |
| 183 | % |
| 184 | \begin{equation*}\label{eq:energy} |
| 185 | \| z\|^2_H \approx \| \widetilde{z_J} \|_2^2 |
| 186 | = c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k} ^2 = |
| 187 | c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2. |
| 188 | \end{equation*} |
| 189 | % \item characterization by the set of channel variances estimated at the output of the corresponding filter bank |
| 190 | \item For each $j=0,1,\ldots,J-1$, we compute the \textcolor{blue}{absolute} and |
| 191 | \textcolor{orange}{relative} contribution representations by |
| 192 | % |
| 193 | \[ \underbrace{\hbox{cont}_j = \|\mathbf{d}_j\|_2^2}_{\fbox{\textcolor{blue}{AC}}} |
| 194 | \qquad \text{and} \qquad |
| 195 | \underbrace{\hbox{rel}_j = |
| 196 | \frac{\|\mathbf{d}_j\|_2^2} |
| 197 | {\sum_{l=0}^{J-1} \|\mathbf{d}_l\|_2^2 }}_{\fbox{\textcolor{orange}{RC}}} .\] |
| 198 | %\item They quantify the relative importance of the scales to the global dynamic. |
| 199 | % \item Only the wavelet coefficients $\set{d_{j,k}}$ are used. |
| 200 | % \item RC normalizes the energy of each signal to 1. |
| 201 | \end{itemize} |
| 202 | %\end{block} |
| 203 | %\end{frame} |
| 204 | |
| 205 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 206 | % FRAME: |
| 207 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 208 | |
| 209 | %\begin{frame} |
| 210 | % \frametitle{Schema of procedure} |
| 211 | \begin{center} |
| 212 | \includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png} |
| 213 | % Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260 |
| 214 | \end{center} |
| 215 | |
| 216 | \begin{footnotesize} |
| 217 | \begin{description} |
| 218 | \item [0. Data preprocessing.] Approximate sample paths of $z_1(t),\ldots,z_n(t)$ %by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$. |
| 219 | \item [1. Feature extraction.] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC). |
| 220 | \item [2. Feature selection.] Screen irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny} |
| 221 | %\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve. |
| 222 | %\begin{tiny} [Sugar \& James ('03)]\end{tiny} |
| 223 | %\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm. |
| 224 | \end{description} \end{footnotesize} |
| 225 | |
| 226 | \end{frame} |
| 227 | |
| 228 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 229 | % FRAME: |
| 230 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 231 | |
| 232 | \begin{frame} |
| 233 | \frametitle{A function-based distance} |
| 234 | |
| 235 | \begin{columns} |
| 236 | \column{0.6\textwidth} |
| 237 | \begin{itemize} |
| 238 | \item Distance based on wavelet-correlation between two time series |
| 239 | \item Can be used to measure relationship between two functions |
| 240 | %variables, i.e. temperature and load. |
| 241 | \item The strength of the relation is hierarchically decomposed across |
| 242 | scales without loss of time localization |
| 243 | \end{itemize} |
| 244 | |
| 245 | Drawback: needs more computation time and storage (complex values) |
| 246 | \column{0.4\textwidth} |
| 247 | \includegraphics[width = \textwidth]{pics/conso-week.png} |
| 248 | |
| 249 | \includegraphics[width = .96\textwidth]{pics/wsp-week.png} |
| 250 | |
| 251 | \end{columns} |
| 252 | \end{frame} |
| 253 | |
| 254 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 255 | % FRAME: |
| 256 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 257 | |
| 258 | \begin{frame}{A 2-stages strategy (Energycon)} |
| 259 | |
| 260 | \includegraphics[width = \textwidth]{pics/2-stage_strategy.png} |
| 261 | |
| 262 | \footnotetext[1]{ |
| 263 | J. Cugliari, Y. Goude and J. M. Poggi, ``Disaggregated electricity forecasting using wavelet-based clustering of individual consumers,'' 2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp.~1--6. |
| 264 | } |
| 265 | |
| 266 | \end{frame} |
| 267 | |
| 268 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 269 | % FRAME: |
| 270 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 271 | |
| 272 | \begin{frame}{Data description} |
| 273 | |
| 274 | \begin{columns} |
| 275 | \column{0.45\textwidth} |
| 276 | \begin{block}{Available} |
| 277 | \begin{itemize} |
| 278 | \item |
| 279 | EDF : 25K professional clients, sampled @ 30min, 5 semesters |
| 280 | \item |
| 281 | external open data |
| 282 | \end{itemize} |
| 283 | \end{block} |
| 284 | |
| 285 | \begin{block}{Accessible} |
| 286 | \begin{itemize} |
| 287 | \item simulated (very large) data |
| 288 | \end{itemize} |
| 289 | \end{block} |
| 290 | \column{0.55\textwidth} |
| 291 | \includegraphics[width = \columnwidth]{pics/indiv.jpg} |
| 292 | %\textcolor{red}{A picture here?} |
| 293 | \end{columns} |
| 294 | |
| 295 | \end{frame} |
| 296 | |
| 297 | |
| 298 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 299 | % FRAME: |
| 300 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 301 | |
| 302 | \begin{frame}{Computing resources} |
| 303 | \begin{block}{2 academic testing architectures} |
| 304 | \begin{itemize} |
| 305 | \item Orsay's cluster (500Gb RAM, 80 cores) |
| 306 | \item \texttt{pulpito} : Lyon 2's box with 2 quadricores (HT x 2), 72Gb RAM |
| 307 | \end{itemize} |
| 308 | \end{block} |
| 309 | |
| 310 | \begin{block}{1 industrial real-scale architecture} |
| 311 | \begin{itemize} |
| 312 | \item mini cluster @ EDF labs |
| 313 | \end{itemize} |
| 314 | \end{block} |
| 315 | |
| 316 | \end{frame} |
| 317 | |
| 318 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 319 | % FRAME: |
| 320 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 321 | |
| 322 | \begin{frame}{Strategies for upscaling} |
| 323 | |
| 324 | \begin{itemize} |
| 325 | \item From 25K to 25M: in 1000 chunks of 25K |
| 326 | \item Reference values: |
| 327 | \begin{itemize} |
| 328 | \item $K'=200$ super consumers (SC) |
| 329 | \item $K^\ast=15$ final clusters |
| 330 | \end{itemize} |
| 331 | \end{itemize} |
| 332 | |
| 333 | |
| 334 | |
| 335 | \begin{block}{1st strategy} |
| 336 | \begin{itemize} |
| 337 | \item Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients |
| 338 | |
| 339 | \item With the $1000 \times K'$ SC perform a 2-step run |
| 340 | leading to $K^\ast$ clusters |
| 341 | \end{itemize} |
| 342 | \end{block} |
| 343 | |
| 344 | \begin{block}{2nd strategy} |
| 345 | \begin{itemize} |
| 346 | \item Do 1000 times Energycon's 2-step strategy on 25K clients |
| 347 | leading to $1000\times K^\ast$ intermediate clusters |
| 348 | \item Treat the intermediate clusters as individual curves and perform |
| 349 | a single 2-step run to get $K^\ast$ final clusters |
| 350 | \end{itemize} |
| 351 | \end{block} |
| 352 | |
| 353 | \end{frame} |
| 354 | |
| 355 | |
| 356 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 357 | % FRAME: |
| 358 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 359 | |
| 360 | \begin{frame} |
| 361 | \frametitle{Course + Workshop} |
| 362 | |
| 363 | \begin{block}{1-day IRSDI-ECAS Course} |
| 364 | \begin{itemize} |
| 365 | \item |
| 366 | GAM : from classical to distributed environments |
| 367 | \item |
| 368 | October 19, 2017 @ EDF Labs, Paris-Saclay, France |
| 369 | \item |
| 370 | Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK) |
| 371 | \end{itemize} |
| 372 | \end{block} |
| 373 | |
| 374 | |
| 375 | \begin{block}{1-day Workshop} |
| 376 | \begin{itemize} |
| 377 | \item |
| 378 | Individual Electricity Consumers, Data, Packages and Methods |
| 379 | \item |
| 380 | October 20, 2017 @ EDF Labs, Paris-Saclay, France |
| 381 | \item |
| 382 | 5 keynote speakers |
| 383 | \begin{itemize} |
| 384 | \item |
| 385 | Souhaib Ben Taieb, Monash University, Melbourne, Australia |
| 386 | \item |
| 387 | Ram Rajagopal, Stanford Univ., USA |
| 388 | \item |
| 389 | Gavin Shaddick, University of Bath, UK |
| 390 | \item |
| 391 | Bei Chen, IBM Research, Ireland |
| 392 | \item |
| 393 | Jack Kelly, University of London, Imperial College of Science, UK |
| 394 | \end{itemize} |
| 395 | \end{itemize} |
| 396 | \end{block} |
| 397 | \end{frame} |
| 398 | |
| 399 | |
| 400 | \section{Point sur les codes} |
| 401 | |
| 402 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 403 | % FRAME: |
| 404 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 405 | |
| 406 | \begin{frame}{Résumé point sur le code (BA, JC @ Lyon déc 2016)} |
| 407 | |
| 408 | Nous avons réussi à |
| 409 | \begin{itemize} |
| 410 | \item |
| 411 | faire une fresh installation du code de BA sur une nouvelle machine |
| 412 | (problèmes divers liés à la compilation, configuration, libraries exotiques) |
| 413 | \item |
| 414 | conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast: 30sec pour obtenir 500 groupes sur 4 procs) |
| 415 | \item |
| 416 | identifier des problèmes : il manque un installateur et une interface de prétraitement indépendant du calcul |
| 417 | \end{itemize} |
| 418 | |
| 419 | A faire: |
| 420 | \begin{itemize} |
| 421 | \item |
| 422 | finir les expériences (sur nb de classes, nb de curves / chunk, nb de procs) et sur d'autres architectures |
| 423 | \item |
| 424 | interface matrice -> binaire |
| 425 | \item |
| 426 | obtenir les courbes synchrones |
| 427 | \end{itemize} |
| 428 | |
| 429 | Piste à explorer pour les comparaisons: \texttt{h2o} |
| 430 | \end{frame} |
| 431 | |
| 432 | \section{Expériences numériques} |
| 433 | |
| 434 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 435 | % FRAME: |
| 436 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 437 | |
| 438 | \begin{frame} |
| 439 | \frametitle{Code C/MPI} |
| 440 | |
| 441 | \begin{enumerate} |
| 442 | \item[0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en O(1) à n'importe quelle série) |
| 443 | |
| 444 | \item[1] Algorithme PAM appliqué en parallèle via la librairie MPI. |
| 445 | |
| 446 | \item[2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM. |
| 447 | |
| 448 | \end{enumerate} |
| 449 | |
| 450 | \begin{itemize} |
| 451 | %\item |
| 452 | %Plusieurs astuces : sérialisation des données, calcul en parallèle |
| 453 | \item |
| 454 | Très rapide : environ 5 minutes from raw to 1st stage clustering |
| 455 | \item |
| 456 | Divergences par rapport à Energycon (moyennes au lieu d'agrégation) |
| 457 | \end{itemize} |
| 458 | |
| 459 | %\begin{verbatim} |
| 460 | %> time ./ppam.exe serialize 2009.csv 2009.bin 1 0 |
| 461 | %real 7m34.182s |
| 462 | %\end{verbatim} |
| 463 | \end{frame} |
| 464 | |
| 465 | |
| 466 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 467 | % FRAME: |
| 468 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 469 | |
| 470 | \begin{frame} |
| 471 | \frametitle{R Code} |
| 472 | |
| 473 | \begin{itemize} |
| 474 | \item Energycon's code update |
| 475 | \item \texttt{data.table} is used for reading and writing\footnote{\texttt{tidyverse} toolbox is much slower} |
| 476 | \item Disk space of the associated plain-text object |
| 477 | \item Timings on \texttt{pulpito} (16 cores, 64Gb RAM, SSD) |
| 478 | \end{itemize} |
| 479 | \begin{center} |
| 480 | \begin{tabular}{lccc}\toprule |
| 481 | Task & Time & Memory & Disk \\ \midrule |
| 482 | Raw (15Gb) to matrix & 7 min & |
| 483 | 30 Gb\footnote{\texttt{ff} is a promising alternative if needed} & |
| 484 | 2.7 Gb \\ |
| 485 | Compute contributions & 7 min & <1Gb & 7 Mb \\ |
| 486 | 1st stage clustering & 3 min & <1Gb & -- \\ |
| 487 | Aggregation & 1 min & 6Gb & 30 Mb \\ |
| 488 | Wer distance matrix & 40 min & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\ |
| 489 | Forecasts & 10 min & <1Gb & --\\ |
| 490 | \bottomrule |
| 491 | \end{tabular} |
| 492 | \end{center} |
| 493 | \end{frame} |
| 494 | |
| 495 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 496 | % FRAME: |
| 497 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 498 | |
| 499 | \begin{frame} |
| 500 | \frametitle{Why is the WER distance so slow?} |
| 501 | |
| 502 | |
| 503 | \begin{block}{2nd step strategy (Energycon way)} |
| 504 | % We proceed as follows: |
| 505 | \begin{itemize} |
| 506 | \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$. |
| 507 | \item Compute the wer-based dissimilarity matrix |
| 508 | \item Obtain the PAM-based clustering. |
| 509 | \end{itemize} |
| 510 | \end{block} |
| 511 | \begin{block}{Current choices on the computation} |
| 512 | \begin{itemize} |
| 513 | \item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet} |
| 514 | \item About 1 sec to compute \texttt{werd(x, y)} with current |
| 515 | filtering ($J \sim 52$ with 13 octaves, 4 voices) |
| 516 | \item Need to compute $n (n - 1) / 2$ pairwise distances |
| 517 | (20K, 130K, 500K entries for $n = 200, 500, 1000$) |
| 518 | \item Need an efficient \texttt{werd} function (maybe in |
| 519 | RcppParallel?) |
| 520 | \end{itemize} |
| 521 | |
| 522 | \end{block} |
| 523 | |
| 524 | |
| 525 | |
| 526 | |
| 527 | \end{frame} |
| 528 | |
| 529 | |
| 530 | |
| 531 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 532 | % FRAME: |
| 533 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 534 | |
| 535 | \begin{frame}{CWT} |
| 536 | |
| 537 | \begin{block}{Continuous WT} |
| 538 | Starting with a mother wavelet $\psi$ consider $\psi_{a, \tau} = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$. |
| 539 | |
| 540 | The CWT of a function $z\in L^2 (\mathbb{R})$ is, |
| 541 | $$ W_z(a, \tau) = \int_{-\infty}^{\infty} z(t) \psi_{a, \tau}^* (t) dt$$ |
| 542 | |
| 543 | As for Fourier transform, a spectral approach is possible. |
| 544 | |
| 545 | |
| 546 | \begin{eqnarray*} |
| 547 | S_z(a, \tau) &=& |W_z(a, \tau)|^2 \qquad\qquad \hbox{wavelet spectrum} \\ |
| 548 | \mathcal{W}_{z, x}(a, \tau) &=& W_z(a, \tau)W_x^*(a, \tau) \qquad \hbox{cross-wavelet transform} |
| 549 | \end{eqnarray*} |
| 550 | |
| 551 | %$$ S_z(a, \tau) = |W_z(a, \tau)|^2 \qquad \hbox{wavelet spectrum}$$ |
| 552 | |
| 553 | %$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$ |
| 554 | \end{block} |
| 555 | |
| 556 | \end{frame} |
| 557 | |
| 558 | |
| 559 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 560 | % FRAME: |
| 561 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 562 | |
| 563 | \begin{frame}{Wavelet coherence} |
| 564 | \begin{block}{ } |
| 565 | \begin{equation*} \label{coherence} |
| 566 | R_{z,x}^2(a,\tau) = \frac{ |\tilde{\mathcal{W}}_{z,x}(a, \tau)|^2 }{|\tilde{\mathcal{W}}_{z,z}(a, \tau)| |\tilde{\mathcal{W}}_{x,x}(a, \tau) | }. |
| 567 | \end{equation*} |
| 568 | |
| 569 | Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra |
| 570 | |
| 571 | \begin{equation*}\label{eq:wer} |
| 572 | WER_{z, x}^2 = \frac{ |
| 573 | \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, x}(a, \tau)| d\tau \right)^2 da} { \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z, z}(a, \tau)| d\tau \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{x, x}(a, \tau)| d\tau\right) da}. |
| 574 | \end{equation*} |
| 575 | |
| 576 | And obtain a dissimilarity based on it |
| 577 | |
| 578 | \begin{equation*}\label{eq:dist-wer} |
| 579 | d(z, x) = \sqrt{ JN(1 - \widehat{WER}_{z, x}^2)} |
| 580 | \end{equation*} |
| 581 | \end{block} |
| 582 | \end{frame} |
| 583 | |
| 584 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 585 | % FRAME: |
| 586 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 587 | |
| 588 | %\begin{frame} \frametitle{Wavelet coherence} |
| 589 | %\begin{block}{ } |
| 590 | % We proceed as follows: |
| 591 | % \begin{itemize} |
| 592 | % \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$. |
| 593 | % \item Compute a dissimilarity matrix with the coherency based dissimilarity. |
| 594 | % \item Using PAM obtain clusters $k=8$ clusters. |
| 595 | % \end{itemize} |
| 596 | % |
| 597 | % Rand Index (AC, WER) = 0.26 |
| 598 | % |
| 599 | %\end{block} |
| 600 | % |
| 601 | %\end{frame} |
| 602 | |
| 603 | |
| 604 | \end{document} |
| 605 | |
| 606 | |
| 607 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 608 | % FRAME: |
| 609 | %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= |
| 610 | |
| 611 | \begin{frame} |
| 612 | \frametitle{Misc jc} |
| 613 | |
| 614 | \begin{itemize} |
| 615 | \item simulated dataset : howto ? |
| 616 | \item temperature |
| 617 | \item Rcpp |
| 618 | \end{itemize} |
| 619 | |
| 620 | |
| 621 | |