1 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
5 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
\documentclass[10pt]{beamer}
8 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
10 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[french]{babel}
22 \colorlet{darkred
}{red!
80!black
}
23 \colorlet{darkblue
}{blue!
80!black
}
24 \colorlet{darkgreen
}{green!
60!black
}
26 \usetikzlibrary{calc,decorations.pathmorphing,patterns
}
27 \pgfdeclaredecoration{penciline
}{initial
}{
28 \state{initial
}[width=+
\pgfdecoratedinputsegmentremainingdistance,
29 auto corner on length=
1mm,
]{
32 \pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
33 {\pgfdecorationsegmentamplitude}
37 \pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt
}}
38 {\pgfqpoint{-
\pgfdecorationsegmentaspect
39 \pgfdecoratedinputsegmentremainingdistance}%
40 {\pgfmathresult\pgfdecorationsegmentamplitude}
44 \pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt
}{1pt
}}
49 \tikzstyle{block
} =
[draw,rectangle,thick,minimum height=
2em,minimum width=
2em
]
52 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
54 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
55 %\setbeameroption{show notes}
56 \setbeamertemplate{navigation symbols
}{}
57 %\setbeamercovered{transparent}
59 %\begin{frame}{Outline}
60 % \tableofcontents[currentsection]
64 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
66 % PRESENTATION INFORMATION
68 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
75 \title{Disaggregated Electricity Forecasting using Clustering of
\subtitle{Réunion à mi-parcours}
79 \institute{IRSDI - RESEARCH INITIATIVE IN INDUSTRIAL DATA SCIENCE
}
80 \date{19 janvier
2017}
85 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
89 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
91 \frame[plain
]{\maketitle}
95 \section{IRSDI follow up meeting
}
97 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
99 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
101 \begin{frame
}{The project in a nutshell
}
102 \begin{block
}{Context
}
105 Industrial : Electricity load forecasting \& smart grids infrastructure
107 Academic : curve's shape \& nonparametric function-valued forecast
Past work : clustering with wavelets (RC, WER), KWF, Energycon
115 \item evaluate the upscaling capacity of the Energycon strategy
116 \item adapt KWF to an exogenous variable (e.g. meteorological)
121 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
123 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
125 \begin{frame
}{Clients hierarchical structure and prediction
}
128 \column{.6\textwidth}
129 \begin{figure
}[!ht
]\centering
130 \includegraphics[width =
\textwidth]{pics/schema.png
}
131 \caption{Hierarchical structure of $N$ individual clients among $K$
132 groups.
}\label{fig:schema-hier
}
135 \column{.4\textwidth}
136 \begin{tikzpicture
}[decoration=penciline, decorate
]
137 \node[block, decorate
] at (
0,
0)
{$Z_t$
} ;
138 \node[block, decorate
] at (
3,
0)
{$Z_
{t +
1}$
} ;
140 \node[block, decorate
] at (
0, -
2.5)
{$
\begin{pmatrix
}
141 Z_
{t,
1} \\ Z_
{t,
2} \\
\vdots \\ Z_
{t, K
}
144 \node[block, decorate
] at (
3, -
2.5)
{$
\begin{pmatrix
}
145 Z_
{t+
1,
1} \\ Z_
{t+
1,
2} \\
\vdots \\ Z_
{t+
1, K
}
148 \draw[decorate, darkblue, line width =
2mm, ->
] (
1,
0) -- (
2,
0);
149 \draw[decorate, darkgreen, line width =
2mm, ->
] (
1, -
2.5) -- (
2, -
2.5);
150 \draw[decorate, black, line width =
2mm, ->
] (
3, -
1.3) -- (
3, -
0.4);
151 \draw[decorate, darkred, line width =
2mm, ->
] (
1, -
1.5) -- (
2, -
0.75);
\item $Z_t$: aggregate demand at $t$
\hfill $Z_{t,k}$: demand of group $k$ at moment $t$
158 \item Groups can express tariffs, geographical dispersion, client class ...
159 \item Profiling vs Prediction
\item We follow Misiti \emph{et al.}\ (2010) to construct classes of customers to better predict the aggregate.
165 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
167 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
169 \begin{frame
}{Expected result
}
171 \includegraphics[width =
\textwidth]{pics/perf.pdf
}
174 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
176 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
178 \begin{frame
}{Energy decomposition of the DWT
}
182 \item Energy conservation of the signal
\begin{equation*}\label{eq:energy}
  \| z \|_H^2 \approx \| \widetilde{z_J} \|_2^2
  = c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k}^2
  = c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_j \|_2^2 .
189 % \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
\item For each $j = 0, 1, \ldots, J-1$, we compute the \textcolor{blue}{absolute} and
\textcolor{orange}{relative} contribution representations by
\[
  \underbrace{\text{cont}_j = \| \mathbf{d}_j \|_2^2}_{\fbox{\textcolor{blue}{AC}}}
  \qquad \text{and} \qquad
  \underbrace{\text{rel}_j = \frac{\| \mathbf{d}_j \|_2^2}{\sum_{i=0}^{J-1} \| \mathbf{d}_i \|_2^2}}_{\fbox{\textcolor{orange}{RC}}} .
\]
198 %\item They quantify the relative importance of the scales to the global dynamic.
199 % \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
200 % \item RC normalizes the energy of each signal to 1.
205 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
207 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
210 % \frametitle{Schema of procedure}
212 \includegraphics[width =
7cm, height =
2cm
]{./pics/Diagramme1.png
}
213 % Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
218 \item [0. Data preprocessing.
] Approximate sample paths of $z_1(t),
\ldots,z_n(t)$
%by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
219 \item [1. Feature extraction.
] Compute either of the energetic components using absolute contribution (AC) or relative contribution (RC).
220 \item [2. Feature selection.
] Screen irrelevant variables.
\begin{tiny
} [Steinley \& Brusco ('
06)
]\end{tiny
}
221 %\item [3. Determine the number of clusters.] Detecting significant jumps %in the transformed distortion curve.
222 %\begin{tiny} [Sugar \& James ('03)]\end{tiny}
223 %\item [4. Clustering.] Obtain the $K$ clusters using PAM algorithm.
224 \end{description
} \end{footnotesize
}
228 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
230 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
233 \frametitle{A function-based distance
}
236 \column{0.6\textwidth}
238 \item Distance based on wavelet-correlation between two time series
239 \item Can be used to measure relationship between two functions
240 %variables, i.e. temperature and load.
\item The strength of the relation is hierarchically decomposed across
scales without loss of time localization
245 Drawback: needs more computation time and storage (complex values)
246 \column{0.4\textwidth}
247 \includegraphics[width =
\textwidth]{pics/conso-week.png
}
249 \includegraphics[width =
.96\textwidth]{pics/wsp-week.png
}
254 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
256 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
258 \begin{frame
}{A
2-stages strategy (Energycon)
}
260 \includegraphics[width =
\textwidth]{pics/
2-stage_strategy.png
}
J. Cugliari, Y. Goude and J. M. Poggi, ``Disaggregated electricity forecasting using wavelet-based clustering of individual consumers,''
2016 IEEE International Energy Conference (ENERGYCON), Leuven, 2016, pp.~1--6.
268 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
270 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
272 \begin{frame
}{Data description
}
275 \column{0.45\textwidth}
276 \begin{block
}{Available
}
279 EDF :
25K professional clients, sampled @
30min,
5 semesters
\begin{block}{Accessible}
287 \item simulated (very large) data
290 \column{0.55\textwidth}
291 \includegraphics[width =
\columnwidth]{pics/indiv.jpg
}
292 %\textcolor{red}{A picture here?}
298 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
300 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
302 \begin{frame
}{Computing resources
}
303 \begin{block
}{2 academic testing architectures
}
305 \item Orsay's cluster (
500Gb RAM,
80 cores)
306 \item \texttt{pulpito
} : Lyon
2's box with
2 quadricores (HT x
2),
72Gb RAM
310 \begin{block
}{1 industrial real-scale architecture
}
312 \item mini cluster @ EDF labs
318 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
320 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
322 \begin{frame
}{Strategies for upscaling
}
325 \item From
25K to
25M: in
1000 chunks of
25K
326 \item Reference values:
\item $K' = 200$ super consumers (SC)
\item $K^\ast = 15$ final clusters
335 \begin{block
}{1st strategy
}
337 \item Do
1000 times ONLY Energycon's
1st-step strategy on
25K clients
339 \item With the $
1000 \times K'$ SC perform a
2-step run
340 leading to $K^
\ast$ clusters
344 \begin{block
}{2nd strategy
}
346 \item Do
1000 times Energycon's
2-step strategy on
25K clients
347 leading to $
1000\times K^
\ast$ intermediate clusters
348 \item Treat the intermediate clusters as individual curves and perform
349 a single
2-step run to get $K^
\ast$ final clusters
356 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
358 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
361 \frametitle{Course + Workshop
}
363 \begin{block
}{1-day IRSDI-ECAS Course
}
366 GAM : from classical to distributed environments
368 October
19,
2017 @ EDF Labs, Paris-Saclay, France
370 Simon Wood \& Matteo Fasiolo (University Walk, Bristol, UK)
\begin{block}{1-day Workshop}
378 Individual Electricity Consumers, Data, Packages and Methods
380 October
20,
2017 @ EDF Labs, Paris-Saclay, France
385 Souhaib Ben Taieb, Monash University, Melbourne, Australia
387 Ram Rajagopal, Stanford Univ., USA
389 Gavin Shaddick, University of Bath, UK
391 Bei Chen, IBM Research, Ireland
393 Jack Kelly, University of London, Imperial College of Science, UK
400 \section{Point sur les codes
}
402 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
404 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
406 \begin{frame
}{Résumé point sur le code (BA, JC @ Lyon déc
2016)
}
411 faire une fresh installation du code de BA sur une nouvelle machine
412 (problèmes divers liés à la compilation, configuration, libraries exotiques)
414 conduire des expériences sur les données pour mesurer le temps de calcul (le code est blazing fast:
30sec pour obtenir
500 groupes sur
4 procs)
identifier des problèmes : manque un installateur et une interface de prétraitement indépendante du calcul
finir les expériences (sur nb de classes, nb de courbes / chunk, nb de procs) et sur d'autres architectures
424 interface matrice -> binaire
426 obtenir les courbes synchrones
429 Piste à explorer pour les comparaisons:
\texttt{h2o
}
432 \section{Expériences numériques
}
434 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
436 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
439 \frametitle{Code C/MPI
}
\item[0] Sérialisation des données : on écrit d'abord la longueur de la série puis les puissances sont codées sur 3 octets, permettant une excellente compression et une lecture facile (accès en $O(1)$ à n'importe quelle série)
\item[1] Algorithme PAM appliqué en parallèle via la librairie MPI.
\item[2] Agrégation des médoïdes obtenus, (re-)sérialisation, puis on ré-applique l'algorithme PAM.
452 %Plusieurs astuces : sérialisation des données, calcul en parallèle
454 Très rapide : environ
5 minutes from raw to
1st stage clustering
Divergences par rapport à Energycon (moyennes au lieu d'agrégation)
460 %> time ./ppam.exe serialize 2009.csv 2009.bin 1 0
466 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
468 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
\item Energycon's code update
\item \texttt{data.table} is used for reading and writing\footnote{The \texttt{tidyverse} toolbox is much slower}
\item Disk space of the associated plain-text objects
\item Timings on \texttt{pulpito} (16 cores, 64Gb RAM, SSD)
480 \begin{tabular
}{lccc
}\toprule
481 Task & Time & Memory & Disk \\
\midrule
482 Raw (
15Gb) to matrix &
7 min &
483 30 Gb
\footnote{\texttt{ff
} is a promising alternative if needed
} &
485 Compute contributions &
7 min & <
1Gb &
7 Mb \\
486 1st stage clustering &
3 min & <
1Gb & -- \\
487 Aggregation &
1 min &
6Gb &
30 Mb \\
WER distance matrix & 40 min & 64Gb\footnote{Embarrassingly parallel but still too slow} & 150 Kb \\
489 Forecasts &
10 min & <
1Gb & --\\
495 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
497 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
\frametitle{Why is the WER distance so slow?}
\begin{block}{2nd step strategy (Energycon way)}
504 % We proceed as follows:
506 \item Transform data $z_1(t),
\ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J
\times N$.
507 \item Compute the wer-based dissimilarity matrix
508 \item Obtain the PAM-based clustering.
511 \begin{block
}{Current choices on the computation
}
\item From (\texttt{Rwave} \& \texttt{sowas}) to \texttt{biwavelet}
514 \item About
1 sec to compute
\texttt{werd(x, y)
} with current
515 filtering ($J
\sim 52$ with
13 octaves,
4 voices )
\item Need to compute $n (n - 1) / 2$ pairwise distances
(20K, 125K, 500K entries for $n = 200, 500, 1000$)
518 \item Need an efficient
\texttt{werd
} function (maybe in
531 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
533 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
537 \begin{block
}{Continuous WT
}
Starting with a mother wavelet $\psi$, consider $\psi_{a,\tau}(t) = a^{-1/2} \psi\left(\frac{t-\tau}{a}\right)$.
The CWT of a function $z \in L^2(\mathbb{R})$ is
\[
  W_z(a,\tau) = \int_{-\infty}^{\infty} z(t) \, \psi_{a,\tau}^*(t) \, dt .
\]
543 As for Fourier transform, a spectral approach is possible.
S_z(a,\tau) &=& |W_z(a,\tau)|^2 \qquad\qquad \hbox{wavelet spectrum} \\
\mathcal{W}_{z,x}(a,\tau) &=& W_z(a,\tau) W_x^*(a,\tau) \qquad \hbox{cross-wavelet transform}
551 %$$ S_z(a, \tau) = |W_z(a, \tau)|^2 \qquad \hbox{wavelet spectrum}$$
553 %$$ \mathcal{W}_{z, x}(a, \tau) = W_z(a, \tau)W_x(a, \tau)^* \qquad \hbox{cross-wavelet transform}$$
559 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
561 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
563 \begin{frame
}{Wavelet coherence
}
\begin{equation*}\label{coherence}
  R_{z,x}^2(a,\tau) = \frac{ |\tilde{\mathcal{W}}_{z,x}(a,\tau)|^2 }{ |\tilde{\mathcal{W}}_{z,z}(a,\tau)| \, |\tilde{\mathcal{W}}_{x,x}(a,\tau)| },
Based on the extended $R^2$ coefficient, we can construct a coefficient of determination between two wavelet spectra
\begin{equation*}\label{eq:wer}
  WER_{z,x}^2 = \frac{
    \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z,x}(a,\tau)| \, d\tau \right)^2 da
  }{
    \int_0^\infty \left( \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{z,z}(a,\tau)| \, d\tau \int_{-\infty}^\infty |\tilde{\mathcal{W}}_{x,x}(a,\tau)| \, d\tau \right) da
  }.
We then obtain a dissimilarity based on it:
\begin{equation*}\label{eq:dist-wer}
  d(z, x) = \sqrt{ JN \left( 1 - \widehat{WER}_{z,x}^2 \right) }
584 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
586 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
588 %\begin{frame} \frametitle{Wavelet coherence}
590 % We proceed as follows:
592 % \item Transform data $z_1(t), \ldots, z_n(t)$ using the CWT and Morlet wavelet to obtain $n$ matrices of size $J\times N$.
593 % \item Compute a dissimilarity matrix with the coherency based dissimilarity.
594 % \item Using PAM obtain clusters $k=8$ clusters.
597 % Rand Index (AC, WER) = 0.26
607 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
609 %-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
615 \item simulated dataset : howto ?