\documentclass[xcolor=dvipsnames, smaller]{beamer}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amsfonts}
\usepackage[francais]{babel}
\usepackage{hyperref, url, booktabs, subcaption, tikz}
\hypersetup{colorlinks,linkcolor=black,urlcolor=violet}
\setbeamertemplate{sections/subsections in toc}[square]
\beamertemplatenavigationsymbolsempty
\newcommand{\N}{\mathbb{N}} % naturals
\newcommand{\set}[1]{\lbrace#1\rbrace} % set
\newcommand{\R}{\mathbb{R}} % reals
\colorlet{darkred}{red!80!black}
\colorlet{darkblue}{blue!80!black}
\colorlet{darkgreen}{green!60!black}
\usetikzlibrary{calc,decorations.pathmorphing,patterns}
\pgfdeclaredecoration{penciline}{initial}{
\state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance,
                auto corner on length=1mm,]{
\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
          {\pgfdecorationsegmentamplitude}
\pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}}
            {\pgfqpoint{-\pgfdecorationsegmentaspect
                         \pgfdecoratedinputsegmentremainingdistance}%
                       {\pgfmathresult\pgfdecorationsegmentamplitude}
\pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}}
\tikzstyle{block} = [draw,rectangle,thick,minimum height=2em,minimum width=2em]
% = = = = = = = = = = = = = = = = = = = = = = = = Separator = = = =
\begin{frame}{Outline}
\tableofcontents[currentsection]
%--------------------------------------------------------------------------
\title{Unsupervised classification of individual electricity curves}
\author{Jairo Cugliari}
\institute{%Laboratoire ERIC, Université Lyon 2
% \includegraphics[height = 1.5cm]{pics/logo_dis.png}
\includegraphics[height = 1cm]{pics/logo_eric.png}
% \includegraphics[height = 1cm]{pics/logo_lyon2.jpg}
%--------------------------------------------------------------------------
% \begin{frame}[plain]
\begin{frame}[plain, noframenumbering, b]
% % \includegraphics[height = 1.5cm]{pics/logo_dis.png}
% \includegraphics[height = 1.5cm]{pics/logo_eric.png}
% \includegraphics[height = 1.5cm]{pics/logo_lyon2.jpg}
\begin{center}{\scriptsize
Joint work with Benjamin Auder (LMO, Université Paris-Sud)}
% \includegraphics[width = 0.15\textwidth]{pics/by-nc-sa.png}
% \begin{center}{\scriptsize
% Joint work with Benjamin Auder (LMO, Université Paris-Sud) }
%--------------------------------------------------------------------------
\frame{\frametitle{Outline}
%--------------------------------------------------------------------------
\begin{frame}{Industrial motivation}
\column{0.6\textwidth}
\item Smart grids \& smart meters: real-time information
\item Lots of data of different natures
\item Many problems: transfer protocols, security, privacy, ...
\item The French touch: 35M Linky smart meters
What can we do with all these data?
\column{0.4\textwidth}
\includegraphics[width = \textwidth]{./pics/smartgrid.jpg}
\includegraphics[width = \textwidth]{./pics/linky.jpg}
%--------------------------------------------------------------------------
\begin{frame}{Electricity demand data}
\framesubtitle{Some salient features}
\begin{figure}[!ht] \centering
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/longtermload.png}
\caption{Long term trend} %\label{fig:gull}
~ %spacing between images
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/twoyearsload.png}
\caption{Weekly cycle} % \label{fig:tiger}
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/dailyloads.png}
\caption{Daily load curve} % \label{fig:mouse}
~ %spacing between images
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/consotemp.png}
\caption{Electricity load vs. temperature}
%--------------------------------------------------------------------------
\begin{frame}[shrink]{FD as slices of a continuous process
\begin{scriptsize} \hfill [Bosq (1990)] \end{scriptsize}}
The prediction problem
\item Suppose one observes a square integrable continuous-time stochastic process $X=(X(t), t\in\R)$ over the interval $[0,T]$, $T>0$;
\item {We want to predict $X$ over the whole segment $[T, T+\delta]$, $\delta>0$}
\item {Divide the interval $[0,T]$ into $n$ subintervals of equal length $\delta$}
\item Consider the functional-valued discrete time stochastic process $Z = (Z_k, k\in\N)$, where $\N = \set{1, 2, \ldots}$, defined by
\[ Z_k(t) = X(t + (k-1)\delta), \qquad k\in\N, \;\; t\in [0,\delta). \]
If $X$ contains a $\delta$-seasonal component, working with $Z$ is particularly fruitful.
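% The slicing above is just a reshape of the sampled signal. A minimal Python/numpy
% sketch, assuming half-hourly sampling (48 points per day) and an illustrative file name:
%   import numpy as np
%
%   points_per_day = 48                                # delta = one day, sampled every 30 min (assumption)
%   x = np.loadtxt("load.txt")                         # one long univariate load series
%   n_days = len(x) // points_per_day
%   # Z[k] holds the k-th daily curve, i.e. Z_k(t) = X(t + (k-1)*delta)
%   Z = x[: n_days * points_per_day].reshape(n_days, points_per_day)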
%--------------------------------------------------------------------------
\begin{frame}{Long term objective}
\column{.6\textwidth}
%\begin{figure}[!ht]\centering
\includegraphics[width = \textwidth]{pics/schema.png}
%\caption{Hierarchical structure of $N$ individual clients among $K$ groups.}\label{fig:schema-hier}
\column{.4\textwidth}
\begin{tikzpicture}[decoration=penciline, decorate]
\node[block, decorate] at (0, 0) {$Z_t$} ;
\node[block, decorate] at (3, 0) {$Z_{t+1}$} ;
\node[block, decorate] at (0, -2.5) {$\begin{pmatrix}
Z_{t,1} \\ Z_{t,2} \\ \vdots \\ Z_{t,K}
\node[block, decorate] at (3, -2.5) {$\begin{pmatrix}
Z_{t+1,1} \\ Z_{t+1,2} \\ \vdots \\ Z_{t+1,K}
\draw[decorate, darkblue, line width = 2mm, ->] (1, 0) -- (2, 0);
\draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5);
\draw[decorate, black, line width = 2mm, ->] (3, -1.3) -- (3, -0.4);
\draw[decorate, darkred, line width = 2mm, ->] (1, -1.5) -- (2, -0.75);
\item Groups may reflect tariffs, geographical dispersion, client classes, ...
\item \textbf{IDEA}: Use a clustering algorithm to learn the group structure of the customers
\item \textbf{Aim}: Set up a classical clustering algorithm to run in parallel
%--------------------------------------------------------------------------
\section{Functional clustering}
\column{0.6\textwidth}
\item Segmentation of $X$ may not suffice to make the stationarity hypothesis reasonable.
\item If a grouping effect exists, stationarity may be assumed within each group.
\item Conditionally on the grouping, functional time series prediction methods can then be applied.
\item We propose a clustering procedure that discovers the groups from a bunch of individual curves.
We use wavelet transforms to take into account the fact that the curves may present non-stationary patterns.
\column{0.4\textwidth}
\includegraphics[width=0.9\textwidth, height=2.7cm]{pics/conso-traj.png}
Two strategies to cluster functional time series:
\item Feature extraction (summary measures of the curves).
\item Direct similarity between curves.
%---------------------------
\begin{frame}[plain]{Wavelets to cope with \textsc{fd}}
\column{.6\textwidth}
\includegraphics[width = \textwidth]{./pics/weekly-5.png}
% * * * * * * * * * * * * * * * * * * *
\column{.4\textwidth}
\begin{block}{ } %Wavelet transform}
\item domain-transform technique for hierarchically decomposing finite-energy signals
\item description in terms of a broad trend (\textcolor{PineGreen}{approximation part}), plus a set of localized changes kept in the \textcolor{red}{detail parts}.
\begin{block}{Discrete Wavelet Transform}
If $z \in L_2([0,1])$ we can write it as
\begin{equation*}\label{eq:zeta}
z(t) = \sum_{k=0}^{2^{j_0}-1} \textcolor{PineGreen}{c_{j_0,k}} \phi_{j_0,k}(t) +
\sum_{j=j_0}^{\infty} \sum_{k=0}^{2^j-1} \textcolor{red}{d_{j,k}} \psi_{j,k}(t) ,
\end{equation*}
where $ c_{j,k} = \langle z, \phi_{j,k} \rangle $ and $ d_{j,k} = \langle z, \psi_{j,k} \rangle $ are the \textcolor{PineGreen}{scale coefficients} and \textcolor{red}{wavelet coefficients} respectively, and the functions $\phi$ and $\psi$ are associated with an orthogonal \textsc{mra} of $L_2([0,1])$.
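% For illustration, the DWT of one sampled curve can be computed with PyWavelets;
% the wavelet family ("db4") and the decomposition level are illustrative choices,
% not necessarily those used in this work:
%   import numpy as np
%   import pywt
%
%   z = np.loadtxt("curve.txt")                                  # one sampled curve
%   J = pywt.dwt_max_level(len(z), pywt.Wavelet("db4").dec_len)  # deepest usable level
%   coeffs = pywt.wavedec(z, "db4", level=J)                     # [approximation, details coarse -> fine]
%   c_approx, details = coeffs[0], coeffs[1:]                    # approximation part, detail parts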
%---------------------------------------- SLIDE ---------------------
\begin{frame}{Energy decomposition of the DWT}
\item Energy conservation of the signal
\begin{equation*}\label{eq:energy}
\| z \|_H^2 \approx \| \widetilde{z_J} \|_2^2
= c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k}^2 =
c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2.
\end{equation*}
% \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
\item For each $j=0,1,\ldots,J-1$, we compute the absolute and relative contribution representations by
\[ \underbrace{\hbox{cont}_j = \| \mathbf{d}_j \|^2}_{\fbox{AC}}
\qquad \text{and} \qquad
\underbrace{\hbox{rel}_j = \frac{\| \mathbf{d}_j \|^2}{\sum_{j'} \| \mathbf{d}_{j'} \|^2}}_{\fbox{RC}} . \]
\item They quantify the relative importance of each scale to the global dynamics.
% \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
\item RC normalizes the energy of each signal to 1.
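% A short numpy/PyWavelets sketch of these energetic features (one AC/RC value per scale);
% the function name and wavelet choice are illustrative:
%   import numpy as np
%   import pywt
%
%   def contributions(z, wavelet="db4"):
%       """Absolute (AC) and relative (RC) contributions of each detail scale of z."""
%       details = pywt.wavedec(z, wavelet)[1:]               # detail coefficients, coarse to fine
%       ac = np.array([np.sum(d ** 2) for d in details])     # cont_j = ||d_j||^2
%       rc = ac / ac.sum()                                    # rel_j, energies summing to 1
%       return ac, rc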
% =======================================
\frametitle{Schema of the procedure}
\includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png}
% Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
\item [0. Data preprocessing.] Approximate the sample paths of $z_1(t),\ldots,z_n(t)$
%by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
\item [1. Feature extraction.] Compute either of the energetic components using the absolute contributions (AC) or the relative contributions (RC).
\item [2. Feature selection.] Screen out irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny}
\item [3. Determine the number of clusters.] Detect significant jumps in the transformed distortion curve. \begin{tiny} [Sugar \& James ('03)]\end{tiny}
\item [4. Clustering.] Obtain the $K$ clusters using the PAM algorithm.
\end{description} \end{footnotesize}
\footnotetext[1]{A. Antoniadis, X. Brossat, J. Cugliari and J.-M. Poggi (2013), Clustering Functional Data Using Wavelets, {\it IJWMIP}, 11(1), 35--64}
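% A compact sketch of steps 1, 3 and 4 of the procedure, assuming scikit-learn-extra's
% KMedoids is available; step 2 (feature screening) is omitted and all names are illustrative:
%   import numpy as np
%   import pywt
%   from sklearn_extra.cluster import KMedoids        # assumption: scikit-learn-extra installed
%
%   def features(curves, wavelet="db4", relative=False):
%       """Step 1: one row of per-scale detail energies (AC or RC) per curve."""
%       rows = []
%       for z in curves:
%           details = pywt.wavedec(z, wavelet)[1:]
%           ac = np.array([np.sum(d ** 2) for d in details])
%           rows.append(ac / ac.sum() if relative else ac)
%       return np.vstack(rows)
%
%   def pick_k(X, k_max=10):
%       """Step 3 (simplified): largest jump of the transformed distortion curve."""
%       p = X.shape[1]
%       distortion = [KMedoids(n_clusters=k).fit(X).inertia_ / len(X) for k in range(1, k_max + 1)]
%       transformed = np.asarray(distortion) ** (-p / 2.0)
%       jumps = np.diff(np.concatenate(([0.0], transformed)))
%       return int(np.argmax(jumps)) + 1
%
%   # Step 4: X = features(curves); K = pick_k(X)
%   # labels = KMedoids(n_clusters=K, method="pam").fit_predict(X)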
% ===========================================
\section{Parallel $k$-medoids}
\begin{frame}{Partitioning Around Medoids (PAM)
\begin{scriptsize} \hfill [Kaufman and Rousseeuw~(1987)] \end{scriptsize}}
\item Partition a scatter of $n$ points of $\R^d$ into $K$ clusters
\item Optimization problem:
\[ D(x) = \min_{m_1,\dots,m_K \in \R^d} \sum_{i=1}^{n} \min_{j=1,\dots,K} \| x_i - m_j \| \, , \]
with $x = (x_1,\dots,x_n)$; $\|\,.\,\|$ can be any norm. Here we choose to use the Euclidean norm.
\item Robust version of $k$-means
\item Computational burden: medoids instead of means
\item Several heuristics allow reducing the computation time.
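% For illustration, a compact k-medoids sketch on a feature matrix X (n x d). This is the
% simple "assign / recompute medoid" heuristic, not the full PAM swap search, and it builds
% the complete distance matrix, so it is only meant for small n:
%   import numpy as np
%
%   def kmedoids(X, K, n_iter=50, seed=0):
%       rng = np.random.default_rng(seed)
%       D = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=-1)   # pairwise Euclidean distances
%       medoids = rng.choice(len(X), size=K, replace=False)
%       for _ in range(n_iter):
%           labels = np.argmin(D[:, medoids], axis=1)                # nearest-medoid assignment
%           new_medoids = medoids.copy()
%           for j in range(K):
%               members = np.flatnonzero(labels == j)
%               if members.size:                                     # point minimizing within-cluster distances
%                   new_medoids[j] = members[np.argmin(D[np.ix_(members, members)].sum(axis=1))]
%           if np.array_equal(new_medoids, medoids):
%               break
%           medoids = new_medoids
%       return medoids, labels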
% ===========================================
\begin{frame}{Parallelization with MPI}
\column{.8\textwidth}
\item Easy-to-use library routines for writing parallel algorithms
\item Available in several languages
\item We use the master-slave mode
\column{.2\textwidth}
\includegraphics[width=\textwidth]{./pics/open-mpi-logo.png}
\begin{block}{Outline of the code:}
\item The master process splits the problem into tasks over the data set and sends them to the workers;
\item Each worker reduces the functional nature of the data using the DWT, applies the clustering and returns the centers;
\item The master collects the centers and clusters them into $K$ meta-centers.
The source code is open and will be available for download from \href{https://github.com/}{github}.
\footnotetext[1]{B. Auder \& J. Cugliari. Parallélisation de l'algorithme des $k$-médoïdes. Application au clustering de courbes. (2014, submitted)}
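% The master/worker outline above could look roughly as follows with mpi4py; this is a
% sketch (not the actual released code), and local_cluster is a placeholder for
% "DWT feature extraction + k-medoids on one block of curves":
%   from mpi4py import MPI
%   import numpy as np
%
%   comm = MPI.COMM_WORLD
%   rank, size = comm.Get_rank(), comm.Get_size()
%   K = 5
%
%   def local_cluster(block, k):
%       """Placeholder: feature extraction + k-medoids, returning k local centers."""
%       idx = np.random.default_rng(0).choice(len(block), size=k, replace=False)
%       return block[idx]
%
%   if rank == 0:
%       data = np.loadtxt("curves.txt")          # one curve per row (illustrative file name)
%       chunks = np.array_split(data, size)      # the master splits the curves into one block per process
%   else:
%       chunks = None
%   block = comm.scatter(chunks, root=0)         # each process receives its block
%   centers = local_cluster(block, K)            # local clustering, returning the centers
%   all_centers = comm.gather(centers, root=0)   # the master collects all local centers
%   if rank == 0:
%       meta_centers = local_cluster(np.vstack(all_centers), K)   # ...and clusters them into K meta-centers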
\section{Numerical experiments}
% ===========================================
\begin{frame}{Application I: Starlight curves}
\item Data from the UCR Time Series Classification/Clustering archive
\item 1000 curves in the learning set + 8236 in the validation set ($d=1024$ discretization points)
\begin{minipage}[c]{.32\linewidth}
\includegraphics[width=\linewidth,height=3.5cm]{pics/slgr1.png}
\begin{minipage}[c]{.32\linewidth}
\includegraphics[width=\linewidth,height=3.5cm]{pics/slgr2.png}
\begin{minipage}[c]{.32\linewidth}
\includegraphics[width=\linewidth,height=3.5cm]{pics/slgr3.png}
\label{figsltr3clusts}
\begin{tabular}{lccc} \toprule
 & & \multicolumn{2}{c}{Adequacy} \\
 & Distortion & Internal & External \\ \midrule
Training (sequential) & 1.31e4 & 0.79 & 0.77 \\
Training (parallel)   & 1.40e4 & 0.79 & 0.68 \\
Test (sequential)     & 1.09e5 & 0.78 & 0.76 \\
Test (parallel)       & 1.15e5 & 0.78 & 0.69 \\ \bottomrule
%\caption{Distortions and adequacy indices of the partitions}
% ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
\begin{frame}{Application II: EDF data}
\includegraphics[width=0.9\textwidth]{pics/conso-shapes.png}
% conso-traj.eps: 0x0 pixel, 300dpi, 0.00x0.00 cm, bb=18 18 577 824
\caption{ \begin{footnotesize}
French electricity power demand in autumn (top left), winter (bottom left), spring (top right) and summer (bottom right). \end{footnotesize} }
\label{fig:conso-shapes}
\item The significant scales for revealing the cluster structure are independent of the possible number of clusters.
\item The significant scales are associated with mid-frequencies.
\item The retained scales correspond to cycles of 1.5, 3 and 6 hours (AC).
\end{itemize} \end{footnotesize}
% ===========================================
\includegraphics[width=0.9\textwidth]{./pics/conso_jump_AC.png} \\
\caption{ \begin{footnotesize}
Number of clusters by feature extraction with the AC (top). From left to right: distortion curve, transformed distortion curve and first difference of the transformed distortion curve. \end{footnotesize} }
\label{fig:conso-jumps}
% ===========================================
\begin{figure} \centering
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{./pics/conso_AC-curves.png}
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{./pics/conso_AC-calendar.png}
% \subfloat[Calendar]{\label{fig:conso_clust_AC_cal}
% \includegraphics[width = 0.45\textwidth]{./pics/conso_AC-calendar.png}}
\caption{Cluster membership of the curves using the AC-based dissimilarity (a) and the corresponding calendar positioning (b).}
% ===========================================
\begin{frame}{Application III: Electricity Smart Meter CBT (ISSDA)} \small
\footnotetext[1]{\textit{Irish Social Science Data Archive}, \url{http://www.ucd.ie/issda/data/}}
\item Smart meter data from 4621 Irish households
% electricity consumption series of Irish households
\item About 25K discretization points
\item We test with $K=3$ or $K=5$ clusters
\item We compare the sequential and parallel versions
\begin{tabular}{lcc} \toprule
 & Distortion & Internal adequacy \\ \midrule
3 clusters sequential & 1.90e7 & 0.90 \\
3 clusters parallel   & 2.15e7 & 0.90 \\
5 clusters sequential & 1.61e7 & 0.89 \\
5 clusters parallel   & 1.84e7 & 0.89 \\ \bottomrule
% \caption{Distortions and adequacy indices of the partitions}
%--------------------------------------------------------------------------
\begin{frame}{Conclusion}
\item Identification of customer groups from smart meter data
\item Wavelets allow capturing the functional nature of the data
\item Upscaling of the clustering algorithm is envisaged for millions of curves
\item \textit{Divide-and-conquer} approach thanks to the MPI library
%for the $k$-medoids algorithm: first on groups of curve data, then on groups of medoids, until a single set remains and is processed on one processor.
%\item %The results obtained on the two data sets presented are quite encouraging and allow envisaging a use at a much larger scale.
\begin{block}{Further work}
\item Go back to the prediction task
\item Run the algorithm over many hundreds of processors
\item Connect the clustering method with a prediction model
%--------------------------------------------------------------------------
\begin{frame}[plain]{Bibliography}\small
\begin{thebibliography}{10}
\bibitem{1} A. Antoniadis, X. Brossat, J. Cugliari and J.-M. Poggi (2013), Clustering Functional Data Using Wavelets, {\it IJWMIP}, 11(1), 35--64.
\bibitem{2} R. Bekkerman, M. Bilenko and J. Langford, eds. (2011), Scaling up Machine Learning: Parallel and Distributed Approaches, {\it Cambridge University Press}.
\bibitem{3} P. Berkhin (2006), A Survey of Clustering Data Mining Techniques, in {\it Grouping Multidimensional Data}, eds. J. Kogan, C. Nicholas, M. Teboulle.
\bibitem{6} J. Dean and S. Ghemawat (2004), MapReduce: Simplified Data Processing on Large Clusters, {\it Sixth Symposium on Operating System Design and Implementation}.
\bibitem{7} G. De Francisci Morales and A. Bifet (2013), SAMOA: A Platform for Mining Big Data Streams, Keynote Talk at RAMSS '13: 2nd International Workshop on Real-Time Analysis and Mining of Social Streams, WWW, Rio de Janeiro.
\bibitem{10} L. Kaufman and P.J. Rousseeuw (1987), Clustering by means of Medoids, in {\it Statistical Data Analysis Based on the $L_1$-Norm and Related Methods}, ed. Y. Dodge.
\end{thebibliography}
% \begin{frame}{Academic motivation: Big Data}
% \item Specific needs: very large data volumes, high dimensionality
% \item Answers: algorithms operating on large graphs (Kang et al.~2009), on high-throughput data streams (De Francisci Morales and Bifet~2013)
% \item Bekkerman et al.~(2011): machine learning algorithms running in parallel
% \item unsupervised classification (\textit{clustering}): group the data into homogeneous \textit{clusters} that are pairwise sufficiently distinct
% \item many algorithms since Tryon~(1939) (see Berkhin~2006 for a review)
% \item however, the notion of a cluster varies with the data, the context and the algorithm used
% \item a very popular technique that makes it possible
% to reduce the size of the data by summarizing them with a few representatives