\documentclass[xcolor=dvipsnames, smaller]{beamer}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amsfonts}
\usepackage[francais]{babel}
\usepackage{hyperref, url, booktabs, subcaption, tikz}
\hypersetup{colorlinks,linkcolor=black,urlcolor=violet}
\setbeamertemplate{sections/subsections in toc}[square]
\beamertemplatenavigationsymbolsempty
\newcommand{\N}{\mathbb{N}} % naturals
\newcommand{\set}[1]{\lbrace#1\rbrace} % set
\newcommand{\R}{\mathbb{R}} % reals
\colorlet{darkred}{red!80!black}
\colorlet{darkblue}{blue!80!black}
\colorlet{darkgreen}{green!60!black}
\usetikzlibrary{calc,decorations.pathmorphing,patterns}
\pgfdeclaredecoration{penciline}{initial}{
\state{initial}[width=+\pgfdecoratedinputsegmentremainingdistance,
                auto corner on length=1mm,]{
\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}
          {\pgfdecorationsegmentamplitude}
\pgfpointadd{\pgfqpoint{\pgfdecoratedinputsegmentremainingdistance}{0pt}}
            {\pgfqpoint{-\pgfdecorationsegmentaspect
                         \pgfdecoratedinputsegmentremainingdistance}%
                       {\pgfmathresult\pgfdecorationsegmentamplitude}
\pgfpointadd{\pgfpointdecoratedinputsegmentlast}{\pgfpoint{1pt}{1pt}}
\tikzstyle{block} = [draw,rectangle,thick,minimum height=2em,minimum width=2em]
% = = = = = = = = = = = = = = = = = = = = = = = = Separator = = = =
\begin{frame}{Outline}
\tableofcontents[currentsection]
%--------------------------------------------------------------------------
\title{Unsupervised classification of individual electricity curves}
\author{Jairo Cugliari}
\institute{%Laboratoire ERIC, Université Lyon 2
% \includegraphics[height = 1.5cm]{pics/logo_dis.png}
\includegraphics[height = 1cm]{pics/logo_eric.png}
% \includegraphics[height = 1cm]{pics/logo_lyon2.jpg}
%--------------------------------------------------------------------------
% \begin{frame}[plain]
\begin{frame}[plain, noframenumbering, b]
% % \includegraphics[height = 1.5cm]{pics/logo_dis.png}
% \includegraphics[height = 1.5cm]{pics/logo_eric.png}
% \includegraphics[height = 1.5cm]{pics/logo_lyon2.jpg}
\begin{center}{\scriptsize
Joint work with Benjamin Auder (LMO, Université Paris-Sud)}
% \includegraphics[width = 0.15\textwidth]{pics/by-nc-sa.png}
% \begin{center}{\scriptsize
% Joint work with Benjamin Auder (LMO, Université Paris-Sud) }
%--------------------------------------------------------------------------
\frame{\frametitle{Outline}
%--------------------------------------------------------------------------
\begin{frame}{Industrial motivation}
\column{0.6\textwidth}
\item Smart grids \& smart meters: real-time information
\item Lots of data of different natures
\item Many problems: transfer protocols, security, privacy, ...
\item The French touch: 35M Linky smart meters
What can we do with all these data?
\column{0.4\textwidth}
\includegraphics[width = \textwidth]{./pics/smartgrid.jpg}
\includegraphics[width = \textwidth]{./pics/linky.jpg}
%--------------------------------------------------------------------------
\begin{frame}{Electricity demand data}
\framesubtitle{Some salient features}
\begin{figure}[!ht] \centering
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/longtermload.png}
\caption{Long term trend} %\label{fig:gull}
~ %spacing between images
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/twoyearsload.png}
\caption{Weekly cycle} % \label{fig:tiger}
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/dailyloads.png}
\caption{Daily load curve} % \label{fig:mouse}
~ %spacing between images
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{pics/consotemp.png}
\caption{Electricity load vs. temperature}
%--------------------------------------------------------------------------
\begin{frame}[shrink]{FD as slices of a continuous process
\begin{scriptsize} \hfill [Bosq (1990)] \end{scriptsize}}
The prediction problem
\item Suppose one observes a square integrable continuous-time stochastic process $X=(X(t), t\in\R)$ over the interval $[0,T]$, $T>0$;
\item {We want to predict $X$ over the whole segment $[T, T+\delta]$, $\delta>0$}
\item {Divide the interval $[0,T]$ into $n$ subintervals of equal length $\delta$}
\item Consider the functional-valued discrete time stochastic process $Z = (Z_k, k\in\N)$, where $\N = \set{1, 2, \ldots}$, defined by
\[ Z_k(t) = X(t + (k-1)\delta), \qquad k\in\N, \;\; t\in [0,\delta). \]
If $X$ contains a $\delta$-seasonal component, working with $Z$ is particularly fruitful.
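% The slicing above is just a reshape of the sampled signal. A minimal Python/numpy
% sketch, assuming half-hourly sampling (48 points per day) and an illustrative file name:
%   import numpy as np
%
%   points_per_day = 48                                # delta = one day, sampled every 30 min (assumption)
%   x = np.loadtxt("load.txt")                         # one long univariate load series
%   n_days = len(x) // points_per_day
%   # Z[k] holds the k-th daily curve, i.e. Z_k(t) = X(t + (k-1)*delta)
%   Z = x[: n_days * points_per_day].reshape(n_days, points_per_day)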
%--------------------------------------------------------------------------
\begin{frame}{Long term objective}
\column{.6\textwidth}
%\begin{figure}[!ht]\centering
\includegraphics[width = \textwidth]{pics/schema.png}
%\caption{Hierarchical structure of $N$ individual clients among $K$ groups.}\label{fig:schema-hier}
\column{.4\textwidth}
\begin{tikzpicture}[decoration=penciline, decorate]
\node[block, decorate] at (0, 0) {$Z_t$} ;
\node[block, decorate] at (3, 0) {$Z_{t+1}$} ;
\node[block, decorate] at (0, -2.5) {$\begin{pmatrix}
Z_{t,1} \\ Z_{t,2} \\ \vdots \\ Z_{t,K}
\node[block, decorate] at (3, -2.5) {$\begin{pmatrix}
Z_{t+1,1} \\ Z_{t+1,2} \\ \vdots \\ Z_{t+1,K}
\draw[decorate, darkblue, line width = 2mm, ->] (1, 0) -- (2, 0);
\draw[decorate, darkgreen, line width = 2mm, ->] (1, -2.5) -- (2, -2.5);
\draw[decorate, black, line width = 2mm, ->] (3, -1.3) -- (3, -0.4);
\draw[decorate, darkred, line width = 2mm, ->] (1, -1.5) -- (2, -0.75);
\item Groups may reflect tariffs, geographical dispersion, client classes, ...
\item \textbf{IDEA}: Use a clustering algorithm to learn the group structure of the customers
\item \textbf{Aim}: Set up a classical clustering algorithm to run in parallel
%--------------------------------------------------------------------------
\section{Functional clustering}
\column{0.6\textwidth}
\item Segmentation of $X$ may not suffice to make the stationarity hypothesis reasonable.
\item If a grouping effect exists, stationarity may be assumed within each group.
\item Conditionally on the grouping, functional time series prediction methods can then be applied.
\item We propose a clustering procedure that discovers the groups from a bunch of individual curves.
We use wavelet transforms to take into account the fact that the curves may present non-stationary patterns.
\column{0.4\textwidth}
\includegraphics[width=0.9\textwidth, height=2.7cm]{pics/conso-traj.png}
Two strategies to cluster functional time series:
\item Feature extraction (summary measures of the curves).
\item Direct similarity between curves.
%---------------------------
\begin{frame}[plain]{Wavelets to cope with \textsc{fd}}
\column{.6\textwidth}
\includegraphics[width = \textwidth]{./pics/weekly-5.png}
% * * * * * * * * * * * * * * * * * * *
\column{.4\textwidth}
\begin{block}{ } %Wavelet transform}
\item domain-transform technique for hierarchically decomposing finite-energy signals
\item description in terms of a broad trend (\textcolor{PineGreen}{approximation part}), plus a set of localized changes kept in the \textcolor{red}{detail parts}.
\begin{block}{Discrete Wavelet Transform}
If $z \in L_2([0,1])$ we can write it as
\begin{equation*}\label{eq:zeta}
z(t) = \sum_{k=0}^{2^{j_0}-1} \textcolor{PineGreen}{c_{j_0,k}} \phi_{j_0,k}(t) +
\sum_{j=j_0}^{\infty} \sum_{k=0}^{2^j-1} \textcolor{red}{d_{j,k}} \psi_{j,k}(t) ,
\end{equation*}
where $ c_{j,k} = \langle z, \phi_{j,k} \rangle $ and $ d_{j,k} = \langle z, \psi_{j,k} \rangle $ are the \textcolor{PineGreen}{scale coefficients} and \textcolor{red}{wavelet coefficients} respectively, and the functions $\phi$ and $\psi$ are associated with an orthogonal \textsc{mra} of $L_2([0,1])$.
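% For illustration, the DWT of one sampled curve can be computed with PyWavelets;
% the wavelet family ("db4") and the decomposition level are illustrative choices,
% not necessarily those used in this work:
%   import numpy as np
%   import pywt
%
%   z = np.loadtxt("curve.txt")                                  # one sampled curve
%   J = pywt.dwt_max_level(len(z), pywt.Wavelet("db4").dec_len)  # deepest usable level
%   coeffs = pywt.wavedec(z, "db4", level=J)                     # [approximation, details coarse -> fine]
%   c_approx, details = coeffs[0], coeffs[1:]                    # approximation part, detail parts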
%---------------------------------------- SLIDE ---------------------
\begin{frame}{Energy decomposition of the DWT}
\item Energy conservation of the signal
\begin{equation*}\label{eq:energy}
\| z \|_H^2 \approx \| \widetilde{z_J} \|_2^2
= c_{0,0}^2 + \sum_{j=0}^{J-1} \sum_{k=0}^{2^j-1} d_{j,k}^2 =
c_{0,0}^2 + \sum_{j=0}^{J-1} \| \mathbf{d}_{j} \|_2^2.
\end{equation*}
% \item characterization by the set of channel variances estimated at the output of the corresponding filter bank
\item For each $j=0,1,\ldots,J-1$, we compute the absolute and relative contribution representations by
\[ \underbrace{\hbox{cont}_j = \| \mathbf{d}_j \|^2}_{\fbox{AC}}
\qquad \text{and} \qquad
\underbrace{\hbox{rel}_j = \frac{\| \mathbf{d}_j \|^2}{\sum_{j'} \| \mathbf{d}_{j'} \|^2}}_{\fbox{RC}} . \]
\item They quantify the relative importance of each scale to the global dynamics.
% \item Only the wavelet coefficients $\set{d_{j,k}}$ are used.
\item RC normalizes the energy of each signal to 1.
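% A short numpy/PyWavelets sketch of these energetic features (one AC/RC value per scale);
% the function name and wavelet choice are illustrative:
%   import numpy as np
%   import pywt
%
%   def contributions(z, wavelet="db4"):
%       """Absolute (AC) and relative (RC) contributions of each detail scale of z."""
%       details = pywt.wavedec(z, wavelet)[1:]               # detail coefficients, coarse to fine
%       ac = np.array([np.sum(d ** 2) for d in details])     # cont_j = ||d_j||^2
%       rc = ac / ac.sum()                                    # rel_j, energies summing to 1
%       return ac, rc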
% =======================================
\frametitle{Schema of the procedure}
\includegraphics[width = 7cm, height = 2cm]{./pics/Diagramme1.png}
% Diagramme1.png: 751x260 pixel, 72dpi, 26.49x9.17 cm, bb=0 0 751 260
\item [0. Data preprocessing.] Approximate the sample paths of $z_1(t),\ldots,z_n(t)$
%by the truncated wavelet series at the scale $J$ from sampled data $\mathbf{z}_1, \ldots, \mathbf{z}_n$.
\item [1. Feature extraction.] Compute either of the energetic components using the absolute contributions (AC) or the relative contributions (RC).
\item [2. Feature selection.] Screen out irrelevant variables. \begin{tiny} [Steinley \& Brusco ('06)]\end{tiny}
\item [3. Determine the number of clusters.] Detect significant jumps in the transformed distortion curve. \begin{tiny} [Sugar \& James ('03)]\end{tiny}
\item [4. Clustering.] Obtain the $K$ clusters using the PAM algorithm.
\end{description} \end{footnotesize}
\footnotetext[1]{A. Antoniadis, X. Brossat, J. Cugliari and J.-M. Poggi (2013), Clustering Functional Data Using Wavelets, {\it IJWMIP}, 11(1), 35--64}
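% A compact sketch of steps 1, 3 and 4 of the procedure, assuming scikit-learn-extra's
% KMedoids is available; step 2 (feature screening) is omitted and all names are illustrative:
%   import numpy as np
%   import pywt
%   from sklearn_extra.cluster import KMedoids        # assumption: scikit-learn-extra installed
%
%   def features(curves, wavelet="db4", relative=False):
%       """Step 1: one row of per-scale detail energies (AC or RC) per curve."""
%       rows = []
%       for z in curves:
%           details = pywt.wavedec(z, wavelet)[1:]
%           ac = np.array([np.sum(d ** 2) for d in details])
%           rows.append(ac / ac.sum() if relative else ac)
%       return np.vstack(rows)
%
%   def pick_k(X, k_max=10):
%       """Step 3 (simplified): largest jump of the transformed distortion curve."""
%       p = X.shape[1]
%       distortion = [KMedoids(n_clusters=k).fit(X).inertia_ / len(X) for k in range(1, k_max + 1)]
%       transformed = np.asarray(distortion) ** (-p / 2.0)
%       jumps = np.diff(np.concatenate(([0.0], transformed)))
%       return int(np.argmax(jumps)) + 1
%
%   # Step 4: X = features(curves); K = pick_k(X)
%   # labels = KMedoids(n_clusters=K, method="pam").fit_predict(X)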
% ===========================================
\section{Parallel $k$-medoids}
\begin{frame}{Partitioning Around Medoids (PAM)
\begin{scriptsize} \hfill [Kaufman and Rousseeuw~(1987)] \end{scriptsize}}
\item Partition a scatter of $n$ points of $\R^d$ into $K$ clusters
\item Optimization problem:
\[ D(x) = \min_{m_1,\dots,m_K \in \R^d} \sum_{i=1}^{n} \min_{j=1,\dots,K} \| x_i - m_j \| \, , \]
with $x = (x_1,\dots,x_n)$; $\|\,.\,\|$ can be any norm. Here we choose to use the Euclidean norm.
\item Robust version of $k$-means
\item Computational burden: medoids instead of means
\item Several heuristics allow reducing the computation time.
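% For illustration, a compact k-medoids sketch on a feature matrix X (n x d). This is the
% simple "assign / recompute medoid" heuristic, not the full PAM swap search, and it builds
% the complete distance matrix, so it is only meant for small n:
%   import numpy as np
%
%   def kmedoids(X, K, n_iter=50, seed=0):
%       rng = np.random.default_rng(seed)
%       D = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=-1)   # pairwise Euclidean distances
%       medoids = rng.choice(len(X), size=K, replace=False)
%       for _ in range(n_iter):
%           labels = np.argmin(D[:, medoids], axis=1)                # nearest-medoid assignment
%           new_medoids = medoids.copy()
%           for j in range(K):
%               members = np.flatnonzero(labels == j)
%               if members.size:                                     # point minimizing within-cluster distances
%                   new_medoids[j] = members[np.argmin(D[np.ix_(members, members)].sum(axis=1))]
%           if np.array_equal(new_medoids, medoids):
%               break
%           medoids = new_medoids
%       return medoids, labels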
% ===========================================
\begin{frame}{Parallelization with MPI}
\column{.8\textwidth}
\item Easy-to-use library routines for writing parallel algorithms
\item Available in several languages
\item We use the master-slave mode
\column{.2\textwidth}
\includegraphics[width=\textwidth]{./pics/open-mpi-logo.png}
\begin{block}{Outline of the code:}
\item The master process splits the problem into tasks over the data set and sends them to the workers;
\item Each worker reduces the functional nature of the data using the DWT, applies the clustering and returns the centers;
\item The master collects the centers and clusters them into $K$ meta-centers.
The source code is open and will be available for download from \href{https://github.com/}{github}.
\footnotetext[1]{B. Auder \& J. Cugliari. Parallélisation de l'algorithme des $k$-médoïdes. Application au clustering de courbes. (2014, submitted)}
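% The master/worker outline above could look roughly as follows with mpi4py; this is a
% sketch (not the actual released code), and local_cluster is a placeholder for
% "DWT feature extraction + k-medoids on one block of curves":
%   from mpi4py import MPI
%   import numpy as np
%
%   comm = MPI.COMM_WORLD
%   rank, size = comm.Get_rank(), comm.Get_size()
%   K = 5
%
%   def local_cluster(block, k):
%       """Placeholder: feature extraction + k-medoids, returning k local centers."""
%       idx = np.random.default_rng(0).choice(len(block), size=k, replace=False)
%       return block[idx]
%
%   if rank == 0:
%       data = np.loadtxt("curves.txt")          # one curve per row (illustrative file name)
%       chunks = np.array_split(data, size)      # the master splits the curves into one block per process
%   else:
%       chunks = None
%   block = comm.scatter(chunks, root=0)         # each process receives its block
%   centers = local_cluster(block, K)            # local clustering, returning the centers
%   all_centers = comm.gather(centers, root=0)   # the master collects all local centers
%   if rank == 0:
%       meta_centers = local_cluster(np.vstack(all_centers), K)   # ...and clusters them into K meta-centers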
\section{Numerical experiments}
% ===========================================
\begin{frame}{Application I: Starlight curves}
\item Data from the UCR Time Series Classification/Clustering archive
\item 1000 curves in the learning set + 8236 in the validation set ($d=1024$ discretization points)
\begin{minipage}[c]{.32\linewidth}
\includegraphics[width=\linewidth,height=3.5cm]{pics/slgr1.png}
\begin{minipage}[c]{.32\linewidth}
\includegraphics[width=\linewidth,height=3.5cm]{pics/slgr2.png}
\begin{minipage}[c]{.32\linewidth}
\includegraphics[width=\linewidth,height=3.5cm]{pics/slgr3.png}
\label{figsltr3clusts}
\begin{tabular}{lccc} \toprule
 & & \multicolumn{2}{c}{Adequacy} \\
 & Distortion & Internal & External \\ \midrule
Training (sequential) & 1.31e4 & 0.79 & 0.77 \\
Training (parallel)   & 1.40e4 & 0.79 & 0.68 \\
Test (sequential)     & 1.09e5 & 0.78 & 0.76 \\
Test (parallel)       & 1.15e5 & 0.78 & 0.69 \\ \bottomrule
%\caption{Distortions and adequacy indices of the partitions}
% ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
\begin{frame}{Application II: EDF data}
\includegraphics[width=0.9\textwidth]{pics/conso-shapes.png}
% conso-traj.eps: 0x0 pixel, 300dpi, 0.00x0.00 cm, bb=18 18 577 824
\caption{ \begin{footnotesize}
French electricity power demand in autumn (top left), winter (bottom left), spring (top right) and summer (bottom right). \end{footnotesize} }
\label{fig:conso-shapes}
\item The significant scales for revealing the cluster structure are independent of the possible number of clusters.
\item The significant scales are associated with mid-frequencies.
\item The retained scales correspond to cycles of 1.5, 3 and 6 hours (AC).
\end{itemize} \end{footnotesize}
% ===========================================
\includegraphics[width=0.9\textwidth]{./pics/conso_jump_AC.png} \\
\caption{ \begin{footnotesize}
Number of clusters by feature extraction with the AC (top). From left to right: distortion curve, transformed distortion curve and first difference of the transformed distortion curve. \end{footnotesize} }
\label{fig:conso-jumps}
% ===========================================
\begin{figure} \centering
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{./pics/conso_AC-curves.png}
\begin{subfigure}[t]{0.45\textwidth}
\includegraphics[width=\textwidth]{./pics/conso_AC-calendar.png}
% \subfloat[Calendar]{\label{fig:conso_clust_AC_cal}
% \includegraphics[width = 0.45\textwidth]{./pics/conso_AC-calendar.png}}
\caption{Cluster membership of the curves using the AC-based dissimilarity (a) and the corresponding calendar positioning (b).}
% ===========================================
\begin{frame}{Application III: Electricity Smart Meter CBT (ISSDA)} \small
\footnotetext[1]{\textit{Irish Social Science Data Archive}, \url{http://www.ucd.ie/issda/data/}}
\item Smart meter data from 4621 Irish households
% electricity consumption series of Irish households
\item About 25K discretization points
\item We test with $K=3$ or $K=5$ clusters
\item We compare the sequential and parallel versions
\begin{tabular}{lcc} \toprule
 & Distortion & Internal adequacy \\ \midrule
3 clusters sequential & 1.90e7 & 0.90 \\
3 clusters parallel   & 2.15e7 & 0.90 \\
5 clusters sequential & 1.61e7 & 0.89 \\
5 clusters parallel   & 1.84e7 & 0.89 \\ \bottomrule
% \caption{Distortions and adequacy indices of the partitions}
%--------------------------------------------------------------------------
\begin{frame}{Conclusion}
\item Identification of customer groups from smart meter data
\item Wavelets allow capturing the functional nature of the data
\item Upscaling of the clustering algorithm is envisaged for millions of curves
\item \textit{Divide-and-conquer} approach thanks to the MPI library
%for the $k$-medoids algorithm: first on groups of curve data, then on groups of medoids, until a single set remains and is processed on one processor.
%\item %The results obtained on the two data sets presented are quite encouraging and allow envisaging a use at a much larger scale.
\begin{block}{Further work}
\item Go back to the prediction task
\item Run the algorithm over many hundreds of processors
\item Connect the clustering method with a prediction model
%--------------------------------------------------------------------------
\begin{frame}[plain]{Bibliography}\small
\begin{thebibliography}{10}
\bibitem{1} A. Antoniadis, X. Brossat, J. Cugliari and J.-M. Poggi (2013), Clustering Functional Data Using Wavelets, {\it IJWMIP}, 11(1), 35--64.
\bibitem{2} R. Bekkerman, M. Bilenko and J. Langford, eds. (2011), Scaling up Machine Learning: Parallel and Distributed Approaches, {\it Cambridge University Press}.
\bibitem{3} P. Berkhin (2006), A Survey of Clustering Data Mining Techniques, in {\it Grouping Multidimensional Data}, eds. J. Kogan, C. Nicholas, M. Teboulle.
\bibitem{6} J. Dean and S. Ghemawat (2004), MapReduce: Simplified Data Processing on Large Clusters, {\it Sixth Symposium on Operating System Design and Implementation}.
\bibitem{7} G. De Francisci Morales and A. Bifet (2013), SAMOA: A Platform for Mining Big Data Streams, Keynote Talk at RAMSS '13: 2nd International Workshop on Real-Time Analysis and Mining of Social Streams, WWW, Rio de Janeiro.
\bibitem{10} L. Kaufman and P.J. Rousseeuw (1987), Clustering by means of Medoids, in {\it Statistical Data Analysis Based on the $L_1$-Norm and Related Methods}, ed. Y. Dodge.
\end{thebibliography}
% \begin{frame}{Academic motivation: Big Data}
% \item Specific needs: very large data volumes, high dimensionality
% \item Answers: algorithms operating on large graphs (Kang et al.~2009), on high-throughput data streams (De Francisci Morales and Bifet~2013)
% \item Bekkerman et al.~(2011): machine learning algorithms running in parallel
% \item unsupervised classification (\textit{clustering}): group the data into homogeneous \textit{clusters} that are pairwise sufficiently distinct
% \item many algorithms since Tryon~(1939) (see Berkhin~2006 for a review)
% \item however, the notion of a cluster varies with the data, the context and the algorithm used
% \item a very popular technique that makes it possible
% to reduce the size of the data by summarizing them with a few representatives