\documentclass{article}
\usepackage{a4wide}
\usepackage{graphicx}
\def\pen{\textrm{pen}}
\def\L{\mathcal{L}}
\title{\bf Detecting areas with synchronous temporal dynamics}
\author{Christophe Giraud}

\begin{document}
\maketitle

\noindent This document summarizes the algorithm used when the function \textit{findSyncVarRegions()} is called with first argument method=``convex''. We recommend first reading the article \emph{Delimiting synchronous populations from monitoring data} by Giraud et al., since we use the same notation here.

\section{Model and estimation procedure}

\subsection{Goal}

We write $Z_{stk}$ for the $k$th observation in year $t$ at site $s$, and set $z_{st}=\sum_{k}Z_{stk}$.
Our goal is to estimate regions $R$ such that
\begin{equation}\label{model}
Z_{stk}\sim \textrm{Poisson}(\exp(\theta_{s}+f(x_{s},t)))\quad\textrm{with } f(x,t)\approx \sum_{R}\rho_{R}(t){\bf 1}_{x\in R}.
\end{equation}
In other words, we try to estimate $f$ under the a priori assumptions that
\begin{itemize}
\item for each year $t$, the map $x \to f(x,t)$ is piecewise constant;
\item the boundaries of the regions where $x \to f(x,t)$ is constant are the same for all years $t$.
\end{itemize}
The main difficulty is to detect the regions $R$.
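
For concreteness, here is a small \textsf{R} sketch that simulates counts from model (\ref{model}) with two regions; the region layout, the signals $\rho_{R}$ and all variable names are illustrative assumptions, not part of the package.
\begin{verbatim}
# Simulate from model (1): two regions with distinct temporal
# signals rho_R(t), site effects theta_s, and Poisson counts.
set.seed(1)
S <- 20; T <- 10                     # number of sites, years
region <- rep(1:2, each = S/2)       # true region label of each site
theta  <- rnorm(S, 0, 0.3)           # site effects theta_s
rho    <- rbind(sin(2*pi*(1:T)/T),   # signal of region 1
                cos(2*pi*(1:T)/T))   # signal of region 2
f      <- rho[region, ]              # f(x_s, t) = rho_{R(s)}(t)
f      <- f - f[, 1]                 # impose f_{s1} = 0
z      <- matrix(rpois(S*T, exp(theta + f)), S, T)  # counts z_st
\end{verbatim}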

\subsection{Estimation procedure}
Let $G$ be a graph and write $V(s)$ for the set of neighbors of $s$ in $G$.
The estimators $\widehat \theta$ and $\widehat f$ are defined as minimizers of
$$\mathcal{L}(\theta,f)+\alpha \pen(f):=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s\stackrel{G}{\sim}u}\|f_{s.}-f_{u.}\|/D_{su}$$
with boundary conditions: $f_{s1}=0$ for all $s$. We typically choose $D_{su}=1/|V(s)|+1/|V(u)|$.
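
This criterion is easy to evaluate directly. The \textsf{R} sketch below does so, assuming the graph $G$ is encoded as a two-column matrix \texttt{edges} listing each neighboring pair $(s,u)$ once, and \texttt{deg} holds the neighborhood sizes $|V(s)|$; these names are ours, not the package's.
\begin{verbatim}
# Penalized criterion L(theta, f) + alpha * pen(f).
penalized_loss <- function(theta, f, z, alpha, edges, deg) {
  eta <- theta + f                             # eta_st = theta_s + f_st
  nll <- sum(exp(eta) - z * eta)               # Poisson part L(theta, f)
  D   <- 1/deg[edges[, 1]] + 1/deg[edges[, 2]] # weights D_su
  dif <- f[edges[, 1], , drop = FALSE] -
         f[edges[, 2], , drop = FALSE]         # rows f_s. - f_u.
  nll + alpha * sum(sqrt(rowSums(dif^2)) / D)  # + alpha * pen(f)
}
\end{verbatim}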

\section{Optimization algorithm}

The following quantity is to be minimized
$$\mathcal{L}(\theta,f)+\alpha \pen(f):=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha\sum_{s\stackrel{G}{\sim}u}\|f_{s.}-f_{u.}\|/D_{su}$$
with boundary conditions: $f_{s1}=0$ for all $s$.
Since $\|v\|=\max_{\|\phi\|\leq 1}\langle\phi,v\rangle$ for the Euclidean norm, this expression can be rewritten as
$$\mathcal{L}(\theta,f)+\alpha \pen(f)=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s\stackrel{G}{\sim}u}\max_{\|\phi_{su}\|\leq 1}\langle\phi_{su},f_{s.}-f_{u.}\rangle/D_{su}$$
with $\phi_{su}\in\mathbf{R}^T$.

\newpage
\noindent Let us introduce
$$F(\theta,f,\phi)=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s<u}{\bf 1}_{s\stackrel{G}{\sim}u}\ \langle\phi_{su},f_{s.}-f_{u.}\rangle/D_{su}.$$
We can reformulate the quantity to be optimized using $F$ as follows:
$$\mathcal{L}(\theta,f)+\alpha \pen(f)=\max_{\phi\,:\,\max_{s< u}\|\phi_{su}\|\leq 1}F(\theta,f,\phi).$$
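
The inner maximum is attained at $\phi_{su}=(f_{s.}-f_{u.})/\|f_{s.}-f_{u.}\|$, so plugging this choice into $F$ must return exactly $\mathcal{L}(\theta,f)+\alpha\,\pen(f)$. The small \textsf{R} helper below (same assumed inputs as \texttt{penalized\_loss()} above) makes this identity easy to check numerically.
\begin{verbatim}
# Evaluate F(theta, f, phi); with phi_su = (f_s.-f_u.)/||f_s.-f_u.||
# (rows of phi), F_at() agrees with penalized_loss().
F_at <- function(theta, f, phi, z, alpha, edges, deg) {
  eta <- theta + f
  D   <- 1/deg[edges[, 1]] + 1/deg[edges[, 2]]
  dif <- f[edges[, 1], , drop = FALSE] - f[edges[, 2], , drop = FALSE]
  sum(exp(eta) - z * eta) + alpha * sum(rowSums(phi * dif) / D)
}
\end{verbatim}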
The penalized negative log-likelihood can now be minimized with the following steps.

\subsection*{Application}

Iterate until convergence:
\begin{enumerate}
\item gradient descent in $\theta$:\\* $\theta\leftarrow \theta - h \nabla_{\theta}F$
\item gradient descent in $f$ with the condition $f[\ ,1]=0$:\\*
$f[\ ,-1]\leftarrow f[\ ,-1]-h'\nabla_{f[\ ,-1]}F$
\item gradient ascent in $\phi$:\\*
$\phi_{su}\leftarrow \phi_{su}+h''\nabla_{\phi_{su}}F$
\item projection: $\phi_{su}\leftarrow \phi_{su}/\max(1,\|\phi_{su}\|)$
\end{enumerate}
Return $(\theta,f)$.
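
Below is a compact \textsf{R} sketch of this primal-dual loop, using the gradients derived in the next subsections; the name \texttt{sync\_fit}, the fixed step sizes and the fixed iteration count are our assumptions, not necessarily the package's actual choices.
\begin{verbatim}
# Primal-dual gradient scheme for the penalized criterion.
# edges: two-column matrix of pairs s ~ u (each pair once, s < u);
# deg:   neighborhood sizes |V(s)|;  h, hp, hpp: step sizes.
sync_fit <- function(z, alpha, edges, deg, n_iter = 5000,
                     h = 1e-3, hp = 1e-3, hpp = 1e-3) {
  S <- nrow(z); T <- ncol(z); E <- nrow(edges)
  theta <- numeric(S); f <- matrix(0, S, T); phi <- matrix(0, E, T)
  D <- 1/deg[edges[, 1]] + 1/deg[edges[, 2]]
  for (it in 1:n_iter) {
    eta <- theta + f
    # 1. descent in theta: dF/dtheta_s = sum_t (exp(eta_st) - z_st)
    theta <- theta - h * rowSums(exp(eta) - z)
    # penalty part of the f-gradient: alpha * sum_u phi_su,t / D_su,
    # with the convention phi_us = -phi_su
    gpen <- matrix(0, S, T); w <- phi / D
    for (e in 1:E) {
      gpen[edges[e, 1], ] <- gpen[edges[e, 1], ] + w[e, ]
      gpen[edges[e, 2], ] <- gpen[edges[e, 2], ] - w[e, ]
    }
    gf <- exp(eta) - z + alpha * gpen
    f[, -1] <- f[, -1] - hp * gf[, -1]   # 2. descent in f, f[, 1] = 0
    # 3. ascent in phi: grad = alpha * (f_s. - f_u.) / D_su
    dif <- f[edges[, 1], , drop = FALSE] - f[edges[, 2], , drop = FALSE]
    phi <- phi + hpp * alpha * dif / D
    phi <- phi / pmax(1, sqrt(rowSums(phi^2)))  # 4. project on unit ball
  }
  list(theta = theta, f = f)
}
\end{verbatim}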

\subsection*{Gradient in $\theta$}
We have
$$\mathcal{L}(\theta,f)=\sum_{s}\left[e^{\theta_{s}}\sum_{t}e^{f_{st}}-\theta_{s}\sum_{t}z_{st}\right]+\ldots$$
hence
$$\partial_{\theta_{s}}F=e^{\theta_{s}}\sum_{t}e^{f_{st}}-\sum_{t}z_{st}.$$

\subsection*{Gradient in $f$}
Setting $\phi_{su}=-\phi_{us}$ for $s>u$ and writing $\phi_{su,t}$ for the $t$th coordinate of $\phi_{su}$, we get
$$\partial_{f_{st}}F=e^{\theta_{s}}e^{f_{st}}-z_{st}+\alpha\sum_{u\in V(s)}\phi_{su,t}/D_{su}.$$

\subsection*{Gradient in $\phi$}
For $s<u$ with $s\stackrel{G}{\sim}u$,
$$\nabla_{\phi_{su}}F=\alpha(f_{s.}-f_{u.})/D_{su}.$$

\end{document}
83 \end{document}