\documentclass{article}
\usepackage{a4wide}
\usepackage{graphicx}
\def\pen{\textrm{pen}}
\def\L{\mathcal{L}}
\title{\bf Detecting areas with synchronous temporal dynamics}
\author{Christophe Giraud}

\begin{document}
\maketitle

\noindent This document summarizes the algorithm used when the function \textit{findSyncVarRegions()} is called with the first argument method=``convex''. We recommend first reading the article \emph{Delimiting synchronous populations from monitoring data} by Giraud et al., since we use the same notation here.

\section{Model and estimation procedure}

\subsection{Goal}

We write $Z_{stk}$ for the $k$th observation at site $s$ in year $t$, and set $z_{st}=\sum_{k}Z_{stk}$.
Our goal is to estimate regions $R$ such that
\begin{equation}\label{model}
Z_{stk}\sim \textrm{Poisson}(\exp(\theta_{s}+f(x_{s},t)))\quad\textrm{with } f(x,t)\approx \sum_{R}\rho_{R}(t){\bf 1}_{x\in R}.
\end{equation}
In other words, we try to estimate $f$ with the a priori assumption that
\begin{itemize}
\item for each year $t$, the map $x \to f(x,t)$ is piecewise constant;
\item the boundaries of the regions where $x \to f(x,t)$ is constant are the same for all years $t$.
\end{itemize}
The main difficulty is to detect the regions $R$.

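For concreteness, the following minimal Python sketch simulates data from model~\eqref{model}; the region labels, dimensions and parameter values are illustrative assumptions, not the package's defaults.
\begin{verbatim}
# Minimal simulation sketch of model (1): sites are grouped into regions,
# and each region R has its own temporal effect rho_R(t).
import numpy as np

rng = np.random.default_rng(0)
S, T, K = 50, 10, 3             # sites, years, observations per site/year
region = rng.integers(0, 4, S)  # hypothetical region label of each site
theta = rng.normal(0.0, 0.3, S)        # site effects theta_s
rho = rng.normal(0.0, 0.5, (4, T))     # rho_R(t), one row per region
rho[:, 0] = 0.0                 # matches the boundary condition f_{s1} = 0

f = rho[region, :]              # f(x_s, t): piecewise constant in space
lam = np.exp(theta[:, None] + f)             # Poisson intensities
Z = rng.poisson(lam[:, :, None], (S, T, K))  # observations Z_{stk}
z = Z.sum(axis=2)               # z_{st} = sum_k Z_{stk}
\end{verbatim}
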
\subsection{Estimation procedure}
Let $G$ be a graph and write $V(s)$ for the set of neighbors of $s$ in $G$.
The estimators $\widehat \theta$ and $\widehat f$ are defined as minimizers of
$$\mathcal{L}(\theta,f)+\alpha \pen(f):=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s\stackrel{G}{\sim}u}\|f_{s.}-f_{u.}\|/D_{su}$$
with the boundary conditions $f_{s1}=0$ for all $s$. We typically choose $D_{su}=1/|V(s)|+1/|V(u)|$.

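The following Python sketch evaluates this penalized criterion; the edge-list representation of $G$ and all names are illustrative assumptions.
\begin{verbatim}
# Sketch of the penalized criterion L(theta, f) + alpha * pen(f).
# edges: pairs (s, u) with s ~ u in G; deg[s] = |V(s)|.
import numpy as np

def objective(theta, f, z, edges, deg, alpha):
    eta = theta[:, None] + f                # theta_s + f_st
    loglik = np.sum(np.exp(eta) - z * eta)  # sum_st [e^eta - z_st * eta]
    pen = sum(np.linalg.norm(f[s, :] - f[u, :])
              / (1.0 / deg[s] + 1.0 / deg[u])
              for s, u in edges)
    return loglik + alpha * pen
\end{verbatim}
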
\section{Optimization algorithm}

The following quantity is to be minimized:
$$\mathcal{L}(\theta,f)+\alpha \pen(f):=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha\sum_{s\stackrel{G}{\sim}u}\|f_{s.}-f_{u.}\|/D_{su}$$
with the boundary conditions $f_{s1}=0$ for all $s$.
Since $\|v\|=\max_{\|\phi\|\leq 1}\langle\phi,v\rangle$ for any $v\in\mathbf R^{T}$, this expression can be rewritten as
$$\mathcal{L}(\theta,f)+\alpha \pen(f)=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s\stackrel{G}{\sim}u}\max_{\|\phi_{su}\|\leq 1}\langle\phi_{su},f_{s.}-f_{u.}\rangle/D_{su}$$
with $\phi_{su}\in\mathbf R^T$.

\newpage
\noindent Let us introduce
$$F(\theta,f,\phi)=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s<u}{\bf 1}_{s\stackrel{G}{\sim}u}\ \langle\phi_{su},f_{s.}-f_{u.}\rangle/D_{su}.$$
Since the maximum over $\phi$ decouples across the edges of $G$, the quantity to be optimized can be reformulated in terms of $F$ as
$$\mathcal{L}(\theta,f)+\alpha \pen(f)=\max_{\max_{s< u}\|\phi_{su}\|\leq 1}F(\theta,f,\phi).$$
The penalized log-likelihood can now be minimized by iterating the following steps on this saddle-point problem.

\subsection*{Application}

Iterate until convergence:
\begin{enumerate}
\item gradient descent step in $\theta$:\\* $\theta\leftarrow \theta - h \nabla_{\theta}F$
\item gradient descent step in $f$, with the condition $f[\ ,1]=0$:\\*
$f[\ ,-1]\leftarrow f[\ ,-1]-h'\nabla_{f[\ ,-1]}F$
\item gradient ascent step in $\phi$:\\*
$\phi_{su}\leftarrow \phi_{su}+h''\nabla_{\phi_{su}}F$
\item projection of $\phi$ onto the unit ball: $\phi_{su}\leftarrow \phi_{su}/\max(1,\|\phi_{su}\|)$
\end{enumerate}
Return $(\theta,f)$.
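
The following Python sketch implements this primal--dual loop, using the gradients of $F$, which follow directly from its definition; the single step size \texttt{h} (standing in for $h$, $h'$, $h''$) and the fixed iteration count are illustrative choices, not the package's.
\begin{verbatim}
# Sketch of the primal-dual iteration. phi is stored per edge s < u,
# with the convention phi_us = -phi_su.
import numpy as np

def fit(z, edges, deg, alpha, h=1e-3, n_iter=5000):
    S, T = z.shape
    theta = np.zeros(S)
    f = np.zeros((S, T))             # boundary condition: f[:, 0] stays 0
    phi = np.zeros((len(edges), T))  # dual vectors phi_su
    D = np.array([1.0/deg[s] + 1.0/deg[u] for s, u in edges])

    for _ in range(n_iter):
        ef = np.exp(theta[:, None] + f)
        # 1. descent in theta: dF/dtheta_s = sum_t e^{theta_s+f_st} - sum_t z_st
        theta = theta - h * (ef.sum(axis=1) - z.sum(axis=1))
        # 2. descent in f (first column held at 0):
        # dF/df_st = e^{theta_s+f_st} - z_st + alpha sum_{u in V(s)} phi_su,t/D_su
        grad_f = ef - z
        for e, (s, u) in enumerate(edges):
            grad_f[s, :] += alpha * phi[e, :] / D[e]
            grad_f[u, :] -= alpha * phi[e, :] / D[e]  # phi_us = -phi_su
        f[:, 1:] -= h * grad_f[:, 1:]
        # 3.-4. ascent in phi, then projection onto the unit ball
        for e, (s, u) in enumerate(edges):
            phi[e, :] += h * alpha * (f[s, :] - f[u, :]) / D[e]
            phi[e, :] /= max(1.0, np.linalg.norm(phi[e, :]))
    return theta, f
\end{verbatim}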

%\subsection*{Gradient in $\theta$:}
%We have
%$$\mathcal{L}(\theta,f)=\sum_{s}\left[e^{\theta_{s}}\sum_{t}e^{f_{st}}-\theta_{s}\sum_{t}z_{st}\right]+\ldots$$
%Hence
%$$\partial_{\theta_{s}}F=e^{\theta_{s}}\sum_{t}e^{f_{st}}-\sum_{t}z_{st}$$
%
%
%\subsection*{Gradient in $f$:} we set $\phi_{su}=-\phi_{us}$ for $s>u$
%$$\partial_{f_{st}}F=e^{\theta_{s}}e^{f_{st}}-z_{st}+\alpha\sum_{u\in V(s)}(\phi_{su})_{t}/D_{su}$$
%
%
%\subsection*{Gradient in $\phi$:}
%for $s<u$ with $s\sim u$
%$$\nabla_{\phi_{su}}F=\alpha(f_{s.}-f_{u.})/D_{su}.$$

\end{document}