\documentclass{article}
\usepackage{a4wide}
\usepackage{graphicx}
\def\pen{\textrm{pen}}
\def\L{\mathcal{L}}
\title{\bf Detecting areas with synchronous temporal dynamics}
\author{Christophe Giraud}

\begin{document}
\maketitle

\noindent This document summarizes the algorithm used when the function \textit{findSyncVarRegions()} is called with the first argument method=``convex''. We recommend first reading the article \emph{Delimiting synchronous populations from monitoring data} by Giraud et al., since we use the same notation here.

\section{Model and estimation procedure}

\subsection{Goal}

We write $Z_{stk}$ for the $k$th observation at site $s$ in year $t$, and set $z_{st}=\sum_{k}Z_{stk}$.
Our goal is to estimate regions $R$ such that
\begin{equation}\label{model}
Z_{stk}\sim \textrm{Poisson}(\exp(\theta_{s}+f(x_{s},t)))\quad\textrm{with } f(x,t)\approx \sum_{R}\rho_{R}(t){\bf 1}_{x\in R}.
\end{equation}
In other words, we try to estimate $f$ with the a priori assumption that
\begin{itemize}
\item for each year $t$, the map $x \to f(x,t)$ is piecewise constant;
\item the boundaries of the regions where $x \to f(x,t)$ is constant are the same for all years $t$.
\end{itemize}
The main difficulty is to detect the regions $R$.

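For concreteness, the following minimal Python sketch simulates data from model~\eqref{model}; the region labels, dimensions and parameter values are illustrative assumptions, not the package's defaults.
\begin{verbatim}
# Minimal simulation sketch of model (1): sites are grouped into regions,
# and each region R has its own temporal effect rho_R(t).
import numpy as np

rng = np.random.default_rng(0)
S, T, K = 50, 10, 3             # sites, years, observations per site/year
region = rng.integers(0, 4, S)  # hypothetical region label of each site
theta = rng.normal(0.0, 0.3, S)        # site effects theta_s
rho = rng.normal(0.0, 0.5, (4, T))     # rho_R(t), one row per region
rho[:, 0] = 0.0                 # matches the boundary condition f_{s1} = 0

f = rho[region, :]              # f(x_s, t): piecewise constant in space
lam = np.exp(theta[:, None] + f)             # Poisson intensities
Z = rng.poisson(lam[:, :, None], (S, T, K))  # observations Z_{stk}
z = Z.sum(axis=2)               # z_{st} = sum_k Z_{stk}
\end{verbatim}
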
\subsection{Estimation procedure}
Let $G$ be a graph and write $V(s)$ for the set of neighbors of $s$ in $G$.
The estimators $\widehat \theta$ and $\widehat f$ are defined as minimizers of
$$\mathcal{L}(\theta,f)+\alpha \pen(f):=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s\stackrel{G}{\sim}u}\|f_{s.}-f_{u.}\|/D_{su}$$
with the boundary conditions $f_{s1}=0$ for all $s$. We typically choose $D_{su}=1/|V(s)|+1/|V(u)|$.

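The following Python sketch evaluates this penalized criterion; the edge-list representation of $G$ and all names are illustrative assumptions.
\begin{verbatim}
# Sketch of the penalized criterion L(theta, f) + alpha * pen(f).
# edges: pairs (s, u) with s ~ u in G; deg[s] = |V(s)|.
import numpy as np

def objective(theta, f, z, edges, deg, alpha):
    eta = theta[:, None] + f                # theta_s + f_st
    loglik = np.sum(np.exp(eta) - z * eta)  # sum_st [e^eta - z_st * eta]
    pen = sum(np.linalg.norm(f[s, :] - f[u, :])
              / (1.0 / deg[s] + 1.0 / deg[u])
              for s, u in edges)
    return loglik + alpha * pen
\end{verbatim}
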
\section{Optimization algorithm}

The following quantity is to be minimized:
$$\mathcal{L}(\theta,f)+\alpha \pen(f):=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha\sum_{s\stackrel{G}{\sim}u}\|f_{s.}-f_{u.}\|/D_{su}$$
with the boundary conditions $f_{s1}=0$ for all $s$.
Since $\|v\|=\max_{\|\phi\|\leq 1}\langle\phi,v\rangle$ for any $v\in\mathbf R^{T}$, this expression can be rewritten as
$$\mathcal{L}(\theta,f)+\alpha \pen(f)=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s\stackrel{G}{\sim}u}\max_{\|\phi_{su}\|\leq 1}\langle\phi_{su},f_{s.}-f_{u.}\rangle/D_{su}$$
with $\phi_{su}\in\mathbf R^T$.

\newpage
\noindent Let us introduce
$$F(\theta,f,\phi)=\sum_{s,t}[e^{\theta_{s}+f_{st}}-z_{st}(\theta_{s}+f_{st})]+\alpha
\sum_{s<u}{\bf 1}_{s\stackrel{G}{\sim}u}\ \langle\phi_{su},f_{s.}-f_{u.}\rangle/D_{su}.$$
Since the maximum over $\phi$ decouples across the edges of $G$, the quantity to be optimized can be reformulated in terms of $F$ as
$$\mathcal{L}(\theta,f)+\alpha \pen(f)=\max_{\max_{s< u}\|\phi_{su}\|\leq 1}F(\theta,f,\phi).$$
The penalized log-likelihood can now be minimized by iterating the following steps on this saddle-point problem.

\subsection*{Application}

Iterate until convergence:
\begin{enumerate}
\item gradient descent step in $\theta$:\\* $\theta\leftarrow \theta - h \nabla_{\theta}F$
\item gradient descent step in $f$, with the condition $f[\ ,1]=0$:\\*
$f[\ ,-1]\leftarrow f[\ ,-1]-h'\nabla_{f[\ ,-1]}F$
\item gradient ascent step in $\phi$:\\*
$\phi_{su}\leftarrow \phi_{su}+h''\nabla_{\phi_{su}}F$
\item projection of $\phi$ onto the unit ball: $\phi_{su}\leftarrow \phi_{su}/\max(1,\|\phi_{su}\|)$
\end{enumerate}
Return $(\theta,f)$.
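
The following Python sketch implements this primal--dual loop, using the gradients of $F$, which follow directly from its definition; the single step size \texttt{h} (standing in for $h$, $h'$, $h''$) and the fixed iteration count are illustrative choices, not the package's.
\begin{verbatim}
# Sketch of the primal-dual iteration. phi is stored per edge s < u,
# with the convention phi_us = -phi_su.
import numpy as np

def fit(z, edges, deg, alpha, h=1e-3, n_iter=5000):
    S, T = z.shape
    theta = np.zeros(S)
    f = np.zeros((S, T))             # boundary condition: f[:, 0] stays 0
    phi = np.zeros((len(edges), T))  # dual vectors phi_su
    D = np.array([1.0/deg[s] + 1.0/deg[u] for s, u in edges])

    for _ in range(n_iter):
        ef = np.exp(theta[:, None] + f)
        # 1. descent in theta: dF/dtheta_s = sum_t e^{theta_s+f_st} - sum_t z_st
        theta = theta - h * (ef.sum(axis=1) - z.sum(axis=1))
        # 2. descent in f (first column held at 0):
        # dF/df_st = e^{theta_s+f_st} - z_st + alpha sum_{u in V(s)} phi_su,t/D_su
        grad_f = ef - z
        for e, (s, u) in enumerate(edges):
            grad_f[s, :] += alpha * phi[e, :] / D[e]
            grad_f[u, :] -= alpha * phi[e, :] / D[e]  # phi_us = -phi_su
        f[:, 1:] -= h * grad_f[:, 1:]
        # 3.-4. ascent in phi, then projection onto the unit ball
        for e, (s, u) in enumerate(edges):
            phi[e, :] += h * alpha * (f[s, :] - f[u, :]) / D[e]
            phi[e, :] /= max(1.0, np.linalg.norm(phi[e, :]))
    return theta, f
\end{verbatim}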

%\subsection*{Gradient in $\theta$:}
%We have
%$$\mathcal{L}(\theta,f)=\sum_{s}\left[e^{\theta_{s}}\sum_{t}e^{f_{st}}-\theta_{s}\sum_{t}z_{st}\right]+\ldots$$
%Hence
%$$\partial_{\theta_{s}}F=e^{\theta_{s}}\sum_{t}e^{f_{st}}-\sum_{t}z_{st}$$
%
%
%\subsection*{Gradient in $f$:} we set $\phi_{su}=-\phi_{us}$ for $s>u$
%$$\partial_{f_{st}}F=e^{\theta_{s}}e^{f_{st}}-z_{st}+\alpha\sum_{u\in V(s)}(\phi_{su})_{t}/D_{su}$$
%
%
%\subsection*{Gradient in $\phi$:}
%for $s<u$ with $s\sim u$
%$$\nabla_{\phi_{su}}F=\alpha(f_{s.}-f_{u.})/D_{su}.$$

\end{document}