add alternative approach from 2013-01
[synclust.git] / src / sources / convexSolver.c
1 #include "sources/convexSolver.h"
2 #include <stdio.h> //to trace LL evolution
3 #include <stdlib.h>
4 #include <math.h>
5 #include "sources/utils/algebra.h"
6
7 // auxiliary to compute log-likelihood + penalty
8 double computeLogLikelihood(
9 double** f, double* theta, double** Z, double*** phi,
10 int* lengthNIix, int** NIix, double alpha, int nrow, int ncol)
11 {
12 double LL = 0.0;
13
14 // for each row in data matrix: (one row = observations from 2001 to 2009)
15 for (int i=0; i<nrow; i++)
16 {
17 // theta[i] == -INFINITY if no birds were seen at this site
18 if (theta[i] != -INFINITY)
19 {
20 // for each year from 2001 to 2009:
21 for (int j=0; j<ncol; j++)
22 LL += (exp(theta[i] + f[i][j]) - Z[i][j] * (theta[i] + f[i][j]));
23 }
24 // add penalty term
25 double penalty = 0.0;
26 double Ds = 1.0/lengthNIix[i];
27 // lengthNIix[i] == size of the neighborhood of site i
28 for (int j=0; j<lengthNIix[i]; j++)
29 {
30 // compute <phi[s,u] , f[s,] - f[u,]> with u == NIix[i][j] (j-th neighbor of i)
31 double dotProduct = 0.0;
32 for (int jj=0; jj<ncol; jj++)
33 dotProduct += phi[i][NIix[i][j]][jj] * (f[i][jj] - f[NIix[i][j]][jj]);
34 // normalization by sum of inverses of neighborhoods sizes
35 double Dsu = Ds + 1.0/lengthNIix[NIix[i][j]];
36 penalty += dotProduct / Dsu;
37 }
38 LL += alpha * penalty;
39 }
40
41 return LL;
42 }
43
44 // compute estimated ("repaired", "smoothed"...) variations from rows of M
45 // NOTE: geographic coordinates dropped here, since they are unused
46 Parameters getVarsWithConvexOptim_core(
47 double* pM, int* lengthNIix, int** NIix, int nrow, int ncol,
48 double alpha, double h, double epsilon, int maxiter, bool symmNeighbs, bool trace)
49 {
50 double EPS = 1e-10; // HACK: some small numerical constant to avoid oddities
51
52 // theta_s = log(average z_st)
53 double* theta = (double*)calloc(nrow,sizeof(double));
54 // NOTE:: Z is 'double' because it is [can be] an average value (of integers)
55 double** Z = (double**)malloc(nrow*sizeof(double*));
56 for (int i=0; i<nrow; i++)
57 {
58 Z[i] = (double*)malloc(ncol*sizeof(double));
59 for (int j=0; j<ncol; j++)
60 {
61 Z[i][j] = pM[i*ncol+j];
62 theta[i] += Z[i][j];
63 }
64 // since pM values are assumed to be integers (and ncol not too high ?!),
65 // the following test may be simplified into (theta[i]==0.0)
66 if (fabs(theta[i]) < EPS)
67 theta[i] = -INFINITY;
68 else
69 theta[i] = log(theta[i]/ncol);
70 }
71 // initialize f to observed variations
72 double** F = (double**)malloc(nrow*sizeof(double*));
73 for (int i=0; i<nrow; i++)
74 {
75 F[i] = (double*)calloc(ncol,sizeof(double));
76 if (theta[i] != -INFINITY)
77 {
78 for (int j=0; j<ncol; j++)
79 {
80 if (Z[i][j] > 0.0)
81 F[i][j] = log(Z[i][j]) - theta[i];
82 }
83 }
84 }
85 // phi_s,u = 1/sqrt(ncol) (1 ... 1) [TODO:: costly in memory !]
86 double invSqrtNcol = 1.0/sqrt(ncol);
87 double*** phi = (double***)malloc(nrow*sizeof(double**));
88 for (int i=0; i<nrow; i++)
89 {
90 phi[i] = (double**)malloc(nrow*sizeof(double*));
91 for (int ii=0; ii<nrow; ii++)
92 {
93 phi[i][ii] = (double*)malloc(ncol*sizeof(double));
94 for (int j=0; j<ncol; j++)
95 phi[i][ii][j] = invSqrtNcol;
96 }
97 }
98
99 // initialize log-likelihood
100 double LL = computeLogLikelihood(
101 F, theta, Z, phi, lengthNIix, NIix, alpha, nrow, ncol);
102 double oldLL = -INFINITY;
103
104 /*******************
105 * START ITERATIONS
106 *******************/
107
108 int kounter = 0; // limit iterations count, in case of
109 while (fabs(LL - oldLL) >= epsilon && kounter++ < maxiter)
110 {
111 // gradient descent for theta
112 for (int i=0; i<nrow; i++) {
113 if (theta[i] == -INFINITY)
114 continue; // skip these sites: cannot get information
115 double sumExpFst = 0.0;
116 for (int j=0; j<ncol; j++)
117 sumExpFst += exp(F[i][j]);
118 double sumZst = 0.0;
119 for (int j=0; j<ncol; j++)
120 sumZst += Z[i][j];
121 double gradI = exp(theta[i]) * sumExpFst - sumZst;
122 theta[i] -= h * gradI;
123 }
124
125 // gradient descent for f
126 double sumDdivPhi;
127 for (int i=0; i<nrow; i++)
128 {
129 double invDs = 1.0/lengthNIix[i];
130 for (int j=0; j<ncol; j++)
131 {
132 double gradIJ = - Z[i][j];
133 if (theta[i] != -INFINITY)
134 {
135 // no theta[i] contribution if nothing observed
136 gradIJ += exp(theta[i] + F[i][j]);
137 }
138 // + sum on neighbors u: s-->u, - sum on neighbors u: u-->s
139 sumDdivPhi = 0.0;
140 for (int jj=0; jj<lengthNIix[i]; jj++)
141 {
142 double Dsu = invDs + 1.0/lengthNIix[NIix[i][jj]];
143 sumDdivPhi += phi[i][NIix[i][jj]][j] / Dsu;
144 if (symmNeighbs)
145 {
146 //shortcut: if symmetric neighborhoods, it's easy to sum on u-->s
147 sumDdivPhi -= phi[NIix[i][jj]][i][j] / Dsu;
148 }
149 }
150 gradIJ += alpha * sumDdivPhi;
151 if (!symmNeighbs)
152 {
153 // need to remove sum on neighbors u: u-->s, uneasy way.
154 //TODO: computation is much too costly here; need preprocessing
155 sumDdivPhi = 0.0;
156 for (int ii=0; ii<nrow; ii++)
157 {
158 //~ if (ii == i) continue;
159 for (int jj=0; jj<lengthNIix[ii]; jj++)
160 {
161 if (NIix[ii][jj] == i)
162 {
163 sumDdivPhi += phi[ii][i][j] / (invDs + 1.0/lengthNIix[ii]);
164 break; //i can only appear once among neighbors of ii
165 }
166 }
167 }
168 gradIJ -= alpha * sumDdivPhi;
169 }
170 F[i][j] -= h * gradIJ;
171 }
172 }
173
174 // gradient ascent for phi
175 for (int i=0; i<nrow; i++)
176 {
177 double Ds = 1.0/lengthNIix[i];
178 for (int ii=0; ii<nrow; ii++)
179 {
180 double Dsu = Ds + 1.0/lengthNIix[ii];
181 for (int j=0; j<ncol; j++)
182 {
183 double gradI_II_J = alpha * (F[i][j] - F[ii][j]) / Dsu;
184 phi[i][ii][j] += h * gradI_II_J;
185 }
186 // also renormalize to have ||phi_su|| == 1.0
187 double normPhi = norm2(phi[i][ii], ncol);
188 //~ if (normPhi > 1.0) {
189 if (normPhi > EPS)
190 {
191 for (int j=0; j<ncol; j++)
192 phi[i][ii][j] /= normPhi;
193 }
194 }
195 }
196
197 oldLL = LL;
198 LL = computeLogLikelihood(
199 F, theta, Z, phi, lengthNIix, NIix, alpha, nrow, ncol);
200 if (trace)
201 printf("%i / LLs: %.0f %.0f\n",kounter,oldLL,LL); // optional trace of LL evolution
202 } /*** END ITERATIONS ***/
203
204 // free all local parameters arrays but (theta, F) (used as return value)
205 for (int i=0; i<nrow; i++)
206 {
207 free(Z[i]);
208 for (int ii=0; ii<nrow; ii++)
209 free(phi[i][ii]);
210 free(phi[i]);
211 }
212 free(Z);
213 free(phi);
214
215 Parameters params;
216 params.f = F;
217 params.theta = theta;
218 return params;
219 }