first commit
[synclust.git] / src / sources / neighbors.c
1 #include "sources/neighbors.h"
2 #include <stdlib.h>
3 #include <math.h>
4 #include "sources/utils/algebra.h"
5 #include <cgds/BufferTop.h>
6 #include "sources/utils/boolean.h"
7
8 // evaluate distance between M[i,] and M[ii,]
9 double getDistance(double* M, int i, int ii, int ncol, double alpha,
10 bool simpleDists)
11 {
12 // if simpleDists is ON, it means we are in stage 2 after convex optimization
13 // ==> use full data, we know that now there are no NA's.
14 if (simpleDists)
15 return distance2(M+i*ncol, M+ii*ncol, ncol);
16
17 // get distance for values per year
18 double dist1 = 0.0;
19 int valCount = 0; // number of not-NA fields
20 int nobs = ncol-2; // ncol is 9+2 for our initial dataset (2001 to 2009)
21 for (int j=0; j<nobs; j++)
22 {
23 if (!isnan(M[i*ncol+j]) && !isnan(M[ii*ncol+j]))
24 {
25 double diff = M[i*ncol+j] - M[ii*ncol+j];
26 dist1 += diff*diff;
27 valCount++;
28 }
29 }
30 if (valCount > 0)
31 dist1 /= valCount;
32
33 // get distance for coordinates values
34 double dist2 = 0.0;
35 for (int j=nobs; j<ncol; j++)
36 {
37 double diff = M[i*ncol+j] - M[ii*ncol+j];
38 dist2 += diff*diff;
39 }
40 dist2 /= 2.0; //to harmonize with above normalization
41 if (valCount == 0)
42 return sqrt(dist2); // no other choice
43
44 //NOTE: adaptive alpha, the more NA's in vector,
45 // the more geo. coords. are taken into account
46 alpha = (alpha >= 0.0 ? alpha : (double)valCount/nobs);
47 return sqrt(alpha*dist1 + (1.0-alpha)*dist2);
48 }
49
50 // symmetrize neighborhoods lists (augmenting or reducing)
51 void symmetrizeNeighbors(List** neighborhoods, int nrow, int gmode)
52 {
53 IndDist curNeighbI, curNeighbJ;
54 for (int i=0; i<nrow; i++)
55 {
56 ListIterator* iterI = list_get_iterator(neighborhoods[i]);
57 while (listI_has_data(iterI))
58 {
59 listI_get(iterI, curNeighbI);
60 // check if neighborhoods[curNeighbI->index] has i
61 bool reciproc = S_FALSE;
62 List* neighbsJ = neighborhoods[curNeighbI.index];
63 ListIterator* iterJ = list_get_iterator(neighbsJ);
64 while (listI_has_data(iterJ))
65 {
66 listI_get(iterJ, curNeighbJ);
67 if (curNeighbJ.index == i)
68 {
69 reciproc = S_TRUE;
70 break;
71 }
72 listI_move_next(iterJ);
73 }
74
75 if (!reciproc)
76 {
77 if (gmode == 1)
78 {
79 // augmenting:
80 // add (previously) non-mutual neighbor to neighbsJ
81 list_insert_back(neighbsJ, i);
82 }
83 // test list_size() >= 2 because we don't allow empty neighborhoods
84 else if (gmode == 0 && list_size(neighborhoods[i]) >= 2)
85 {
86 // reducing:
87 // remove non-mutual neighbor to neighbsI
88 listI_remove(iterI,BACKWARD);
89 }
90 }
91 listI_move_next(iterI);
92 listI_destroy(iterJ);
93 }
94 listI_destroy(iterI);
95 }
96 }
97
98 // restrain neighborhoods: choose one per quadrant (for convex optimization)
99 void restrainToQuadrants(List** neighborhoods, int nrow, int ncol, double* M)
100 {
101 IndDist curNeighbI;
102 for (int i=0; i<nrow; i++)
103 {
104 ListIterator* iter = list_get_iterator(neighborhoods[i]);
105 // choose one neighbor in each quadrant (if available);
106 // WARNING: multi-constraint optimization,
107 // > as close as possible to angle bissectrice
108 // > not too far from current data point
109
110 // resp. SW,NW,SE,NE "best" neighbors :
111 int bestIndexInDir[4] = {-1,-1,-1,-1};
112 // corresponding "performances" :
113 double bestPerfInDir[4] = {INFINITY,INFINITY,INFINITY,INFINITY};
114 while (listI_has_data(iter))
115 {
116 listI_get(iter, curNeighbI);
117 // get delta_x and delta_y to know in which quadrant
118 // we are and then get "index performance"
119 // ASSUMPTION: all sites are distinct
120 double deltaX =
121 M[i*ncol+(ncol-2)] - M[curNeighbI.index*ncol+(ncol-2)];
122 double deltaY =
123 M[i*ncol+(ncol-1)] - M[curNeighbI.index*ncol+(ncol-1)];
124 double angle = fabs(atan(deltaY/deltaX));
125 // naive combination; [TODO: improve]
126 double perf = curNeighbI.distance + fabs(angle-M_PI_4);
127 // map {-1,-1} to 0, {-1,1} to 1 ...etc :
128 int index = 2*(deltaX>0)+(deltaY>0);
129 if (perf < bestPerfInDir[index])
130 {
131 bestIndexInDir[index] = curNeighbI.index;
132 bestPerfInDir[index] = perf;
133 }
134 listI_move_next(iter);
135 }
136
137 // restrain neighborhood to the "best directions" found
138 listI_reset_head(iter);
139 while (listI_has_data(iter))
140 {
141 listI_get(iter, curNeighbI);
142 // test list_size() <= 1 because we don't allow empty neighborhoods
143 if (list_size(neighborhoods[i]) <= 1 ||
144 curNeighbI.index==bestIndexInDir[0] ||
145 curNeighbI.index==bestIndexInDir[1] ||
146 curNeighbI.index==bestIndexInDir[2] ||
147 curNeighbI.index==bestIndexInDir[3])
148 {
149 // OK, keep it
150 listI_move_next(iter);
151 continue;
152 }
153 // remove current node
154 listI_remove(iter,FORWARD);
155 }
156 listI_destroy(iter);
157 }
158 }
159
160 // Function to obtain neighborhoods.
161 // NOTE: alpha = weight parameter to compute distances; -1 means "adaptive"
162 List** getNeighbors_core(double* M, double alpha, int k, int gmode,
163 bool simpleDists, int nrow, int ncol)
164 {
165 // prepare list buffers to get neighborhoods
166 // (OK for small to moderate values of k)
167 BufferTop** bufferNeighbs =
168 (BufferTop**)malloc(nrow*sizeof(BufferTop*));
169 for (int i=0; i<nrow; i++)
170 bufferNeighbs[i] = buffertop_new(IndDist, k, MIN_T, 2);
171
172 // MAIN LOOP
173
174 // for each row in M, find its k nearest neighbors
175 for (int i=0; i<nrow; i++)
176 {
177 // for each potential neighbor...
178 for (int ii=0; ii<nrow; ii++)
179 {
180 if (ii == i)
181 continue;
182
183 // evaluate distance from M[i,] to M[ii,]
184 double distance =
185 getDistance(M, i, ii, ncol, alpha, simpleDists);
186
187 // (try to) add index 'ii' + distance to bufferNeighbs[i]
188 IndDist id = (IndDist){.index=ii, .distance=distance};
189 buffertop_tryadd(bufferNeighbs[i], id, distance);
190 }
191 }
192
193 // free buffers and transfer their contents into lists easier to process
194 List** neighborhoods = (List**)malloc(nrow*sizeof(List*));
195 for (int i=0; i<nrow; i++)
196 {
197 neighborhoods[i] = buffertop_2list(bufferNeighbs[i]);
198 buffertop_destroy(bufferNeighbs[i]);
199 }
200 free(bufferNeighbs);
201
202 // OPTIONAL MUTUAL KNN
203 if (gmode==0 || gmode==1)
204 {
205 // additional processing to symmetrize neighborhoods (augment or not)
206 symmetrizeNeighbors(neighborhoods, nrow, gmode);
207 }
208 else if (gmode==3)
209 {
210 // choose one neighbor per quadrant (for convex optimization)
211 restrainToQuadrants(neighborhoods, nrow, ncol, M);
212 }
213 // nothing to do if gmode==2 (simple assymmetric kNN)
214
215 return neighborhoods;
216 }