Commit | Line | Data |
---|---|---|
15d1825d BA |
1 | #include "sources/neighbors.h" |
2 | #include <stdlib.h> | |
3 | #include <math.h> | |
4 | #include "sources/utils/algebra.h" | |
5 | #include <cgds/BufferTop.h> | |
6 | #include "sources/utils/boolean.h" | |
7 | ||
8 | // evaluate distance between M[i,] and M[ii,] | |
9 | double getDistance(double* M, int i, int ii, int ncol, double alpha, | |
10 | bool simpleDists) | |
11 | { | |
12 | // if simpleDists is ON, it means we are in stage 2 after convex optimization | |
13 | // ==> use full data, we know that now there are no NA's. | |
14 | if (simpleDists) | |
15 | return distance2(M+i*ncol, M+ii*ncol, ncol); | |
16 | ||
17 | // get distance for values per year | |
18 | double dist1 = 0.0; | |
19 | int valCount = 0; // number of not-NA fields | |
20 | int nobs = ncol-2; // ncol is 9+2 for our initial dataset (2001 to 2009) | |
21 | for (int j=0; j<nobs; j++) | |
22 | { | |
23 | if (!isnan(M[i*ncol+j]) && !isnan(M[ii*ncol+j])) | |
24 | { | |
25 | double diff = M[i*ncol+j] - M[ii*ncol+j]; | |
26 | dist1 += diff*diff; | |
27 | valCount++; | |
28 | } | |
29 | } | |
30 | if (valCount > 0) | |
31 | dist1 /= valCount; | |
32 | ||
33 | // get distance for coordinates values | |
34 | double dist2 = 0.0; | |
35 | for (int j=nobs; j<ncol; j++) | |
36 | { | |
37 | double diff = M[i*ncol+j] - M[ii*ncol+j]; | |
38 | dist2 += diff*diff; | |
39 | } | |
40 | dist2 /= 2.0; //to harmonize with above normalization | |
41 | if (valCount == 0) | |
42 | return sqrt(dist2); // no other choice | |
43 | ||
44 | //NOTE: adaptive alpha, the more NA's in vector, | |
45 | // the more geo. coords. are taken into account | |
46 | alpha = (alpha >= 0.0 ? alpha : (double)valCount/nobs); | |
47 | return sqrt(alpha*dist1 + (1.0-alpha)*dist2); | |
48 | } | |
49 | ||
50 | // symmetrize neighborhoods lists (augmenting or reducing) | |
51 | void symmetrizeNeighbors(List** neighborhoods, int nrow, int gmode) | |
52 | { | |
53 | IndDist curNeighbI, curNeighbJ; | |
54 | for (int i=0; i<nrow; i++) | |
55 | { | |
56 | ListIterator* iterI = list_get_iterator(neighborhoods[i]); | |
57 | while (listI_has_data(iterI)) | |
58 | { | |
59 | listI_get(iterI, curNeighbI); | |
60 | // check if neighborhoods[curNeighbI->index] has i | |
61 | bool reciproc = S_FALSE; | |
62 | List* neighbsJ = neighborhoods[curNeighbI.index]; | |
63 | ListIterator* iterJ = list_get_iterator(neighbsJ); | |
64 | while (listI_has_data(iterJ)) | |
65 | { | |
66 | listI_get(iterJ, curNeighbJ); | |
67 | if (curNeighbJ.index == i) | |
68 | { | |
69 | reciproc = S_TRUE; | |
70 | break; | |
71 | } | |
72 | listI_move_next(iterJ); | |
73 | } | |
74 | ||
75 | if (!reciproc) | |
76 | { | |
77 | if (gmode == 1) | |
78 | { | |
79 | // augmenting: | |
80 | // add (previously) non-mutual neighbor to neighbsJ | |
81 | list_insert_back(neighbsJ, i); | |
82 | } | |
83 | // test list_size() >= 2 because we don't allow empty neighborhoods | |
84 | else if (gmode == 0 && list_size(neighborhoods[i]) >= 2) | |
85 | { | |
86 | // reducing: | |
87 | // remove non-mutual neighbor to neighbsI | |
88 | listI_remove(iterI,BACKWARD); | |
89 | } | |
90 | } | |
91 | listI_move_next(iterI); | |
92 | listI_destroy(iterJ); | |
93 | } | |
94 | listI_destroy(iterI); | |
95 | } | |
96 | } | |
97 | ||
98 | // restrain neighborhoods: choose one per quadrant (for convex optimization) | |
99 | void restrainToQuadrants(List** neighborhoods, int nrow, int ncol, double* M) | |
100 | { | |
101 | IndDist curNeighbI; | |
102 | for (int i=0; i<nrow; i++) | |
103 | { | |
104 | ListIterator* iter = list_get_iterator(neighborhoods[i]); | |
105 | // choose one neighbor in each quadrant (if available); | |
106 | // WARNING: multi-constraint optimization, | |
107 | // > as close as possible to angle bissectrice | |
108 | // > not too far from current data point | |
109 | ||
110 | // resp. SW,NW,SE,NE "best" neighbors : | |
111 | int bestIndexInDir[4] = {-1,-1,-1,-1}; | |
112 | // corresponding "performances" : | |
113 | double bestPerfInDir[4] = {INFINITY,INFINITY,INFINITY,INFINITY}; | |
114 | while (listI_has_data(iter)) | |
115 | { | |
116 | listI_get(iter, curNeighbI); | |
117 | // get delta_x and delta_y to know in which quadrant | |
118 | // we are and then get "index performance" | |
119 | // ASSUMPTION: all sites are distinct | |
120 | double deltaX = | |
121 | M[i*ncol+(ncol-2)] - M[curNeighbI.index*ncol+(ncol-2)]; | |
122 | double deltaY = | |
123 | M[i*ncol+(ncol-1)] - M[curNeighbI.index*ncol+(ncol-1)]; | |
124 | double angle = fabs(atan(deltaY/deltaX)); | |
125 | // naive combination; [TODO: improve] | |
126 | double perf = curNeighbI.distance + fabs(angle-M_PI_4); | |
127 | // map {-1,-1} to 0, {-1,1} to 1 ...etc : | |
128 | int index = 2*(deltaX>0)+(deltaY>0); | |
129 | if (perf < bestPerfInDir[index]) | |
130 | { | |
131 | bestIndexInDir[index] = curNeighbI.index; | |
132 | bestPerfInDir[index] = perf; | |
133 | } | |
134 | listI_move_next(iter); | |
135 | } | |
136 | ||
137 | // restrain neighborhood to the "best directions" found | |
138 | listI_reset_head(iter); | |
139 | while (listI_has_data(iter)) | |
140 | { | |
141 | listI_get(iter, curNeighbI); | |
142 | // test list_size() <= 1 because we don't allow empty neighborhoods | |
143 | if (list_size(neighborhoods[i]) <= 1 || | |
144 | curNeighbI.index==bestIndexInDir[0] || | |
145 | curNeighbI.index==bestIndexInDir[1] || | |
146 | curNeighbI.index==bestIndexInDir[2] || | |
147 | curNeighbI.index==bestIndexInDir[3]) | |
148 | { | |
149 | // OK, keep it | |
150 | listI_move_next(iter); | |
151 | continue; | |
152 | } | |
153 | // remove current node | |
154 | listI_remove(iter,FORWARD); | |
155 | } | |
156 | listI_destroy(iter); | |
157 | } | |
158 | } | |
159 | ||
160 | // Function to obtain neighborhoods. | |
161 | // NOTE: alpha = weight parameter to compute distances; -1 means "adaptive" | |
162 | List** getNeighbors_core(double* M, double alpha, int k, int gmode, | |
163 | bool simpleDists, int nrow, int ncol) | |
164 | { | |
165 | // prepare list buffers to get neighborhoods | |
166 | // (OK for small to moderate values of k) | |
167 | BufferTop** bufferNeighbs = | |
168 | (BufferTop**)malloc(nrow*sizeof(BufferTop*)); | |
169 | for (int i=0; i<nrow; i++) | |
170 | bufferNeighbs[i] = buffertop_new(IndDist, k, MIN_T, 2); | |
171 | ||
172 | // MAIN LOOP | |
173 | ||
174 | // for each row in M, find its k nearest neighbors | |
175 | for (int i=0; i<nrow; i++) | |
176 | { | |
177 | // for each potential neighbor... | |
178 | for (int ii=0; ii<nrow; ii++) | |
179 | { | |
180 | if (ii == i) | |
181 | continue; | |
182 | ||
183 | // evaluate distance from M[i,] to M[ii,] | |
184 | double distance = | |
185 | getDistance(M, i, ii, ncol, alpha, simpleDists); | |
186 | ||
187 | // (try to) add index 'ii' + distance to bufferNeighbs[i] | |
188 | IndDist id = (IndDist){.index=ii, .distance=distance}; | |
189 | buffertop_tryadd(bufferNeighbs[i], id, distance); | |
190 | } | |
191 | } | |
192 | ||
193 | // free buffers and transfer their contents into lists easier to process | |
194 | List** neighborhoods = (List**)malloc(nrow*sizeof(List*)); | |
195 | for (int i=0; i<nrow; i++) | |
196 | { | |
197 | neighborhoods[i] = buffertop_2list(bufferNeighbs[i]); | |
198 | buffertop_destroy(bufferNeighbs[i]); | |
199 | } | |
200 | free(bufferNeighbs); | |
201 | ||
202 | // OPTIONAL MUTUAL KNN | |
203 | if (gmode==0 || gmode==1) | |
204 | { | |
205 | // additional processing to symmetrize neighborhoods (augment or not) | |
206 | symmetrizeNeighbors(neighborhoods, nrow, gmode); | |
207 | } | |
208 | else if (gmode==3) | |
209 | { | |
210 | // choose one neighbor per quadrant (for convex optimization) | |
211 | restrainToQuadrants(neighborhoods, nrow, ncol, M); | |
212 | } | |
213 | // nothing to do if gmode==2 (simple assymmetric kNN) | |
214 | ||
215 | return neighborhoods; | |
216 | } |