d9407a44519b71a95c5fff464aba649470b75de7
1 #include "sources/neighbors.h"
4 #include "sources/utils/algebra.h"
5 #include <cgds/BufferTop.h>
6 #include "sources/utils/boolean.h"
8 // evaluate distance between M[i,] and M[ii,]
9 double getDistance(double* M
, int i
, int ii
, int ncol
, double alpha
,
12 // if simpleDists is ON, it means we are in stage 2 after convex optimization
13 // ==> use full data, we know that now there are no NA's.
15 return distance2(M
+i
*ncol
, M
+ii
*ncol
, ncol
);
17 // get distance for values per year
19 int valCount
= 0; // number of not-NA fields
20 int nobs
= ncol
-2; // ncol is 9+2 for our initial dataset (2001 to 2009)
21 for (int j
=0; j
<nobs
; j
++)
23 if (!isnan(M
[i
*ncol
+j
]) && !isnan(M
[ii
*ncol
+j
]))
25 double diff
= M
[i
*ncol
+j
] - M
[ii
*ncol
+j
];
33 // get distance for coordinates values
35 for (int j
=nobs
; j
<ncol
; j
++)
37 double diff
= M
[i
*ncol
+j
] - M
[ii
*ncol
+j
];
40 dist2
/= 2.0; //to harmonize with above normalization
42 return sqrt(dist2
); // no other choice
44 //NOTE: adaptive alpha, the more NA's in vector,
45 // the more geo. coords. are taken into account
46 alpha
= (alpha
>= 0.0 ? alpha
: (double)valCount
/nobs
);
47 return sqrt(alpha
*dist1
+ (1.0-alpha
)*dist2
);
50 // symmetrize neighborhoods lists (augmenting or reducing)
51 void symmetrizeNeighbors(List
** neighborhoods
, int nrow
, int gmode
)
53 IndDist curNeighbI
, curNeighbJ
;
54 for (int i
=0; i
<nrow
; i
++)
56 ListIterator
* iterI
= list_get_iterator(neighborhoods
[i
]);
57 while (listI_has_data(iterI
))
59 listI_get(iterI
, curNeighbI
);
60 // check if neighborhoods[curNeighbI->index] has i
61 bool reciproc
= S_FALSE
;
62 List
* neighbsJ
= neighborhoods
[curNeighbI
.index
];
63 ListIterator
* iterJ
= list_get_iterator(neighbsJ
);
64 while (listI_has_data(iterJ
))
66 listI_get(iterJ
, curNeighbJ
);
67 if (curNeighbJ
.index
== i
)
72 listI_move_next(iterJ
);
80 // add (previously) non-mutual neighbor to neighbsJ
81 list_insert_back(neighbsJ
, i
);
83 // test list_size() >= 2 because we don't allow empty neighborhoods
84 else if (gmode
== 0 && list_size(neighborhoods
[i
]) >= 2)
87 // remove non-mutual neighbor to neighbsI
88 listI_remove(iterI
,BACKWARD
);
91 listI_move_next(iterI
);
98 // restrain neighborhoods: choose one per quadrant (for convex optimization)
99 void restrainToQuadrants(List
** neighborhoods
, int nrow
, int ncol
, double* M
)
102 for (int i
=0; i
<nrow
; i
++)
104 ListIterator
* iter
= list_get_iterator(neighborhoods
[i
]);
105 // choose one neighbor in each quadrant (if available);
106 // WARNING: multi-constraint optimization,
107 // > as close as possible to angle bissectrice
108 // > not too far from current data point
110 // resp. SW,NW,SE,NE "best" neighbors :
111 int bestIndexInDir
[4] = {-1,-1,-1,-1};
112 // corresponding "performances" :
113 double bestPerfInDir
[4] = {INFINITY
,INFINITY
,INFINITY
,INFINITY
};
114 while (listI_has_data(iter
))
116 listI_get(iter
, curNeighbI
);
117 // get delta_x and delta_y to know in which quadrant
118 // we are and then get "index performance"
119 // ASSUMPTION: all sites are distinct
121 M
[i
*ncol
+(ncol
-2)] - M
[curNeighbI
.index
*ncol
+(ncol
-2)];
123 M
[i
*ncol
+(ncol
-1)] - M
[curNeighbI
.index
*ncol
+(ncol
-1)];
124 double angle
= fabs(atan(deltaY
/deltaX
));
125 // naive combination; [TODO: improve]
126 double perf
= curNeighbI
.distance
+ fabs(angle
-M_PI_4
);
127 // map {-1,-1} to 0, {-1,1} to 1 ...etc :
128 int index
= 2*(deltaX
>0)+(deltaY
>0);
129 if (perf
< bestPerfInDir
[index
])
131 bestIndexInDir
[index
] = curNeighbI
.index
;
132 bestPerfInDir
[index
] = perf
;
134 listI_move_next(iter
);
137 // restrain neighborhood to the "best directions" found
138 listI_reset_head(iter
);
139 while (listI_has_data(iter
))
141 listI_get(iter
, curNeighbI
);
142 // test list_size() <= 1 because we don't allow empty neighborhoods
143 if (list_size(neighborhoods
[i
]) <= 1 ||
144 curNeighbI
.index
==bestIndexInDir
[0] ||
145 curNeighbI
.index
==bestIndexInDir
[1] ||
146 curNeighbI
.index
==bestIndexInDir
[2] ||
147 curNeighbI
.index
==bestIndexInDir
[3])
150 listI_move_next(iter
);
153 // remove current node
154 listI_remove(iter
,FORWARD
);
160 // Function to obtain neighborhoods.
161 // NOTE: alpha = weight parameter to compute distances; -1 means "adaptive"
162 List
** getNeighbors_core(double* M
, double alpha
, int k
, int gmode
,
163 bool simpleDists
, int nrow
, int ncol
)
165 // prepare list buffers to get neighborhoods
166 // (OK for small to moderate values of k)
167 BufferTop
** bufferNeighbs
=
168 (BufferTop
**)malloc(nrow
*sizeof(BufferTop
*));
169 for (int i
=0; i
<nrow
; i
++)
170 bufferNeighbs
[i
] = buffertop_new(IndDist
, k
, MIN_T
, 2);
174 // for each row in M, find its k nearest neighbors
175 for (int i
=0; i
<nrow
; i
++)
177 // for each potential neighbor...
178 for (int ii
=0; ii
<nrow
; ii
++)
183 // evaluate distance from M[i,] to M[ii,]
185 getDistance(M
, i
, ii
, ncol
, alpha
, simpleDists
);
187 // (try to) add index 'ii' + distance to bufferNeighbs[i]
188 IndDist id
= (IndDist
){.index
=ii
, .distance
=distance
};
189 buffertop_tryadd(bufferNeighbs
[i
], id
, distance
);
193 // free buffers and transfer their contents into lists easier to process
194 List
** neighborhoods
= (List
**)malloc(nrow
*sizeof(List
*));
195 for (int i
=0; i
<nrow
; i
++)
197 neighborhoods
[i
] = buffertop_2list(bufferNeighbs
[i
]);
198 buffertop_destroy(bufferNeighbs
[i
]);
202 // OPTIONAL MUTUAL KNN
203 if (gmode
==0 || gmode
==1)
205 // additional processing to symmetrize neighborhoods (augment or not)
206 symmetrizeNeighbors(neighborhoods
, nrow
, gmode
);
210 // choose one neighbor per quadrant (for convex optimization)
211 restrainToQuadrants(neighborhoods
, nrow
, ncol
, M
);
213 // nothing to do if gmode==2 (simple assymmetric kNN)
215 return neighborhoods
;