First commit
[nngd.git] / src / knncpp.h
1 /* knncpp.h
2 *
3 * Author: Fabian Meyer
4 * Created On: 22 Aug 2021
5 * License: MIT
6 */
7
8 #ifndef KNNCPP_H_
9 #define KNNCPP_H_
10
11 #include <Eigen/Geometry>
12 #include <vector>
13 #include <map>
14 #include <set>
15
16 #ifdef KNNCPP_FLANN
17
18 #include <flann/flann.hpp>
19
20 #endif
21
22 namespace knncpp
23 {
24 /********************************************************
25 * Matrix Definitions
26 *******************************************************/
27
    // Common index type used throughout the library (Eigen's default index type).
    typedef typename Eigen::MatrixXd::Index Index;

    // Integer (index) vectors of dynamic and fixed sizes.
    typedef Eigen::Matrix<Index, Eigen::Dynamic, 1> Vectori;
    typedef Eigen::Matrix<Index, 2, 1> Vector2i;
    typedef Eigen::Matrix<Index, 3, 1> Vector3i;
    typedef Eigen::Matrix<Index, 4, 1> Vector4i;
    typedef Eigen::Matrix<Index, 5, 1> Vector5i;

    // Integer (index) matrices of dynamic and fixed sizes.
    typedef Eigen::Matrix<Index, Eigen::Dynamic, Eigen::Dynamic> Matrixi;
    typedef Eigen::Matrix<Index, 2, 2> Matrix2i;
    typedef Eigen::Matrix<Index, 3, 3> Matrix3i;
    typedef Eigen::Matrix<Index, 4, 4> Matrix4i;
    typedef Eigen::Matrix<Index, 5, 5> Matrix5i;

    // Floating point matrices of dynamic size.
    typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic> Matrixf;
    typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> Matrixd;
44
45 /********************************************************
46 * Distance Functors
47 *******************************************************/
48
49 /** Manhatten distance functor.
50 * This the same as the L1 minkowski distance but more efficient.
51 * @see EuclideanDistance, ChebyshevDistance, MinkowskiDistance */
52 template <typename Scalar>
53 struct ManhattenDistance
54 {
55 /** Compute the unrooted distance between two vectors.
56 * @param lhs vector on left hand side
57 * @param rhs vector on right hand side */
58 template<typename DerivedA, typename DerivedB>
59 Scalar operator()(const Eigen::MatrixBase<DerivedA> &lhs,
60 const Eigen::MatrixBase<DerivedB> &rhs) const
61 {
62 static_assert(
63 std::is_same<typename Eigen::MatrixBase<DerivedA>::Scalar,Scalar>::value,
64 "distance scalar and input matrix A must have same type");
65 static_assert(
66 std::is_same<typename Eigen::MatrixBase<DerivedB>::Scalar, Scalar>::value,
67 "distance scalar and input matrix B must have same type");
68
69 return (lhs - rhs).cwiseAbs().sum();
70 }
71
72 /** Compute the unrooted distance between two scalars.
73 * @param lhs scalar on left hand side
74 * @param rhs scalar on right hand side */
75 Scalar operator()(const Scalar lhs,
76 const Scalar rhs) const
77 {
78 return std::abs(lhs - rhs);
79 }
80
81 /** Compute the root of a unrooted distance value.
82 * @param value unrooted distance value */
83 Scalar operator()(const Scalar val) const
84 {
85 return val;
86 }
87 };
88
89 /** Euclidean distance functor.
90 * This the same as the L2 minkowski distance but more efficient.
91 * @see ManhattenDistance, ChebyshevDistance, MinkowskiDistance */
92 template <typename Scalar>
93 struct EuclideanDistance
94 {
95 /** Compute the unrooted distance between two vectors.
96 * @param lhs vector on left hand side
97 * @param rhs vector on right hand side */
98 template<typename DerivedA, typename DerivedB>
99 Scalar operator()(const Eigen::MatrixBase<DerivedA> &lhs,
100 const Eigen::MatrixBase<DerivedB> &rhs) const
101 {
102 static_assert(
103 std::is_same<typename Eigen::MatrixBase<DerivedA>::Scalar,Scalar>::value,
104 "distance scalar and input matrix A must have same type");
105 static_assert(
106 std::is_same<typename Eigen::MatrixBase<DerivedB>::Scalar, Scalar>::value,
107 "distance scalar and input matrix B must have same type");
108
109 return (lhs - rhs).cwiseAbs2().sum();
110 }
111
112 /** Compute the unrooted distance between two scalars.
113 * @param lhs scalar on left hand side
114 * @param rhs scalar on right hand side */
115 Scalar operator()(const Scalar lhs,
116 const Scalar rhs) const
117 {
118 Scalar diff = lhs - rhs;
119 return diff * diff;
120 }
121
122 /** Compute the root of a unrooted distance value.
123 * @param value unrooted distance value */
124 Scalar operator()(const Scalar val) const
125 {
126 return std::sqrt(val);
127 }
128 };
129
130 /** General minkowski distance functor.
131 * The infinite version is only available through the chebyshev distance.
132 * @see ManhattenDistance, EuclideanDistance, ChebyshevDistance */
133 template <typename Scalar, int P>
134 struct MinkowskiDistance
135 {
136 struct Pow
137 {
138 Scalar operator()(const Scalar val) const
139 {
140 Scalar result = 1;
141 for(int i = 0; i < P; ++i)
142 result *= val;
143 return result;
144 }
145 };
146
147 /** Compute the unrooted distance between two vectors.
148 * @param lhs vector on left hand side
149 * @param rhs vector on right hand side */
150 template<typename DerivedA, typename DerivedB>
151 Scalar operator()(const Eigen::MatrixBase<DerivedA> &lhs,
152 const Eigen::MatrixBase<DerivedB> &rhs) const
153 {
154 static_assert(
155 std::is_same<typename Eigen::MatrixBase<DerivedA>::Scalar,Scalar>::value,
156 "distance scalar and input matrix A must have same type");
157 static_assert(
158 std::is_same<typename Eigen::MatrixBase<DerivedB>::Scalar, Scalar>::value,
159 "distance scalar and input matrix B must have same type");
160
161 return (lhs - rhs).cwiseAbs().unaryExpr(MinkowskiDistance::Pow()).sum();
162 }
163
164 /** Compute the unrooted distance between two scalars.
165 * @param lhs scalar on left hand side
166 * @param rhs scalar on right hand side */
167 Scalar operator()(const Scalar lhs,
168 const Scalar rhs) const
169 {
170 return std::pow(std::abs(lhs - rhs), P);;
171 }
172
173 /** Compute the root of a unrooted distance value.
174 * @param value unrooted distance value */
175 Scalar operator()(const Scalar val) const
176 {
177 return std::pow(val, 1 / static_cast<Scalar>(P));
178 }
179 };
180
181 /** Chebyshev distance functor.
182 * This distance is the same as infinity minkowski distance.
183 * @see ManhattenDistance, EuclideanDistance, MinkowskiDistance */
184 template<typename Scalar>
185 struct ChebyshevDistance
186 {
187 /** Compute the unrooted distance between two vectors.
188 * @param lhs vector on left hand side
189 * @param rhs vector on right hand side */
190 template<typename DerivedA, typename DerivedB>
191 Scalar operator()(const Eigen::MatrixBase<DerivedA> &lhs,
192 const Eigen::MatrixBase<DerivedB> &rhs) const
193 {
194 static_assert(
195 std::is_same<typename Eigen::MatrixBase<DerivedA>::Scalar,Scalar>::value,
196 "distance scalar and input matrix A must have same type");
197 static_assert(
198 std::is_same<typename Eigen::MatrixBase<DerivedB>::Scalar, Scalar>::value,
199 "distance scalar and input matrix B must have same type");
200
201 return (lhs - rhs).cwiseAbs().maxCoeff();
202 }
203
204 /** Compute the unrooted distance between two scalars.
205 * @param lhs scalar on left hand side
206 * @param rhs scalar on right hand side */
207 Scalar operator()(const Scalar lhs,
208 const Scalar rhs) const
209 {
210 return std::abs(lhs - rhs);
211 }
212
213 /** Compute the root of a unrooted distance value.
214 * @param value unrooted distance value */
215 Scalar operator()(const Scalar val) const
216 {
217 return val;
218 }
219 };
220
221 /** Hamming distance functor.
222 * The distance vectors have to be of integral type and should hold the
223 * information vectors as bitmasks.
224 * Performs a XOR operation on the vectors and counts the number of set
225 * ones. */
226 template<typename Scalar>
227 struct HammingDistance
228 {
229 static_assert(std::is_integral<Scalar>::value,
230 "HammingDistance requires integral Scalar type");
231
232 struct XOR
233 {
234 Scalar operator()(const Scalar lhs, const Scalar rhs) const
235 {
236 return lhs ^ rhs;
237 }
238 };
239
240 struct BitCount
241 {
242 Scalar operator()(const Scalar lhs) const
243 {
244 Scalar copy = lhs;
245 Scalar result = 0;
246 while(copy != static_cast<Scalar>(0))
247 {
248 ++result;
249 copy &= (copy - 1);
250 }
251
252 return result;
253 }
254 };
255
256 /** Compute the unrooted distance between two vectors.
257 * @param lhs vector on left hand side
258 * @param rhs vector on right hand side */
259 template<typename DerivedA, typename DerivedB>
260 Scalar operator()(const Eigen::MatrixBase<DerivedA> &lhs,
261 const Eigen::MatrixBase<DerivedB> &rhs) const
262 {
263 static_assert(
264 std::is_same<typename Eigen::MatrixBase<DerivedA>::Scalar,Scalar>::value,
265 "distance scalar and input matrix A must have same type");
266 static_assert(
267 std::is_same<typename Eigen::MatrixBase<DerivedB>::Scalar, Scalar>::value,
268 "distance scalar and input matrix B must have same type");
269
270 return lhs.
271 binaryExpr(rhs, XOR()).
272 unaryExpr(BitCount()).
273 sum();
274 }
275
276 /** Compute the unrooted distance between two scalars.
277 * @param lhs scalar on left hand side
278 * @param rhs scalar on right hand side */
279 Scalar operator()(const Scalar lhs,
280 const Scalar rhs) const
281 {
282 BitCount cnt;
283 XOR xOr;
284 return cnt(xOr(lhs, rhs));
285 }
286
287 /** Compute the root of a unrooted distance value.
288 * @param value unrooted distance value */
289 Scalar operator()(const Scalar value) const
290 {
291 return value;
292 }
293 };
294
    /** Efficient heap structure to query nearest neighbours.
     * Maintains a binary max-heap keyed by distance over two caller-owned
     * arrays, so query results can be written in place without additional
     * allocations. The element with the greatest distance is always at the
     * front, which makes replacing the worst candidate cheap. */
    template<typename Scalar>
    class QueryHeap
    {
    private:
        // Caller-owned storage for point indices and their distances.
        Index *indices_ = nullptr;
        Scalar *distances_ = nullptr;
        // Capacity of both arrays.
        size_t maxSize_ = 0;
        // Current number of elements in the heap.
        size_t size_ = 0;
    public:
        /** Creates a query heap with the given index and distance memory regions.
         * Both arrays must provide space for at least maxSize elements and
         * must outlive the heap. */
        QueryHeap(Index *indices, Scalar *distances, const size_t maxSize)
            : indices_(indices), distances_(distances), maxSize_(maxSize)
        { }

        /** Pushes a new query data set into the heap with the given
         * index and distance.
         * The index identifies the point for which the given distance
         * was computed. The heap must not be full.
         * @param idx index / ID of the query point
         * @param dist distance that was computed for the query point*/
        void push(const Index idx, const Scalar dist)
        {
            assert(!full());

            // add new value at the end
            indices_[size_] = idx;
            distances_[size_] = dist;
            ++size_;

            // upheap: move parents down while they are smaller than the
            // new distance, then place the new element in the freed slot
            size_t k = size_ - 1;
            size_t tmp = (k - 1) / 2;
            while(k > 0 && distances_[tmp] < dist)
            {
                distances_[k] = distances_[tmp];
                indices_[k] = indices_[tmp];
                k = tmp;
                tmp = (k - 1) / 2;
            }
            distances_[k] = dist;
            indices_[k] = idx;
        }

        /** Removes the element at the front of the heap and restores
         * the heap order. The heap must not be empty. */
        void pop()
        {
            assert(!empty());

            // replace first element with last
            --size_;
            distances_[0] = distances_[size_];
            indices_[0] = indices_[size_];

            // downheap: swap the moved element with its greater child
            // until both children are smaller
            size_t k = 0;
            size_t j;
            Scalar dist = distances_[0];
            Index idx = indices_[0];
            while(2 * k + 1 < size_)
            {
                j = 2 * k + 1;
                if(j + 1 < size_ && distances_[j+1] > distances_[j])
                    ++j;
                // j references now greatest child
                if(dist >= distances_[j])
                    break;
                distances_[k] = distances_[j];
                indices_[k] = indices_[j];
                k = j;
            }
            distances_[k] = dist;
            indices_[k] = idx;
        }

        /** Returns the distance of the element in front of the heap,
         * i.e. the greatest distance currently stored. */
        Scalar front() const
        {
            assert(!empty());
            return distances_[0];
        }

        /** Determines if this query heap is full.
         * The heap is considered full if its number of elements
         * has reached its max size.
         * @return true if the heap is full, else false */
        bool full() const
        {
            return size_ >= maxSize_;
        }

        /** Determines if this query heap is empty.
         * @return true if the heap contains no elements, else false */
        bool empty() const
        {
            return size_ == 0;
        }

        /** Returns the number of elements within the query heap.
         * @return number of elements in the heap */
        size_t size() const
        {
            return size_;
        }

        /** Clears the query heap. */
        void clear()
        {
            size_ = 0;
        }

        /** Sorts the elements within the heap according to
         * their distance.
         * Performs an in-place heapsort: the maximum is popped repeatedly
         * and stored just behind the shrinking heap, so afterwards the
         * arrays are ordered ascending by distance. The heap is empty
         * when this returns. */
        void sort()
        {
            size_t cnt = size_;
            for(size_t i = 0; i < cnt; ++i)
            {
                Index idx = indices_[0];
                Scalar dist = distances_[0];
                pop();
                // pop() shrank the heap, so slot cnt - i - 1 is free
                indices_[cnt - i - 1] = idx;
                distances_[cnt - i - 1] = dist;
            }
        }
    };
422
    /** Class for performing brute force knn search.
     * Compares each query point against every data point (linear scan),
     * so no index structure needs to be built. */
    template<typename Scalar,
        typename Distance=EuclideanDistance<Scalar>>
    class BruteForce
    {
    public:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef knncpp::Matrixi Matrixi;
    private:
        // Distance functor used for all distance computations.
        Distance distance_ = Distance();
        // Copy of the data; only used when setData was called with copy=true.
        Matrix dataCopy_ = Matrix();
        // Data actually used for queries; points to external data or to dataCopy_.
        const Matrix *data_ = nullptr;

        // If true, query results are sorted ascending by distance.
        bool sorted_ = true;
        // If true, the root of the distances is taken after querying.
        bool takeRoot_ = true;
        // Number of threads used for querying (forwarded to OpenMP).
        Index threads_ = 1;
        // Maximum query distance; <= 0 means no limit.
        Scalar maxDist_ = 0;

    public:

        BruteForce() = default;

        /** Constructs a brute force instance with the given data.
         * @param data NxM matrix, M points of dimension N
         * @param copy if true copies the data, otherwise assumes static data */
        BruteForce(const Matrix &data, const bool copy = false)
            : BruteForce()
        {
            setData(data, copy);
        }

        /** Set if the points returned by the queries should be sorted
         * according to their distance to the query points.
         * @param sorted sort query results */
        void setSorted(const bool sorted)
        {
            sorted_ = sorted;
        }

        /** Set if the distances after the query should be rooted or not.
         * Taking the root of the distances increases query time, but the
         * function will return true distances instead of their powered
         * versions.
         * @param takeRoot set true if root should be taken else false */
        void setTakeRoot(const bool takeRoot)
        {
            takeRoot_ = takeRoot;
        }

        /** Set the amount of threads that should be used for querying.
         * OpenMP has to be enabled for this to work.
         * @param threads amount of threads, 0 for optimal choice */
        void setThreads(const unsigned int threads)
        {
            threads_ = threads;
        }

        /** Set the maximum distance for querying the tree.
         * The search will be pruned if the maximum distance is set to any
         * positive number.
         * @param maxDist maximum distance, <= 0 for no limit */
        void setMaxDistance(const Scalar maxDist)
        {
            maxDist_ = maxDist;
        }

        /** Set the data points used for this tree.
         * This does not build the tree.
         * @param data NxM matrix, M points of dimension N
         * @param copy if true data is copied, assumes static data otherwise */
        void setData(const Matrix &data, const bool copy = false)
        {
            if(copy)
            {
                dataCopy_ = data;
                data_ = &dataCopy_;
            }
            else
            {
                data_ = &data;
            }
        }

        /** Set the distance functor used for all distance computations. */
        void setDistance(const Distance &distance)
        {
            distance_ = distance;
        }

        /** No-op; brute force search does not build an index structure. */
        void build()
        { }

        /** Queries the data for the knn nearest neighbours of each query point.
         * @param queryPoints NxM matrix of M query points with dimension N
         * @param knn number of neighbours searched per query point
         * @param indices knn x M output matrix of data point indices; -1 marks unused slots
         * @param distances knn x M output matrix of distances; -1 marks unused slots
         * @throws std::runtime_error if no data was set, the data is empty
         *         or query and data dimensions do not match */
        template<typename Derived>
        void query(const Eigen::MatrixBase<Derived> &queryPoints,
            const size_t knn,
            Matrixi &indices,
            Matrix &distances) const
        {
            if(data_ == nullptr)
                throw std::runtime_error("cannot query BruteForce: data not set");
            if(data_->size() == 0)
                throw std::runtime_error("cannot query BruteForce: data is empty");
            if(queryPoints.rows() != dimension())
                throw std::runtime_error("cannot query BruteForce: data and query descriptors do not have same dimension");

            const Matrix &dataPoints = *data_;

            // resize outputs to knn x M and mark all slots unused (-1)
            indices.setConstant(knn, queryPoints.cols(), -1);
            distances.setConstant(knn, queryPoints.cols(), -1);

            #pragma omp parallel for num_threads(threads_)
            for(Index i = 0; i < queryPoints.cols(); ++i)
            {
                // each query point writes into its own output column,
                // so the parallel iterations do not interfere
                Index *idxPoint = &indices.data()[i * knn];
                Scalar *distPoint = &distances.data()[i * knn];

                // keeps the knn best candidates found so far, worst in front
                QueryHeap<Scalar> heap(idxPoint, distPoint, knn);

                for(Index j = 0; j < dataPoints.cols(); ++j)
                {
                    Scalar dist = distance_(queryPoints.col(i), dataPoints.col(j));

                    // check if point is in range if max distance was set
                    bool isInRange = maxDist_ <= 0 || dist <= maxDist_;
                    // check if this node was an improvement if heap is already full
                    bool isImprovement = !heap.full() ||
                        dist < heap.front();
                    if(isInRange && isImprovement)
                    {
                        if(heap.full())
                            heap.pop();
                        heap.push(j, dist);
                    }
                }

                if(sorted_)
                    heap.sort();

                if(takeRoot_)
                {
                    // convert unrooted distances to true distances;
                    // stop at the first unused slot
                    for(size_t j = 0; j < knn; ++j)
                    {
                        if(idxPoint[j] < 0)
                            break;
                        distPoint[j] = distance_(distPoint[j]);
                    }
                }
            }
        }

        /** Returns the amount of data points stored in the search index.
         * @return number of data points */
        Index size() const
        {
            return data_ == nullptr ? 0 : data_->cols();
        }

        /** Returns the dimension of the data points in the search index.
         * @return dimension of data points */
        Index dimension() const
        {
            return data_ == nullptr ? 0 : data_->rows();
        }
    };
587
588 // template<typename Scalar>
589 // struct MeanMidpointRule
590 // {
591 // typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
592 // typedef knncpp::Matrixi Matrixi;
593
594 // void operator(const Matrix &data, const Matrixi &indices, Index split)
595 // };
596
597 /** Class for performing k nearest neighbour searches with minkowski distances.
598 * This kdtree only works reliably with the minkowski distance and its
599 * special cases like manhatten or euclidean distance.
600 * @see ManhattenDistance, EuclideanDistance, ChebyshevDistance, MinkowskiDistance*/
601 template<typename _Scalar, int _Dimension, typename _Distance>
602 class KDTreeMinkowski
603 {
604 public:
605 typedef _Scalar Scalar;
606 typedef _Distance Distance;
607 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
608 typedef Eigen::Matrix<Scalar, _Dimension, Eigen::Dynamic> DataMatrix;
609 typedef Eigen::Matrix<Scalar, _Dimension, 1> DataVector;
610 typedef knncpp::Matrixi Matrixi;
611 private:
612 typedef Eigen::Matrix<Scalar, 2, 1> Bounds;
613 typedef Eigen::Matrix<Scalar, 2, _Dimension> BoundingBox;
614
        /** Struct representing a node in the KDTree.
         * It can be either an inner node or a leaf node.
         * A node is a leaf exactly when it has no children
         * (left and right are -1). */
        struct Node
        {
            /** Indices of data points in this leaf node.
             * startIdx / length describe a contiguous block within the
             * tree's index vector. Only meaningful for leaf nodes. */
            Index startIdx = 0;
            Index length = 0;

            /** Left child of this inner node. */
            Index left = -1;
            /** Right child of this inner node. */
            Index right = -1;
            /** Axis of the axis aligned splitting hyper plane. */
            Index splitaxis = -1;
            /** Translation of the axis aligned splitting hyper plane. */
            Scalar splitpoint = 0;
            /** Lower end of the splitpoint range */
            Scalar splitlower = 0;
            /** Upper end of the splitpoint range */
            Scalar splitupper = 0;


            Node() = default;

            /** Constructor for leaf nodes */
            Node(const Index startIdx, const Index length)
                : startIdx(startIdx), length(length)
            { }

            /** Constructor for inner nodes */
            Node(const Index splitaxis, const Scalar splitpoint,
                const Index left, const Index right)
                : left(left), right(right),
                splitaxis(splitaxis), splitpoint(splitpoint)
            { }

            /** A node is a leaf if it has no children. */
            bool isLeaf() const
            {
                return !hasLeft() && !hasRight();
            }

            /** A node is an inner node if it has both children. */
            bool isInner() const
            {
                return hasLeft() && hasRight();
            }

            /** Returns true if a left child index is set. */
            bool hasLeft() const
            {
                return left >= 0;
            }

            /** Returns true if a right child index is set. */
            bool hasRight() const
            {
                return right >= 0;
            }
        };
671
        // Copy of the data; only used when setData was called with copy=true.
        DataMatrix dataCopy_ = DataMatrix();
        // Data actually used by the tree; points to external data or to dataCopy_.
        const DataMatrix *data_ = nullptr;
        // Permutation of data column indices; each node references a contiguous block.
        std::vector<Index> indices_ = std::vector<Index>();
        // Flat storage of all tree nodes; children are addressed by index.
        std::vector<Node> nodes_ = std::vector<Node>();

        // Maximum number of data points per leaf node (see setBucketSize).
        Index bucketSize_ = 16;
        // If true, query results are sorted by distance (see setSorted).
        bool sorted_ = true;
        // Build option set via setCompact; usage not visible in this chunk.
        bool compact_ = false;
        // Build option set via setBalanced; usage not visible in this chunk.
        bool balanced_ = false;
        // If true, the root of the distances is taken after querying (see setTakeRoot).
        bool takeRoot_ = true;
        // Threads used for building / querying; 0 for optimal choice (see setThreads).
        Index threads_ = 0;
        // Maximum query distance; <= 0 disables pruning (see setMaxDistance).
        Scalar maxDist_ = 0;

        // Distance functor used for all distance computations.
        Distance distance_ = Distance();

        // Bounding box of the data set; presumably filled during build — confirm in build().
        BoundingBox bbox_ = BoundingBox();
688
689 Index buildLeafNode(const Index startIdx,
690 const Index length,
691 BoundingBox &bbox)
692 {
693 nodes_.push_back(Node(startIdx, length));
694 calculateBoundingBox(startIdx, length, bbox);
695 return static_cast<Index>(nodes_.size() - 1);
696 }
697
        /** Finds the minimum and maximum values of each dimension (row) in the
         * data matrix. Only respects the columns specified by the index
         * vector.
         * Row 0 of bbox receives the per-dimension minima, row 1 the maxima.
         * @param startIdx starting index within indices data structure to search for bounding box
         * @param length length of the block of indices*/
        void calculateBoundingBox(const Index startIdx,
            const Index length,
            BoundingBox &bbox) const
        {
            assert(length > 0);
            assert(startIdx >= 0);
            assert(static_cast<size_t>(startIdx + length) <= indices_.size());
            assert(data_->rows() == bbox.cols());

            const DataMatrix &data = *data_;

            // initialize bounds of the bounding box with the first point
            Index first = indices_[startIdx];
            for(Index i = 0; i < bbox.cols(); ++i)
            {
                bbox(0, i) = data(i, first);
                bbox(1, i) = data(i, first);
            }

            // search for min / max values in data
            for(Index i = 1; i < length; ++i)
            {
                // retrieve data index
                Index col = indices_[startIdx + i];
                assert(col >= 0 && col < data.cols());

                // check min and max for each dimension individually
                for(Index j = 0; j < data.rows(); ++j)
                {
                    bbox(0, j) = std::min(bbox(0, j), data(j, col));
                    bbox(1, j) = std::max(bbox(1, j), data(j, col));
                }
            }
        }
737
738 /** Calculates the bounds (min / max values) for the given dimension and block of data. */
739 void calculateBounds(const Index startIdx,
740 const Index length,
741 const Index dim,
742 Bounds &bounds) const
743 {
744 assert(length > 0);
745 assert(startIdx >= 0);
746 assert(static_cast<size_t>(startIdx + length) <= indices_.size());
747
748 const DataMatrix &data = *data_;
749
750 bounds(0) = data(dim, indices_[startIdx]);
751 bounds(1) = data(dim, indices_[startIdx]);
752
753 for(Index i = 1; i < length; ++i)
754 {
755 Index col = indices_[startIdx + i];
756 assert(col >= 0 && col < data.cols());
757
758 bounds(0) = std::min(bounds(0), data(dim, col));
759 bounds(1) = std::max(bounds(1), data(dim, col));
760 }
761 }
762
763 void calculateSplittingMidpoint(const Index startIdx,
764 const Index length,
765 const BoundingBox &bbox,
766 Index &splitaxis,
767 Scalar &splitpoint,
768 Index &splitoffset)
769 {
770 const DataMatrix &data = *data_;
771
772 // search for axis with longest distance
773 splitaxis = 0;
774 Scalar splitsize = static_cast<Scalar>(0);
775 for(Index i = 0; i < data.rows(); ++i)
776 {
777 Scalar diff = bbox(1, i) - bbox(0, i);
778 if(diff > splitsize)
779 {
780 splitaxis = i;
781 splitsize = diff;
782 }
783 }
784
785 // calculate the bounds in this axis and update our data
786 // accordingly
787 Bounds bounds;
788 calculateBounds(startIdx, length, splitaxis, bounds);
789 splitsize = bounds(1) - bounds(0);
790
791 const Index origSplitaxis = splitaxis;
792 for(Index i = 0; i < data.rows(); ++i)
793 {
794 // skip the dimension of the previously found splitaxis
795 if(i == origSplitaxis)
796 continue;
797 Scalar diff = bbox(1, i) - bbox(0, i);
798 // check if the split for this dimension would be potentially larger
799 if(diff > splitsize)
800 {
801 Bounds newBounds;
802 // update the bounds to their actual current value
803 calculateBounds(startIdx, length, splitaxis, newBounds);
804 diff = newBounds(1) - newBounds(0);
805 if(diff > splitsize)
806 {
807 splitaxis = i;
808 splitsize = diff;
809 bounds = newBounds;
810 }
811 }
812 }
813
814 // use the sliding midpoint rule
815 splitpoint = (bounds(0) + bounds(1)) / static_cast<Scalar>(2);
816
817 Index leftIdx = startIdx;
818 Index rightIdx = startIdx + length - 1;
819
820 // first loop checks left < splitpoint and right >= splitpoint
821 while(leftIdx <= rightIdx)
822 {
823 // increment left as long as left has not reached right and
824 // the value of the left element is less than the splitpoint
825 while(leftIdx <= rightIdx && data(splitaxis, indices_[leftIdx]) < splitpoint)
826 ++leftIdx;
827
828 // decrement right as long as left has not reached right and
829 // the value of the right element is greater than the splitpoint
830 while(leftIdx <= rightIdx && data(splitaxis, indices_[rightIdx]) >= splitpoint)
831 --rightIdx;
832
833 if(leftIdx <= rightIdx)
834 {
835 std::swap(indices_[leftIdx], indices_[rightIdx]);
836 ++leftIdx;
837 --rightIdx;
838 }
839 }
840
841 // remember this offset from starting index
842 const Index offset1 = leftIdx - startIdx;
843
844 rightIdx = startIdx + length - 1;
845 // second loop checks left <= splitpoint and right > splitpoint
846 while(leftIdx <= rightIdx)
847 {
848 // increment left as long as left has not reached right and
849 // the value of the left element is less than the splitpoint
850 while(leftIdx <= rightIdx && data(splitaxis, indices_[leftIdx]) <= splitpoint)
851 ++leftIdx;
852
853 // decrement right as long as left has not reached right and
854 // the value of the right element is greater than the splitpoint
855 while(leftIdx <= rightIdx && data(splitaxis, indices_[rightIdx]) > splitpoint)
856 --rightIdx;
857
858 if(leftIdx <= rightIdx)
859 {
860 std::swap(indices_[leftIdx], indices_[rightIdx]);
861 ++leftIdx;
862 --rightIdx;
863 }
864 }
865
866 // remember this offset from starting index
867 const Index offset2 = leftIdx - startIdx;
868
869 const Index halfLength = length / static_cast<Index>(2);
870
871 // find a separation of points such that is best balanced
872 // offset1 denotes separation where equal points are all on the right
873 // offset2 denots separation where equal points are all on the left
874 if (offset1 > halfLength)
875 splitoffset = offset1;
876 else if (offset2 < halfLength)
877 splitoffset = offset2;
878 // when we get here offset1 < halflength and offset2 > halflength
879 // so simply split the equal elements in the middle
880 else
881 splitoffset = halfLength;
882 }
883
        /** Creates an inner node for the given index block: computes a split,
         * recursively builds both children and tightens the bounding box.
         * @return index of the newly created node */
        Index buildInnerNode(const Index startIdx,
            const Index length,
            BoundingBox &bbox)
        {
            assert(length > 0);
            assert(startIdx >= 0);
            assert(static_cast<size_t>(startIdx + length) <= indices_.size());
            assert(data_->rows() == bbox.cols());

            // create node
            // (store the index, not a reference: recursive builds below may
            // reallocate the nodes_ vector)
            const Index nodeIdx = nodes_.size();
            nodes_.push_back(Node());

            Index splitaxis;
            Index splitoffset;
            Scalar splitpoint;
            calculateSplittingMidpoint(startIdx, length, bbox, splitaxis, splitpoint, splitoffset);

            nodes_[nodeIdx].splitaxis = splitaxis;
            nodes_[nodeIdx].splitpoint = splitpoint;

            const Index leftStart = startIdx;
            const Index leftLength = splitoffset;
            const Index rightStart = startIdx + splitoffset;
            const Index rightLength = length - splitoffset;

            BoundingBox bboxLeft = bbox;
            BoundingBox bboxRight = bbox;

            // do left build; the left child's box ends at the splitpoint
            bboxLeft(1, splitaxis) = splitpoint;
            Index left = buildR(leftStart, leftLength, bboxLeft);
            nodes_[nodeIdx].left = left;

            // do right build; the right child's box starts at the splitpoint
            bboxRight(0, splitaxis) = splitpoint;
            Index right = buildR(rightStart, rightLength, bboxRight);
            nodes_[nodeIdx].right = right;

            // extract the range of the splitpoint
            // (the child builds tightened their boxes to the actual points)
            nodes_[nodeIdx].splitlower = bboxLeft(1, splitaxis);
            nodes_[nodeIdx].splitupper = bboxRight(0, splitaxis);

            // update the bounding box to the values of the new bounding boxes
            for(Index i = 0; i < bbox.cols(); ++i)
            {
                bbox(0, i) = std::min(bboxLeft(0, i), bboxRight(0, i));
                bbox(1, i) = std::max(bboxLeft(1, i), bboxRight(1, i));
            }

            return nodeIdx;
        }
936
937 Index buildR(const Index startIdx,
938 const Index length,
939 BoundingBox &bbox)
940 {
941 // check for base case
942 if(length <= bucketSize_)
943 return buildLeafNode(startIdx, length, bbox);
944 else
945 return buildInnerNode(startIdx, length, bbox);
946 }
947
948 bool isDistanceInRange(const Scalar dist) const
949 {
950 return maxDist_ <= 0 || dist <= maxDist_;
951 }
952
953 bool isDistanceImprovement(const Scalar dist, const QueryHeap<Scalar> &dataHeap) const
954 {
955 return !dataHeap.full() || dist < dataHeap.front();
956 }
957
        /** Performs a brute force search over all points of a leaf node and
         * pushes any improving candidates into the result heap. */
        template<typename Derived>
        void queryLeafNode(const Node &node,
            const Eigen::MatrixBase<Derived> &queryPoint,
            QueryHeap<Scalar> &dataHeap) const
        {
            assert(node.isLeaf());

            const DataMatrix &data = *data_;

            // go through all points in this leaf node and do brute force search
            for(Index i = 0; i < node.length; ++i)
            {
                const Index idx = node.startIdx + i;
                assert(idx >= 0 && idx < static_cast<Index>(indices_.size()));

                // retrieve index of the current data point
                const Index dataIdx = indices_[idx];
                const Scalar dist = distance_(queryPoint, data.col(dataIdx));

                // check if point is within max distance and if the value would be
                // an improvement
                if(isDistanceInRange(dist) && isDistanceImprovement(dist, dataHeap))
                {
                    // replace the worst candidate if the heap is at capacity
                    if(dataHeap.full())
                        dataHeap.pop();
                    dataHeap.push(dataIdx, dist);
                }
            }
        }
987
        /** Recursively queries an inner node.
         * Visits the child on the query point's side of the split first and
         * only descends into the far child if the (incrementally maintained)
         * lower bound on distances in that region could still improve the
         * result.
         * @param splitdists per-axis distance contributions of the current
         *        search region to the query point
         * @param mindist lower bound on the unrooted distance from the query
         *        point to the current node's region */
        template<typename Derived>
        void queryInnerNode(const Node &node,
            const Eigen::MatrixBase<Derived> &queryPoint,
            QueryHeap<Scalar> &dataHeap,
            DataVector &splitdists,
            const Scalar mindist) const
        {
            assert(node.isInner());

            const Index splitaxis = node.splitaxis;
            const Scalar splitval = queryPoint(splitaxis, 0);
            Scalar splitdist;
            Index firstNode;
            Index secondNode;
            // check if right or left child should be visited; the test is
            // equivalent to splitval < (splitlower + splitupper) / 2
            const bool visitLeft = (splitval - node.splitlower + splitval - node.splitupper) < 0;
            if(visitLeft)
            {
                firstNode = node.left;
                secondNode = node.right;
                // distance from the query point to the far (right) region
                splitdist = distance_(splitval, node.splitupper);
            }
            else
            {
                firstNode = node.right;
                secondNode = node.left;
                // distance from the query point to the far (left) region
                splitdist = distance_(splitval, node.splitlower);
            }

            queryR(nodes_[firstNode], queryPoint, dataHeap, splitdists, mindist);

            // lower bound for the far child: swap this axis' old
            // contribution for the new split distance
            const Scalar mindistNew = mindist + splitdist - splitdists(splitaxis);

            // check if node is in range if max distance was set
            // check if this node was an improvement if heap is already full
            if(isDistanceInRange(mindistNew) && isDistanceImprovement(mindistNew, dataHeap))
            {
                // temporarily record this axis' contribution, restore after
                // the recursive call returns
                const Scalar splitdistOld = splitdists(splitaxis);
                splitdists(splitaxis) = splitdist;
                queryR(nodes_[secondNode], queryPoint, dataHeap, splitdists, mindistNew);
                splitdists(splitaxis) = splitdistOld;
            }
        }
1031
1032 template<typename Derived>
1033 void queryR(const Node &node,
1034 const Eigen::MatrixBase<Derived> &queryPoint,
1035 QueryHeap<Scalar> &dataHeap,
1036 DataVector &splitdists,
1037 const Scalar mindist) const
1038 {
1039 if(node.isLeaf())
1040 queryLeafNode(node, queryPoint, dataHeap);
1041 else
1042 queryInnerNode(node, queryPoint, dataHeap, splitdists, mindist);
1043 }
1044
1045 /** Recursively computes the depth for the given node. */
1046 Index depthR(const Node &node) const
1047 {
1048 if(node.isLeaf())
1049 return 1;
1050 else
1051 {
1052 Index left = depthR(nodes_[node.left]);
1053 Index right = depthR(nodes_[node.right]);
1054 return std::max(left, right) + 1;
1055 }
1056 }
1057
1058 public:
1059
        /** Constructs an empty KDTree. Data must be supplied via setData()
         * before the tree can be built or queried. */
        KDTreeMinkowski()
        { }
1063
        /** Constructs KDTree with the given data. This does not build the
         * the index of the tree.
         * @param data NxM matrix, M points of dimension N
         * @param copy if true copies the data, otherwise assumes static data */
        KDTreeMinkowski(const DataMatrix &data, const bool copy=false)
        {
            setData(data, copy);
        }
1072
        /** Set the maximum amount of data points per leaf in the tree (aka
         * bucket size). Takes effect on the next build.
         * @param bucketSize amount of points per leaf. */
        void setBucketSize(const Index bucketSize)
        {
            bucketSize_ = bucketSize;
        }
1080
        /** Set if the points returned by the queries should be sorted
         * according to their distance to the query points.
         * Sorting increases query time.
         * @param sorted sort query results */
        void setSorted(const bool sorted)
        {
            sorted_ = sorted;
        }
1088
1089 /** Set if the tree should be built as balanced as possible.
1090 * This increases build time, but decreases search time.
1091 * @param balanced set true to build a balanced tree */
1092 void setBalanced(const bool balanced)
1093 {
1094 balanced_ = balanced;
1095 }
1096
1097 /** Set if the distances after the query should be rooted or not.
1098 * Taking the root of the distances increases query time, but the
1099 * function will return true distances instead of their powered
1100 * versions.
1101 * @param takeRoot set true if root should be taken else false */
1102 void setTakeRoot(const bool takeRoot)
1103 {
1104 takeRoot_ = takeRoot;
1105 }
1106
1107 /** Set if the tree should be built with compact leaf nodes.
1108 * This increases build time, but makes leaf nodes denser (more)
1109 * points. Thus less visits are necessary.
1110 * @param compact set true ti build a tree with compact leafs */
1111 void setCompact(const bool compact)
1112 {
1113 compact_ = compact;
1114 }
1115
1116 /** Set the amount of threads that should be used for building and
1117 * querying the tree.
1118 * OpenMP has to be enabled for this to work.
1119 * @param threads amount of threads, 0 for optimal choice */
1120 void setThreads(const unsigned int threads)
1121 {
1122 threads_ = threads;
1123 }
1124
1125 /** Set the maximum distance for querying the tree.
1126 * The search will be pruned if the maximum distance is set to any
1127 * positive number.
1128 * @param maxDist maximum distance, <= 0 for no limit */
1129 void setMaxDistance(const Scalar maxDist)
1130 {
1131 maxDist_ = maxDist;
1132 }
1133
1134 /** Set the data points used for this tree.
1135 * This does not build the tree.
1136 * @param data NxM matrix, M points of dimension N
1137 * @param copy if true data is copied, assumes static data otherwise */
1138 void setData(const DataMatrix &data, const bool copy = false)
1139 {
1140 clear();
1141 if(copy)
1142 {
1143 dataCopy_ = data;
1144 data_ = &dataCopy_;
1145 }
1146 else
1147 {
1148 data_ = &data;
1149 }
1150 }
1151
1152 void setDistance(const Distance &distance)
1153 {
1154 distance_ = distance;
1155 }
1156
1157 /** Builds the search index of the tree.
1158 * Data has to be set and must be non-empty. */
1159 void build()
1160 {
1161 if(data_ == nullptr)
1162 throw std::runtime_error("cannot build KDTree; data not set");
1163
1164 if(data_->size() == 0)
1165 throw std::runtime_error("cannot build KDTree; data is empty");
1166
1167 clear();
1168 nodes_.reserve((data_->cols() / bucketSize_) + 1);
1169
1170 // initialize indices in simple sequence
1171 indices_.resize(data_->cols());
1172 for(size_t i = 0; i < indices_.size(); ++i)
1173 indices_[i] = i;
1174
1175 bbox_.resize(2, data_->rows());
1176 Index startIdx = 0;
1177 Index length = data_->cols();
1178
1179 calculateBoundingBox(startIdx, length, bbox_);
1180
1181 buildR(startIdx, length, bbox_);
1182 }
1183
        /** Queries the tree for the nearest neighbours of the given query
         * points.
         *
         * The tree has to be built before it can be queried.
         *
         * The query points have to have the same dimension as the data points
         * of the tree.
         *
         * The result matrices will be resized appropriately.
         * Indices and distances will be set to -1 if less than knn neighbours
         * were found.
         *
         * @param queryPoints NxM matrix, M points of dimension N
         * @param knn amount of neighbours to be found
         * @param indices KNNxM matrix, indices of neighbours in the data set
         * @param distances KNNxM matrix, distance between query points and
         *        neighbours
         * @throws std::runtime_error if the tree was not built or the
         *         dimensions of tree and query points differ */
        template<typename Derived>
        void query(const Eigen::MatrixBase<Derived> &queryPoints,
            const size_t knn,
            Matrixi &indices,
            Matrix &distances) const
        {
            if(nodes_.size() == 0)
                throw std::runtime_error("cannot query KDTree; not built yet");

            if(queryPoints.rows() != dimension())
                throw std::runtime_error("cannot query KDTree; data and query points do not have same dimension");

            // initialize results with the -1 sentinel for "not found"
            distances.setConstant(knn, queryPoints.cols(), -1);
            indices.setConstant(knn, queryPoints.cols(), -1);

            // raw pointers allow per-column access inside the parallel loop
            Index *indicesRaw = indices.data();
            Scalar *distsRaw = distances.data();

            // each query point is processed independently
            #pragma omp parallel for num_threads(threads_)
            for(Index i = 0; i < queryPoints.cols(); ++i)
            {

                // result slices for this query point (column i)
                Scalar *distPoint = &distsRaw[i * knn];
                Index *idxPoint = &indicesRaw[i * knn];

                // create heap to find nearest neighbours; it operates
                // directly on the result slices
                QueryHeap<Scalar> dataHeap(idxPoint, distPoint, knn);

                // per-axis distances of the query point to the root
                // bounding box; their sum is a lower bound on the distance
                // to any point in the tree and is used for pruning
                Scalar mindist = static_cast<Scalar>(0);
                DataVector splitdists(queryPoints.rows());

                for(Index j = 0; j < splitdists.rows(); ++j)
                {
                    const Scalar value = queryPoints(j, i);
                    const Scalar lower = bbox_(0, j);
                    const Scalar upper = bbox_(1, j);
                    if(value < lower)
                    {
                        splitdists(j) = distance_(value, lower);
                    }
                    else if(value > upper)
                    {
                        splitdists(j) = distance_(value, upper);
                    }
                    else
                    {
                        // query point lies within the box on this axis
                        splitdists(j) = static_cast<Scalar>(0);
                    }

                    mindist += splitdists(j);
                }

                queryR(nodes_[0], queryPoints.col(i), dataHeap, splitdists, mindist);

                if(sorted_)
                    dataHeap.sort();

                // optionally convert powered distances into true distances
                if(takeRoot_)
                {
                    for(size_t j = 0; j < knn; ++j)
                    {
                        // -1 marks unused result slots; stop at the first one
                        if(distPoint[j] < 0)
                            break;
                        distPoint[j] = distance_(distPoint[j]);
                    }
                }
            }
        }
1269
        /** Clears the tree.
         * Discards the search index (all nodes). The associated data is
         * left untouched, so the index can be rebuilt from the same data
         * by calling build(). */
        void clear()
        {
            nodes_.clear();
        }
1275
1276 /** Returns the amount of data points stored in the search index.
1277 * @return number of data points */
1278 Index size() const
1279 {
1280 return data_ == nullptr ? 0 : data_->cols();
1281 }
1282
1283 /** Returns the dimension of the data points in the search index.
1284 * @return dimension of data points */
1285 Index dimension() const
1286 {
1287 return data_ == nullptr ? 0 : data_->rows();
1288 }
1289
1290 /** Returns the maxximum depth of the tree.
1291 * @return maximum depth of the tree */
1292 Index depth() const
1293 {
1294 return nodes_.size() == 0 ? 0 : depthR(nodes_.front());
1295 }
1296 };
1297
    /** Convenience aliases for KDTreeMinkowski trees with a fixed
     * compile-time dimension (2 to 5) or a dynamic dimension, defaulting
     * to the Euclidean distance. */
    template<typename _Scalar, typename _Distance = EuclideanDistance<_Scalar>> using KDTreeMinkowski2 = KDTreeMinkowski<_Scalar, 2, _Distance>;
    template<typename _Scalar, typename _Distance = EuclideanDistance<_Scalar>> using KDTreeMinkowski3 = KDTreeMinkowski<_Scalar, 3, _Distance>;
    template<typename _Scalar, typename _Distance = EuclideanDistance<_Scalar>> using KDTreeMinkowski4 = KDTreeMinkowski<_Scalar, 4, _Distance>;
    template<typename _Scalar, typename _Distance = EuclideanDistance<_Scalar>> using KDTreeMinkowski5 = KDTreeMinkowski<_Scalar, 5, _Distance>;
    template<typename _Scalar, typename _Distance = EuclideanDistance<_Scalar>> using KDTreeMinkowskiX = KDTreeMinkowski<_Scalar, Eigen::Dynamic, _Distance>;
1303
    /** Class for performing KNN search in hamming space by multi-index hashing.
     *
     * The binary code of each data point is split into byte substrings.
     * Each substring position maintains its own hash map from substring
     * code to data point indices, so query candidates can be collected
     * per substring and then verified with the full hamming distance. */
    template<typename Scalar>
    class MultiIndexHashing
    {
    public:
        // codes are interpreted as bit patterns, so the scalar type has to
        // be integral
        static_assert(std::is_integral<Scalar>::value, "MultiIndexHashing Scalar has to be integral");

        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
        typedef knncpp::Matrixi Matrixi;

    private:
        HammingDistance<Scalar> distance_;

        // storage for the data if it was copied in setData()
        Matrix dataCopy_;
        // points used for the search; either &dataCopy_ or caller-owned
        const Matrix *data_;

        bool sorted_;       // sort query results by distance
        Scalar maxDist_;    // maximum query distance, <= 0 means no limit
        Index substrLen_;   // length of bucket substrings in bytes
        Index threads_;     // number of OpenMP threads used for queries
        // one hash map per substring position, mapping a substring code to
        // the indices of all data points sharing it
        std::vector<std::map<Scalar, std::vector<Index>>> buckets_;

        /** Extracts the substring code of substrLen_ bytes starting at
         * byte @p offset within word @p idx of the given code vector.
         * When the substring crosses a word boundary, the remaining bytes
         * are merged in from the following word. */
        template<typename Derived>
        Scalar extractCode(const Eigen::MatrixBase<Derived> &data,
            const Index idx,
            const Index offset) const
        {
            // shift left to drop high bytes, then right to drop low bytes,
            // isolating the requested byte range of the word
            Index leftShift = std::max<Index>(0, static_cast<Index>(sizeof(Scalar)) - offset - substrLen_);
            Index rightShift = leftShift + offset;

            Scalar code = (data(idx, 0) << (leftShift * 8)) >> (rightShift * 8);

            // substring spans into the next word; merge its bytes in
            if(static_cast<Index>(sizeof(Scalar)) - offset < substrLen_ && idx + 1 < data.rows())
            {
                Index shift = 2 * static_cast<Index>(sizeof(Scalar)) - substrLen_ - offset;
                code |= data(idx+1, 0) << (shift * 8);
            }

            return code;
        }
    public:
        /** Constructs an empty index with sorting enabled, no distance
         * limit, a substring length of one byte and a single thread. */
        MultiIndexHashing()
            : distance_(), dataCopy_(), data_(nullptr), sorted_(true),
            maxDist_(0), substrLen_(1), threads_(1)
        { }

        /** Constructs an index with the given data.
         * This does not build the index.
         * @param data NxM matrix, M points of dimension N
         * @param copy if true copies the data, otherwise assumes static data */
        MultiIndexHashing(const Matrix &data, const bool copy=false)
            : MultiIndexHashing()
        {
            setData(data, copy);
        }

        /** Set the maximum distance for querying the index.
         * Note that if no maximum distance is used, this algorithm performs
         * basically a brute force search.
         * @param maxDist maximum distance, <= 0 for no limit */
        void setMaxDistance(const Scalar maxDist)
        {
            maxDist_ = maxDist;
        }

        /** Set if the points returned by the queries should be sorted
         * according to their distance to the query points.
         * @param sorted sort query results */
        void setSorted(const bool sorted)
        {
            sorted_ = sorted;
        }

        /** Set the amount of threads that should be used for building and
         * querying the tree.
         * OpenMP has to be enabled for this to work.
         * @param threads amount of threads, 0 for optimal choice */
        void setThreads(const unsigned int threads)
        {
            threads_ = threads;
        }

        /** Set the length of substrings (in bytes) used for multi index hashing.
         * The byte count of a full code vector has to be divisible by this
         * length (checked in build()).
         * @param len length of bucket substrings in bytes */
        void setSubstringLength(const Index len)
        {
            substrLen_ = len;
        }

        /** Set the data points used for the KNN search.
         * @param data NxM matrix, M points of dimension N
         * @param copy if true data is copied, assumes static data otherwise */
        void setData(const Matrix &data, const bool copy = false)
        {
            clear();
            if(copy)
            {
                dataCopy_ = data;
                data_ = &dataCopy_;
            }
            else
            {
                data_ = &data;
            }
        }

        /** Builds the substring hash buckets for the current data.
         * Data has to be set and must be non-empty.
         * @throws std::runtime_error if no data was set, the data is empty
         *         or the byte count per vector is not divisible by the
         *         substring length */
        void build()
        {
            if(data_ == nullptr)
                throw std::runtime_error("cannot build MultiIndexHashing; data not set");
            if(data_->size() == 0)
                throw std::runtime_error("cannot build MultiIndexHashing; data is empty");

            const Matrix &data = *data_;
            const Index bytesPerVec = data.rows() * static_cast<Index>(sizeof(Scalar));
            if(bytesPerVec % substrLen_ != 0)
                throw std::runtime_error("cannot build MultiIndexHashing; cannot divide byte count per vector by substring length without remainings");

            // one bucket map per substring position
            buckets_.clear();
            buckets_.resize(bytesPerVec / substrLen_);

            for(size_t i = 0; i < buckets_.size(); ++i)
            {
                // byte position of this substring, split into word index
                // and byte offset within that word
                Index start = static_cast<Index>(i) * substrLen_;
                Index idx = start / static_cast<Index>(sizeof(Scalar));
                Index offset = start % static_cast<Index>(sizeof(Scalar));
                std::map<Scalar, std::vector<Index>> &map = buckets_[i];

                // register every data point under its substring code
                for(Index c = 0; c < data.cols(); ++c)
                {
                    Scalar code = extractCode(data.col(c), idx, offset);
                    if(map.find(code) == map.end())
                        map[code] = std::vector<Index>();
                    map[code].push_back(c);
                }
            }
        }

        /** Queries the index for the nearest neighbours of the given query
         * points.
         * The index has to be built before it can be queried.
         * Indices and distances are set to -1 if less than knn neighbours
         * were found.
         * @param queryPoints NxM matrix, M points of dimension N
         * @param knn amount of neighbours to be found
         * @param indices KNNxM matrix, indices of neighbours in the data set
         * @param distances KNNxM matrix, hamming distances between query
         *        points and neighbours
         * @throws std::runtime_error if the index was not built or the
         *         dimensions of index and query points differ */
        template<typename Derived>
        void query(const Eigen::MatrixBase<Derived> &queryPoints,
            const size_t knn,
            Matrixi &indices,
            Matrix &distances) const
        {
            if(buckets_.size() == 0)
                throw std::runtime_error("cannot query MultiIndexHashing; not built yet");
            if(queryPoints.rows() != dimension())
                throw std::runtime_error("cannot query MultiIndexHashing; data and query points do not have same dimension");

            const Matrix &data = *data_;

            // initialize results with the -1 sentinel for "not found"
            indices.setConstant(knn, queryPoints.cols(), -1);
            distances.setConstant(knn, queryPoints.cols(), -1);

            Index *indicesRaw = indices.data();
            Scalar *distsRaw = distances.data();

            // per-substring distance budget derived from the full budget
            Scalar maxDistPart = maxDist_ / buckets_.size();

            #pragma omp parallel for num_threads(threads_)
            for(Index c = 0; c < queryPoints.cols(); ++c)
            {
                // gather candidate indices from all substring buckets
                std::set<Index> candidates;
                for(size_t i = 0; i < buckets_.size(); ++i)
                {
                    Index start = static_cast<Index>(i) * substrLen_;
                    Index idx = start / static_cast<Index>(sizeof(Scalar));
                    Index offset = start % static_cast<Index>(sizeof(Scalar));
                    const std::map<Scalar, std::vector<Index>> &map = buckets_[i];

                    Scalar code = extractCode(queryPoints.col(c), idx, offset);
                    for(const auto &x: map)
                    {
                        // keep all points whose substring is within the
                        // per-substring budget
                        Scalar dist = distance_(x.first, code);
                        if(maxDistPart <= 0 || dist <= maxDistPart)
                        {
                            for(size_t j = 0; j < x.second.size(); ++j)
                                candidates.insert(x.second[j]);
                        }
                    }
                }

                Scalar *distPoint = &distsRaw[c * knn];
                Index *idxPoint = &indicesRaw[c * knn];
                // create heap to find nearest neighbours
                QueryHeap<Scalar> dataHeap(idxPoint, distPoint, knn);

                // verify candidates with the full hamming distance
                for(Index idx: candidates)
                {
                    Scalar dist = distance_(data.col(idx), queryPoints.col(c));

                    bool isInRange = maxDist_ <= 0 || dist <= maxDist_;
                    bool isImprovement = !dataHeap.full() ||
                        dist < dataHeap.front();
                    if(isInRange && isImprovement)
                    {
                        if(dataHeap.full())
                            dataHeap.pop();
                        dataHeap.push(idx, dist);
                    }
                }

                if(sorted_)
                    dataHeap.sort();
            }
        }

        /** Returns the amount of data points stored in the search index.
         * @return number of data points */
        Index size() const
        {
            return data_ == nullptr ? 0 : data_->cols();
        }

        /** Returns the dimension of the data points in the search index.
         * @return dimension of data points */
        Index dimension() const
        {
            return data_ == nullptr ? 0 : data_->rows();
        }

        /** Clears the index.
         * Drops the data pointer, the data copy and all hash buckets. */
        void clear()
        {
            data_ = nullptr;
            dataCopy_.resize(0, 0);
            buckets_.clear();
        }

    };
1534
1535 #ifdef KNNCPP_FLANN
1536
1537 /** Wrapper class of FLANN kdtrees for the use with Eigen3. */
1538 template<typename Scalar,
1539 typename Distance=flann::L2_Simple<Scalar>>
1540 class KDTreeFlann
1541 {
1542 public:
1543 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
1544 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
1545 typedef Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic> Matrixi;
1546
1547 private:
1548 typedef flann::Index<Distance> FlannIndex;
1549
1550 Matrix dataCopy_;
1551 Matrix *dataPoints_;
1552
1553 FlannIndex *index_;
1554 flann::SearchParams searchParams_;
1555 flann::IndexParams indexParams_;
1556 Scalar maxDist_;
1557
1558 public:
1559 KDTreeFlann()
1560 : dataCopy_(), dataPoints_(nullptr), index_(nullptr),
1561 searchParams_(32, 0, false),
1562 indexParams_(flann::KDTreeSingleIndexParams(15)),
1563 maxDist_(0)
1564 {
1565 }
1566
1567 KDTreeFlann(Matrix &data, const bool copy = false)
1568 : KDTreeFlann()
1569 {
1570 setData(data, copy);
1571 }
1572
1573 ~KDTreeFlann()
1574 {
1575 clear();
1576 }
1577
1578 void setIndexParams(const flann::IndexParams &params)
1579 {
1580 indexParams_ = params;
1581 }
1582
1583 void setChecks(const int checks)
1584 {
1585 searchParams_.checks = checks;
1586 }
1587
1588 void setSorted(const bool sorted)
1589 {
1590 searchParams_.sorted = sorted;
1591 }
1592
1593 void setThreads(const int threads)
1594 {
1595 searchParams_.cores = threads;
1596 }
1597
1598 void setEpsilon(const float eps)
1599 {
1600 searchParams_.eps = eps;
1601 }
1602
1603 void setMaxDistance(const Scalar dist)
1604 {
1605 maxDist_ = dist;
1606 }
1607
1608 void setData(Matrix &data, const bool copy = false)
1609 {
1610 if(copy)
1611 {
1612 dataCopy_ = data;
1613 dataPoints_ = &dataCopy_;
1614 }
1615 else
1616 {
1617 dataPoints_ = &data;
1618 }
1619
1620 clear();
1621 }
1622
1623 void build()
1624 {
1625 if(dataPoints_ == nullptr)
1626 throw std::runtime_error("cannot build KDTree; data not set");
1627 if(dataPoints_->size() == 0)
1628 throw std::runtime_error("cannot build KDTree; data is empty");
1629
1630 if(index_ != nullptr)
1631 delete index_;
1632
1633 flann::Matrix<Scalar> dataPts(
1634 dataPoints_->data(),
1635 dataPoints_->cols(),
1636 dataPoints_->rows());
1637
1638 index_ = new FlannIndex(dataPts, indexParams_);
1639 index_->buildIndex();
1640 }
1641
1642 void query(Matrix &queryPoints,
1643 const size_t knn,
1644 Matrixi &indices,
1645 Matrix &distances) const
1646 {
1647 if(index_ == nullptr)
1648 throw std::runtime_error("cannot query KDTree; not built yet");
1649 if(dataPoints_->rows() != queryPoints.rows())
1650 throw std::runtime_error("cannot query KDTree; KDTree has different dimension than query data");
1651
1652 // resize result matrices
1653 distances.resize(knn, queryPoints.cols());
1654 indices.resize(knn, queryPoints.cols());
1655
1656 // wrap matrices into flann matrices
1657 flann::Matrix<Scalar> queryPts(
1658 queryPoints.data(),
1659 queryPoints.cols(),
1660 queryPoints.rows());
1661 flann::Matrix<int> indicesF(
1662 indices.data(),
1663 indices.cols(),
1664 indices.rows());
1665 flann::Matrix<Scalar> distancesF(
1666 distances.data(),
1667 distances.cols(),
1668 distances.rows());
1669
1670 // if maximum distance was set then use radius search
1671 if(maxDist_ > 0)
1672 index_->radiusSearch(queryPts, indicesF, distancesF, maxDist_, searchParams_);
1673 else
1674 index_->knnSearch(queryPts, indicesF, distancesF, knn, searchParams_);
1675
1676 // make result matrices compatible to API
1677 #pragma omp parallel for num_threads(searchParams_.cores)
1678 for(Index i = 0; i < indices.cols(); ++i)
1679 {
1680 bool found = false;
1681 for(Index j = 0; j < indices.rows(); ++j)
1682 {
1683 if(indices(j, i) == -1)
1684 found = true;
1685
1686 if(found)
1687 {
1688 indices(j, i) = -1;
1689 distances(j, i) = -1;
1690 }
1691 }
1692 }
1693 }
1694
1695 Index size() const
1696 {
1697 return dataPoints_ == nullptr ? 0 : dataPoints_->cols();
1698 }
1699
1700 Index dimension() const
1701 {
1702 return dataPoints_ == nullptr ? 0 : dataPoints_->rows();
1703 }
1704
1705 void clear()
1706 {
1707 if(index_ != nullptr)
1708 {
1709 delete index_;
1710 index_ = nullptr;
1711 }
1712 }
1713
1714 FlannIndex &flannIndex()
1715 {
1716 return index_;
1717 }
1718 };
1719
    /** Convenience typedefs for FLANN-backed kdtrees with double and
     * float scalars. */
    typedef KDTreeFlann<double> KDTreeFlannd;
    typedef KDTreeFlann<float> KDTreeFlannf;
1722
1723 #endif
1724 }
1725
1726 #endif