Commit d78107a3 authored by Hans-Christian Ebke
Browse files

Algorithm/DBSCAN: Improved and fixed DBSCAN.

git-svn-id: http://www.openflipper.org/svnrepo/OpenFlipper/branches/Free@14639 383ad7c9-94d9-4d36-a494-682f7c89f535
parent e92a6fa3
...@@ -12,6 +12,22 @@ namespace Algorithm { ...@@ -12,6 +12,22 @@ namespace Algorithm {
namespace _DBSCAN_PRIVATE { namespace _DBSCAN_PRIVATE {
/**
 * Weight functor that assigns the constant weight 1.0 to every element.
 *
 * Summing this functor over a neighborhood yields the number of neighbors,
 * i.e. the plain, unweighted neighbor count.
 *
 * @tparam VALUE_TYPE Type of the elements being weighted.
 */
template<typename VALUE_TYPE>
class constant_1 {
    public:
        /**
         * The element is taken by const reference and left unnamed: it is
         * never inspected, so copying it would be wasted work and would
         * needlessly rule out non-copyable element types.
         *
         * @return Always 1.0.
         */
        inline double operator()(const VALUE_TYPE &) const {
            return 1.0;
        }
};
/**
 * Sums the weights of all elements referenced by the range [first, last).
 *
 * The range holds iterators (or pointers) to the actual elements, which is
 * why every entry is dereferenced twice before it is handed to weight_func.
 *
 * @param first       Start of the range of element iterators.
 * @param last        End of the range of element iterators.
 * @param weight_func Unary function mapping an element to its weight.
 * @return The accumulated weight; 0 for an empty range.
 */
template<typename INPUT_ITERATOR, typename WEIGHT_FUNC>
inline double neighborhoodWeight(INPUT_ITERATOR first, const INPUT_ITERATOR last, WEIGHT_FUNC &weight_func) {
    double total_weight = 0;
    while (first != last) {
        total_weight += weight_func(**first);
        ++first;
    }
    return total_weight;
}
/* /*
* Private functions. * Private functions.
*/ */
...@@ -29,11 +45,11 @@ void region_query(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_I ...@@ -29,11 +45,11 @@ void region_query(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_I
} }
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC> template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename WEIGHT_FUNC>
inline inline
void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_ITERATOR center, void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_ITERATOR center,
DISTANCE_FUNC &distance_func, const double epsilon, const int n_min, DISTANCE_FUNC &distance_func, const double epsilon, const double n_min,
std::vector<int> &id_cache, const int current_cluster_id) { std::vector<int> &id_cache, const int current_cluster_id, WEIGHT_FUNC &weight_func) {
std::queue<INPUT_ITERATOR> bfq; std::queue<INPUT_ITERATOR> bfq;
...@@ -59,7 +75,7 @@ void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT ...@@ -59,7 +75,7 @@ void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT
* If the current element is not inside a dense area, * If the current element is not inside a dense area,
* we don't use it as a seed to expand the cluster. * we don't use it as a seed to expand the cluster.
*/ */
if ((int)neighborhood.size() < n_min) if (neighborhoodWeight(neighborhood.begin(), neighborhood.end(), weight_func) < n_min)
continue; continue;
/* /*
...@@ -69,9 +85,9 @@ void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT ...@@ -69,9 +85,9 @@ void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT
it != it_end; ++it) { it != it_end; ++it) {
const size_t neighbor_idx = std::distance(first, *it); const size_t neighbor_idx = std::distance(first, *it);
/* /*
* Is the element classified, yet? * Is the element classified as non-noise, yet?
*/ */
if (id_cache[neighbor_idx] < 0) { if (id_cache[neighbor_idx] <= 0) {
/* /*
* Classify it and use it as a seed. * Classify it and use it as a seed.
*/ */
......
...@@ -30,6 +30,7 @@ namespace Algorithm { ...@@ -30,6 +30,7 @@ namespace Algorithm {
* *
* Returned cluster indices are guaranteed to be a continuous range starting at 1. * Returned cluster indices are guaranteed to be a continuous range starting at 1.
* *
*
* Result has to support the operation `*result++ = <int>`. * Result has to support the operation `*result++ = <int>`.
* *
* Example: * Example:
...@@ -46,11 +47,13 @@ namespace Algorithm { ...@@ -46,11 +47,13 @@ namespace Algorithm {
* @param result Output iterator to the initial position of the result range. The range includes as many elements as [first, last). * @param result Output iterator to the initial position of the result range. The range includes as many elements as [first, last).
* @param epsilon The density-reachable neighborhood radius. * @param epsilon The density-reachable neighborhood radius.
* @param n_min The density-reachable count threshold. * @param n_min The density-reachable count threshold.
* @param weight_func Unary function taking an element as its argument and returning the weight with which that element is counted in the density test.
* Off-the-shelf DBSCAN uses a constant 1 here.
* @return The number of clusters found. * @return The number of clusters found.
*/ */
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR> template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR, typename WEIGHT_FUNC>
int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC distance_func, int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC distance_func,
OUTPUT_ITERATOR result, const double epsilon, const int n_min) { OUTPUT_ITERATOR result, const double epsilon, const double n_min, WEIGHT_FUNC weight_func) {
const size_t input_size = std::distance(first, last); const size_t input_size = std::distance(first, last);
...@@ -68,12 +71,12 @@ int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC ...@@ -68,12 +71,12 @@ int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC
std::vector<INPUT_ITERATOR> neighborhood; neighborhood.reserve(input_size); std::vector<INPUT_ITERATOR> neighborhood; neighborhood.reserve(input_size);
_DBSCAN_PRIVATE::region_query(first, last, it, distance_func, std::back_inserter(neighborhood), epsilon); _DBSCAN_PRIVATE::region_query(first, last, it, distance_func, std::back_inserter(neighborhood), epsilon);
if ((int)neighborhood.size() < n_min) { if (_DBSCAN_PRIVATE::neighborhoodWeight(neighborhood.begin(), neighborhood.end(), weight_func) < n_min) {
// It's noise. // It's noise.
id_cache[idx] = 0; id_cache[idx] = 0;
} else { } else {
// It's the seed of a cluster. // It's the seed of a cluster.
_DBSCAN_PRIVATE::expand_cluster(first, last, it, distance_func, epsilon, n_min, id_cache, ++current_cluster_id); _DBSCAN_PRIVATE::expand_cluster(first, last, it, distance_func, epsilon, n_min, id_cache, ++current_cluster_id, weight_func);
} }
} }
...@@ -82,6 +85,15 @@ int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC ...@@ -82,6 +85,15 @@ int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC
return current_cluster_id; return current_cluster_id;
} }
/**
* Version of DBSCAN with weight_func being a constant 1.
*/
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR>
int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC distance_func,
OUTPUT_ITERATOR result, const double epsilon, const double n_min) {
return DBSCAN(first, last, distance_func, result, epsilon, n_min, typename _DBSCAN_PRIVATE::constant_1<typename INPUT_ITERATOR::value_type>());
}
} /* namespace Algorithm */ } /* namespace Algorithm */
} /* namespace ACG */ } /* namespace ACG */
#endif /* DBSCAN_HH_ */ #endif /* DBSCAN_HH_ */
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
* Author: ebke * Author: ebke
*/ */
#include <gtest/gtest.h> //#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector> #include <vector>
#include <map> #include <map>
...@@ -18,10 +19,11 @@ ...@@ -18,10 +19,11 @@
namespace { namespace {
const char * const test1_map[] = { const char * const test1_map[] = {
" ", " ",
" . ", " . b b . ",
" ", " ",
" a b ", " a b ",
" ", " ",
" ",
" a b b b ", " a b b b ",
" aa b b b ", " aa b b b ",
" aaaa . . b b b bbb b ", " aaaa . . b b b bbb b ",
...@@ -34,23 +36,25 @@ const char * const test1_map[] = { ...@@ -34,23 +36,25 @@ const char * const test1_map[] = {
" ", " ",
" ", " ",
" ", " ",
" . a cc ", " . a a a . cc ",
" cc ", " cc ",
" .. ", " .. ",
" . ", " . ",
" ", " ",
0 }; 0 };
const char * const test2_map[] = { "aaaaAAaaaa", 0 };
class Point { class Point {
public: public:
Point(double x, double y, char classifier) : x(x), y(y), classifier(classifier) {} Point(double x, double y, char classifier, double weight = 1.0) : x(x), y(y), weight(weight), classifier(classifier) {}
double length() const { double length() const {
return std::sqrt(x*x + y*y); return std::sqrt(x*x + y*y);
} }
Point operator- (const Point &rhs) const { Point operator- (const Point &rhs) const {
return Point(x-rhs.x, y-rhs.y, classifier); return Point(x-rhs.x, y-rhs.y, classifier, weight);
} }
double dist(const Point &rhs) const { double dist(const Point &rhs) const {
...@@ -64,23 +68,31 @@ class Point { ...@@ -64,23 +68,31 @@ class Point {
} }
}; };
double x, y; class WeightFunc {
public:
double operator() (const Point &a) const {
return a.weight;
}
};
double x, y, weight;
char classifier; char classifier;
}; };
template<class OSTREAM> template<class OSTREAM>
OSTREAM &operator<< (OSTREAM &stream, const Point &point) { OSTREAM &operator<< (OSTREAM &stream, const Point &point) {
return stream << "(" << point.x << ", " << point.y << ", " << "'" << point.classifier << "'" << ")"; return stream << "(" << point.x << ", " << point.y << ", " << point.weight << ", " << "'" << point.classifier << "'" << ")";
} }
template<class OUTPUT_ITERATOR> template<class OUTPUT_ITERATOR>
void parse_points(const char * const * input, OUTPUT_ITERATOR points_out) { void parse_points(const char * const * input, OUTPUT_ITERATOR points_out, double uc_weight = 1.0, double lc_weight = 1.0) {
int y = 0; int y = 0;
for (; *input != 0; ++input, ++y) { for (; *input != 0; ++input, ++y) {
int x = 0; int x = 0;
for (const char *it = *input; *it != 0; ++it, ++x) { for (const char *it = *input; *it != 0; ++it, ++x) {
if (!isspace(*it)) { if (!isspace(*it)) {
*points_out++ = Point(x, y, *it); const double weight = islower(*it) ? lc_weight : uc_weight;
*points_out++ = Point(x, y, *it, weight);
} }
} }
} }
...@@ -119,10 +131,40 @@ TEST(DBSCAN, manual_test_1) { ...@@ -119,10 +131,40 @@ TEST(DBSCAN, manual_test_1) {
std::vector<Point> points; std::vector<Point> points;
parse_points(test1_map, std::back_inserter(points)); parse_points(test1_map, std::back_inserter(points));
std::vector<int> clusters; std::vector<int> clusters;
EXPECT_EQ(3,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 4.0001, 3.0));
EXPECT_TRUE(checkClusterConsistency(points, clusters));
// Call both versions of DBSCAN.
EXPECT_EQ(3, EXPECT_EQ(3,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(), ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 4.0001, 3)); std::back_inserter(clusters), 4.0001, 3.0,
ACG::Algorithm::_DBSCAN_PRIVATE::constant_1<std::vector<Point>::iterator::value_type>()));
EXPECT_TRUE(checkClusterConsistency(points, clusters)); EXPECT_TRUE(checkClusterConsistency(points, clusters));
} }
/*
 * Weighted DBSCAN on the single-row map test2_map where upper and lower case
 * points both get weight 1.0. Expects exactly one cluster that contains
 * every point.
 */
TEST(DBSCAN, manual_test_2_a) {
std::vector<Point> points;
// Arguments three and four are the weights for upper and lower case characters.
parse_points(test2_map, std::back_inserter(points), 1.0, 1.0);
std::vector<int> clusters;
// epsilon = 1.01, n_min = 1.2; each point is weighted by its own weight member.
EXPECT_EQ(1,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 1.01, 1.2, Point::WeightFunc()));
// All ten points are expected to carry cluster id 1.
EXPECT_THAT(clusters,
::testing::ElementsAre(1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
}
/*
 * Same map and parameters as manual_test_2_a, but lower case points only
 * weigh 0.5 while upper case points keep weight 1.0. Expects one cluster
 * that no longer reaches the two outermost points on either side.
 */
TEST(DBSCAN, manual_test_2_b) {
std::vector<Point> points;
// Upper case weight 1.0, lower case weight 0.5.
parse_points(test2_map, std::back_inserter(points), 1.0, .5);
std::vector<int> clusters;
// epsilon = 1.01, n_min = 1.2; each point is weighted by its own weight member.
EXPECT_EQ(1,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 1.01, 1.2, Point::WeightFunc()));
// The first two and last two points are expected to get id 0 (presumably
// noise); the six points in between are expected to get cluster id 1.
EXPECT_THAT(clusters,
::testing::ElementsAre(0, 0, 1, 1, 1, 1, 1, 1, 0, 0));
}
} }
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment