Commit d78107a3 authored by Hans-Christian Ebke's avatar Hans-Christian Ebke
Browse files

Algorithm/DBSCAN: Improved and fixed DBSCAN.

git-svn-id: http://www.openflipper.org/svnrepo/OpenFlipper/branches/Free@14639 383ad7c9-94d9-4d36-a494-682f7c89f535
parent e92a6fa3
......@@ -12,6 +12,22 @@ namespace Algorithm {
namespace _DBSCAN_PRIVATE {
/**
 * Weight functor that assigns the same weight, 1.0, to every element.
 *
 * Summed over a neighborhood it yields the number of elements in that
 * neighborhood, i.e. the unweighted element count.
 *
 * @tparam VALUE_TYPE Type of the elements that are being weighted.
 */
template<typename VALUE_TYPE>
class constant_1 {
    public:
        /**
         * The element is never inspected. It is therefore taken by const
         * reference (no copy per call, works for non-copyable types) and
         * left unnamed (no unused-parameter warning).
         *
         * @return Always 1.0.
         */
        inline double operator()(const VALUE_TYPE &) const {
            return 1.0;
        }
};
/**
 * Adds up the weights of all elements of a neighborhood.
 *
 * The range [first, last) does not hold the elements themselves but
 * iterators pointing to them, hence the double dereference.
 *
 * @param first Start of the range of element iterators.
 * @param last End (exclusive) of the range of element iterators.
 * @param weight_func Unary function mapping an element to its weight.
 * @return The sum of the weights; 0 for an empty range.
 */
template<typename INPUT_ITERATOR, typename WEIGHT_FUNC>
inline double neighborhoodWeight(INPUT_ITERATOR first, const INPUT_ITERATOR last, WEIGHT_FUNC &weight_func) {
    double total = 0;
    while (first != last) {
        total += weight_func(**first);
        ++first;
    }
    return total;
}
/*
* Private functions.
*/
......@@ -29,11 +45,11 @@ void region_query(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_I
}
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC>
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename WEIGHT_FUNC>
inline
void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_ITERATOR center,
DISTANCE_FUNC &distance_func, const double epsilon, const int n_min,
std::vector<int> &id_cache, const int current_cluster_id) {
DISTANCE_FUNC &distance_func, const double epsilon, const double n_min,
std::vector<int> &id_cache, const int current_cluster_id, WEIGHT_FUNC &weight_func) {
std::queue<INPUT_ITERATOR> bfq;
......@@ -59,7 +75,7 @@ void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT
* If the current element is not inside a dense area,
* we don't use it as a seed to expand the cluster.
*/
if ((int)neighborhood.size() < n_min)
if (neighborhoodWeight(neighborhood.begin(), neighborhood.end(), weight_func) < n_min)
continue;
/*
......@@ -69,9 +85,9 @@ void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT
it != it_end; ++it) {
const size_t neighbor_idx = std::distance(first, *it);
/*
* Is the element classified, yet?
* Is the element classified as non-noise, yet?
*/
if (id_cache[neighbor_idx] < 0) {
if (id_cache[neighbor_idx] <= 0) {
/*
* Classify it and use it as a seed.
*/
......
......@@ -30,6 +30,7 @@ namespace Algorithm {
*
* Returned cluster indices are guaranteed to be a continuous range starting at 1.
*
*
* Result has to support the operation `*result++ = <int>`.
*
* Example:
......@@ -46,11 +47,13 @@ namespace Algorithm {
* @param result Output iterator to the initial position of the result range. The range includes as many elements as [first, last).
* @param epsilon The density-reachable neighborhood radius.
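* @param n_min The density threshold: the minimum total weight of an element's epsilon neighborhood for the element to be treated as lying in a dense area.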
* @param weight_func Unary function taking an element as its argument and returning the weight with which that element counts towards the total weight of a neighborhood.
* Standard, unweighted DBSCAN corresponds to a constant weight of 1.
* @return The number of clusters found.
*/
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR>
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR, typename WEIGHT_FUNC>
int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC distance_func,
OUTPUT_ITERATOR result, const double epsilon, const int n_min) {
OUTPUT_ITERATOR result, const double epsilon, const double n_min, WEIGHT_FUNC weight_func) {
const size_t input_size = std::distance(first, last);
......@@ -68,12 +71,12 @@ int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC
std::vector<INPUT_ITERATOR> neighborhood; neighborhood.reserve(input_size);
_DBSCAN_PRIVATE::region_query(first, last, it, distance_func, std::back_inserter(neighborhood), epsilon);
if ((int)neighborhood.size() < n_min) {
if (_DBSCAN_PRIVATE::neighborhoodWeight(neighborhood.begin(), neighborhood.end(), weight_func) < n_min) {
// It's noise.
id_cache[idx] = 0;
} else {
// It's the seed of a cluster.
_DBSCAN_PRIVATE::expand_cluster(first, last, it, distance_func, epsilon, n_min, id_cache, ++current_cluster_id);
_DBSCAN_PRIVATE::expand_cluster(first, last, it, distance_func, epsilon, n_min, id_cache, ++current_cluster_id, weight_func);
}
}
......@@ -82,6 +85,15 @@ int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC
return current_cluster_id;
}
/**
* Version of DBSCAN with weight_func being a constant 1.
*/
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR>
int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC distance_func,
OUTPUT_ITERATOR result, const double epsilon, const double n_min) {
return DBSCAN(first, last, distance_func, result, epsilon, n_min, typename _DBSCAN_PRIVATE::constant_1<typename INPUT_ITERATOR::value_type>());
}
} /* namespace Algorithm */
} /* namespace ACG */
#endif /* DBSCAN_HH_ */
......@@ -5,7 +5,8 @@
* Author: ebke
*/
#include <gtest/gtest.h>
//#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>
#include <map>
......@@ -18,10 +19,11 @@
namespace {
const char * const test1_map[] = {
" ",
" . ",
" . b b . ",
" ",
" a b ",
" ",
" ",
" a b b b ",
" aa b b b ",
" aaaa . . b b b bbb b ",
......@@ -34,23 +36,25 @@ const char * const test1_map[] = {
" ",
" ",
" ",
" . a cc ",
" . a a a . cc ",
" cc ",
" .. ",
" . ",
" ",
0 };
/*
 * Single-row map made of ten adjacent points. It mixes lower case and
 * upper case characters, which parse_points can weight differently.
 * The list is terminated by a 0 entry.
 */
const char * const test2_map[] = {
        "aaaaAAaaaa",
        0
};
class Point {
public:
Point(double x, double y, char classifier) : x(x), y(y), classifier(classifier) {}
Point(double x, double y, char classifier, double weight = 1.0) : x(x), y(y), weight(weight), classifier(classifier) {}
double length() const {
return std::sqrt(x*x + y*y);
}
Point operator- (const Point &rhs) const {
return Point(x-rhs.x, y-rhs.y, classifier);
return Point(x-rhs.x, y-rhs.y, classifier, weight);
}
double dist(const Point &rhs) const {
......@@ -64,23 +68,31 @@ class Point {
}
};
double x, y;
class WeightFunc {
public:
double operator() (const Point &a) const {
return a.weight;
}
};
double x, y, weight;
char classifier;
};
/**
 * Writes a point to a stream in the form (x, y, weight, 'classifier').
 *
 * @param stream The stream to write to.
 * @param point The point to print.
 * @return The result of the chained stream insertions.
 */
template<class OSTREAM>
OSTREAM &operator<< (OSTREAM &stream, const Point &point) {
    // Only one return statement: the earlier variant without the weight
    // made this one unreachable, so the weight never got printed.
    return stream << "(" << point.x << ", " << point.y << ", " << point.weight << ", " << "'" << point.classifier << "'" << ")";
}
template<class OUTPUT_ITERATOR>
void parse_points(const char * const * input, OUTPUT_ITERATOR points_out) {
void parse_points(const char * const * input, OUTPUT_ITERATOR points_out, double uc_weight = 1.0, double lc_weight = 1.0) {
int y = 0;
for (; *input != 0; ++input, ++y) {
int x = 0;
for (const char *it = *input; *it != 0; ++it, ++x) {
if (!isspace(*it)) {
*points_out++ = Point(x, y, *it);
const double weight = islower(*it) ? lc_weight : uc_weight;
*points_out++ = Point(x, y, *it, weight);
}
}
}
......@@ -119,10 +131,40 @@ TEST(DBSCAN, manual_test_1) {
std::vector<Point> points;
parse_points(test1_map, std::back_inserter(points));
std::vector<int> clusters;
EXPECT_EQ(3,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 4.0001, 3.0));
EXPECT_TRUE(checkClusterConsistency(points, clusters));
// Call both versions of DBSCAN.
EXPECT_EQ(3,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 4.0001, 3));
std::back_inserter(clusters), 4.0001, 3.0,
ACG::Algorithm::_DBSCAN_PRIVATE::constant_1<std::vector<Point>::iterator::value_type>()));
EXPECT_TRUE(checkClusterConsistency(points, clusters));
}
/*
 * Weighted DBSCAN on test2_map with identical weights: parse_points is
 * given 1.0 for both the upper case and the lower case weight.
 * Expects a single cluster that contains all ten points.
 */
TEST(DBSCAN, manual_test_2_a) {
std::vector<Point> points;
// Argument order is (uc_weight, lc_weight); both are 1.0 here.
parse_points(test2_map, std::back_inserter(points), 1.0, 1.0);
std::vector<int> clusters;
// epsilon = 1.01, n_min = 1.2, weights read from the points via Point::WeightFunc.
EXPECT_EQ(1,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 1.01, 1.2, Point::WeightFunc()));
// Every point is expected to carry cluster id 1.
EXPECT_THAT(clusters,
::testing::ElementsAre(1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
}
/*
 * Weighted DBSCAN on test2_map with differing weights: upper case
 * characters get weight 1.0, lower case characters get weight 0.5.
 * Same epsilon and n_min as manual_test_2_a. Still expects one cluster,
 * but the two outermost points on either side are expected to be noise.
 */
TEST(DBSCAN, manual_test_2_b) {
std::vector<Point> points;
// Argument order is (uc_weight, lc_weight): upper case 1.0, lower case 0.5.
parse_points(test2_map, std::back_inserter(points), 1.0, .5);
std::vector<int> clusters;
EXPECT_EQ(1,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 1.01, 1.2, Point::WeightFunc()));
// 0 marks noise, 1 is the id of the only cluster.
// NOTE(review): presumably the outer points fall below n_min because their
// neighborhoods hold only lower case (0.5) points -- confirm against region_query.
EXPECT_THAT(clusters,
::testing::ElementsAre(0, 0, 1, 1, 1, 1, 1, 1, 0, 0));
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment