Commit 80f3bc33 authored by Hans-Christian Ebke's avatar Hans-Christian Ebke
Browse files

Added DBSCAN algorithm.

git-svn-id: http://www.openflipper.org/svnrepo/OpenFlipper/branches/Free@14618 383ad7c9-94d9-4d36-a494-682f7c89f535
parent 606943d0
/*
* DBSCANT.cc
*
* Created on: May 18, 2012
* Author: ebke
*/
#include <queue>
namespace ACG {
namespace Algorithm {
namespace _DBSCAN_PRIVATE {
/*
* Private functions.
*/
/*
 * Collects the epsilon-neighborhood of `center` within [first, last).
 *
 * Every iterator in the range whose element lies at a distance of at most
 * `epsilon` from `*center` (as measured by `distance_func`) is written to
 * `result`. The center itself is never reported. Note that iterators, not
 * elements, are written to the output.
 */
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR>
inline
void region_query(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_ITERATOR center,
        DISTANCE_FUNC &distance_func, OUTPUT_ITERATOR result, const double epsilon) {
    while (first != last) {
        /* Skip the center; the distance is only evaluated for other elements. */
        if (!(center == first) && distance_func(*center, *first) <= epsilon) {
            *result = first;
            ++result;
        }
        ++first;
    }
}
/*
 * Grows the cluster `current_cluster_id` breadth-first, starting at `center`.
 *
 * `id_cache` holds one label per element of [first, last), indexed by the
 * element's offset from `first`: negative means "not classified yet",
 * 0 means "noise", positive values are cluster ids.
 *
 * `center` is labelled unconditionally. Every element taken from the queue
 * whose epsilon-neighborhood (excluding itself) contains at least `n_min`
 * elements is treated as a core element: its unclassified neighbors are
 * labelled and queued as further seeds, and neighbors previously labelled as
 * noise are re-labelled as members (border elements) of this cluster.
 */
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC>
inline
void expand_cluster(INPUT_ITERATOR first, const INPUT_ITERATOR last, const INPUT_ITERATOR center,
        DISTANCE_FUNC &distance_func, const double epsilon, const int n_min,
        std::vector<int> &id_cache, const int current_cluster_id) {

    std::queue<INPUT_ITERATOR> bfq;
    bfq.push(center);
    id_cache[std::distance(first, center)] = current_cluster_id;

    /* Scratch buffer, reused for every region query of this expansion. */
    std::vector<INPUT_ITERATOR> neighborhood; neighborhood.reserve(std::distance(first, last));

    while (!bfq.empty()) {
        INPUT_ITERATOR current_element = bfq.front();
        bfq.pop();

        /*
         * Precondition: id_cache[current_idx] > 0
         */
        neighborhood.clear();
        region_query(first, last, current_element, distance_func, std::back_inserter(neighborhood), epsilon);

        /*
         * If the current element is not inside a dense area,
         * we don't use it as a seed to expand the cluster.
         */
        if (static_cast<int>(neighborhood.size()) < n_min)
            continue;

        for (typename std::vector<INPUT_ITERATOR>::iterator it = neighborhood.begin(), it_end = neighborhood.end();
                it != it_end; ++it) {
            const size_t neighbor_idx = std::distance(first, *it);
            int &neighbor_id = id_cache[neighbor_idx];

            if (neighbor_id < 0) {
                /*
                 * Not classified yet: classify it and use it as a seed.
                 */
                neighbor_id = current_cluster_id;
                bfq.push(*it);
            } else if (neighbor_id == 0) {
                /*
                 * Previously labelled as noise, i.e. already known not to be
                 * a core element. It is density-reachable from the current
                 * core element, so it is a border element of this cluster.
                 * It is not queued because it cannot expand the cluster.
                 */
                neighbor_id = current_cluster_id;
            }
        }
    }
}
} /* namespace _DBSCAN_PRIVATE */
} /* namespace Algorithm */
} /* namespace ACG */
/*
* DBSCANT.hh
*
* Created on: May 18, 2012
* Author: ebke
*/
#ifndef DBSCAN_HH_
#define DBSCAN_HH_
#include <vector>
#include <iterator>
#include <algorithm>
/*
* Private functions.
*/
#include "DBSCANT.cc"
namespace ACG {
namespace Algorithm {
/**
 * Implements the DBSCAN algorithm introduced in
 *
 * Ester, Martin, Hans-Peter Kriegel, Jörg Sander, and Xiaowei Xu. “A Density-based Algorithm for Discovering Clusters in Large Spatial Databases with Noise.” 226–231. AAAI Press, 1996.
 *
 * Classifies the sequence [first, last) into clusters. Outputs `int`s into the result sequence, one
 * for each element in the input sequence. `0` means noise, values greater than 0 specify a cluster index.
 *
 * Returned cluster indices are guaranteed to be a contiguous range starting at 1.
 *
 * Result has to support the operation `*result++ = <int>`.
 *
 * The neighborhood of an element never contains the element itself, so `n_min` is the number of
 * *other* elements that have to lie within `epsilon` for an element to seed or expand a cluster.
 *
 * @param first Input iterator to the initial position of the data set.
 * @param last Input iterator to the final position of the data set.
 * @param distance_func Binary function taking two elements as arguments. Returns the distance between these elements.
 * @param result Output iterator to the initial position of the result range. The range includes as many elements as [first, last).
 * @param epsilon The density-reachable neighborhood radius.
 * @param n_min The density-reachable count threshold.
 * @return The number of clusters found.
 */
template<typename INPUT_ITERATOR, typename DISTANCE_FUNC, typename OUTPUT_ITERATOR>
int DBSCAN(const INPUT_ITERATOR first, const INPUT_ITERATOR last, DISTANCE_FUNC distance_func,
        OUTPUT_ITERATOR result, const double epsilon, const int n_min) {

    const size_t input_size = std::distance(first, last);

    // One label per input element: -1 = not classified yet, 0 = noise, > 0 = cluster index.
    std::vector<int> id_cache(input_size, -1);

    // Scratch buffer for the region queries. Allocated once and cleared per
    // query instead of being re-allocated for every visited element.
    std::vector<INPUT_ITERATOR> neighborhood; neighborhood.reserve(input_size);

    size_t idx = 0;
    int current_cluster_id = 0;

    for (INPUT_ITERATOR it = first; it != last; ++it, ++idx) {
        // Visit every element only once.
        if (id_cache[idx] >= 0) continue;

        // Gather neighborhood.
        neighborhood.clear();
        _DBSCAN_PRIVATE::region_query(first, last, it, distance_func, std::back_inserter(neighborhood), epsilon);

        if (static_cast<int>(neighborhood.size()) < n_min) {
            // It's noise.
            id_cache[idx] = 0;
        } else {
            // It's the seed of a cluster.
            _DBSCAN_PRIVATE::expand_cluster(first, last, it, distance_func, epsilon, n_min, id_cache, ++current_cluster_id);
        }
    }

    std::copy(id_cache.begin(), id_cache.end(), result);

    return current_cluster_id;
}
} /* namespace Algorithm */
} /* namespace ACG */
#endif /* DBSCAN_HH_ */
...@@ -129,3 +129,19 @@ target_link_libraries ( ACG OpenMeshCore ...@@ -129,3 +129,19 @@ target_link_libraries ( ACG OpenMeshCore
${GLEW_LIBRARY} ${GLEW_LIBRARY}
${GLUT_LIBRARIES} ${GLUT_LIBRARIES}
${ADDITIONAL_LINK_LIBRARIES} ) ${ADDITIONAL_LINK_LIBRARIES} )
# Optional unit tests: the ACG_tests executable is only configured when the
# GoogleTest find module reports success through GTEST_FOUND.
find_package(GoogleTest)
if (GTEST_FOUND)
enable_testing()
# Every *.cc file below tests/ becomes part of the test executable.
file(GLOB_RECURSE TEST_SOURCES tests/*.cc)
# NOTE(review): TESTED_SOURCES is not referenced anywhere in this snippet;
# confirm whether it is used elsewhere or is only informational.
set(TESTED_SOURCES
Algorithm/DBSCANT.cc
)
include_directories(${GTEST_INCLUDE_DIRS} ${OPENMESH_INCLUDE_DIR})
link_directories ( ${GTEST_LIBRARY_DIR})
add_executable (ACG_tests ${TEST_SOURCES})
target_link_libraries(ACG_tests
${GTEST_LIBRARIES} ${OPENMESH_LIBRARY}
)
# Registers the executable with CTest as a single test case.
add_test(AllTestsIn_ACG_tests ACG_tests)
endif(GTEST_FOUND)
/*
* DBSCAN_test.cpp
*
* Created on: May 18, 2012
* Author: ebke
*/
#include <gtest/gtest.h>
#include <cctype>
#include <cmath>
#include <cstring>
#include <map>
#include <vector>
#include "../../Algorithm/DBSCANT.hh"
namespace {
/*
 * ASCII-art fixture for the manual DBSCAN test.
 *
 * Each string is one row (y coordinate), each character one column
 * (x coordinate). Whitespace is empty space; any other character places a
 * point at that position and serves as its expected classifier: '.' marks a
 * point expected to be classified as noise, a letter names the cluster the
 * point is expected to end up in. The array is terminated by a null pointer.
 */
const char * const test1_map[] = {
" ",
" . ",
" ",
" a b ",
" ",
" a b b b ",
" aa b b b ",
" aaaa . . b b b bbb b ",
" aa b b ",
" a a a a ",
" a a a a a a ",
" a a a ",
" ",
" aaa ",
" ",
" ",
" ",
" . a cc ",
" cc ",
" .. ",
" . ",
" ",
0 };
/*
 * A 2D point carrying the classifier character it was parsed from.
 *
 * Doubles as a 2D vector: the difference of two points is again a Point,
 * and length() is its Euclidean norm.
 */
class Point {
    public:
        double x, y;
        char classifier;

        Point(double px, double py, char tag) : x(px), y(py), classifier(tag) {}

        /* Euclidean norm of (x, y). */
        double length() const {
            const double squared_norm = x*x + y*y;
            return std::sqrt(squared_norm);
        }

        /* Component-wise difference; the result keeps the classifier of the left operand. */
        Point operator- (const Point &rhs) const {
            const double dx = x - rhs.x;
            const double dy = y - rhs.y;
            return Point(dx, dy, classifier);
        }

        /* Euclidean distance between this point and rhs. */
        double dist(const Point &rhs) const {
            const Point delta = *this - rhs;
            return delta.length();
        }

        /* Function object exposing dist() as a binary distance function. */
        class DistanceFunc {
            public:
                double operator() (const Point &a, const Point &b) const {
                    return a.dist(b);
                }
        };
};
/*
 * Streams a point as "(x, y, 'classifier')".
 *
 * Returns the very stream object that was passed in. Returning the result of
 * the insertion chain instead would yield the base stream type of the
 * built-in inserters, which cannot bind to OSTREAM& when OSTREAM is a derived
 * stream type (e.g. std::ostringstream).
 */
template<class OSTREAM>
OSTREAM &operator<< (OSTREAM &stream, const Point &point) {
    stream << "(" << point.x << ", " << point.y << ", " << "'" << point.classifier << "'" << ")";
    return stream;
}
/*
 * Converts an ASCII-art map into points.
 *
 * `input` is a null-pointer-terminated array of C strings. Every
 * non-whitespace character yields one Point whose x is the character's
 * column, whose y is the index of its row and whose classifier is the
 * character itself. The points are written to `points_out` row by row,
 * left to right.
 */
template<class OUTPUT_ITERATOR>
void parse_points(const char * const * input, OUTPUT_ITERATOR points_out) {
    int y = 0;
    for (; *input != 0; ++input, ++y) {
        int x = 0;
        for (const char *it = *input; *it != 0; ++it, ++x) {
            /*
             * std::isspace requires a value representable as unsigned char;
             * passing a negative plain char is undefined behavior.
             */
            if (!std::isspace(static_cast<unsigned char>(*it))) {
                *points_out++ = Point(x, y, *it);
            }
        }
    }
}
/*
 * Checks that a clustering result agrees with the classifiers of the points.
 *
 * `cluster_map[i]` is the cluster id assigned to `points[i]`. The check
 * succeeds if
 *  - both sequences have the same length,
 *  - points classified as '.' (and only those) are mapped to cluster id 0,
 *  - all points mapped to the same cluster id share the same classifier.
 *
 * Note that two different cluster ids sharing one classifier are not
 * reported by this check.
 */
testing::AssertionResult checkClusterConsistency(const std::vector<Point> &points, const std::vector<int> &cluster_map) {
    /*
     * Guard against reading past the end of cluster_map below.
     */
    if (points.size() != cluster_map.size()) {
        return testing::AssertionFailure() << "Got " << cluster_map.size() << " cluster ids for " << points.size() << " points.";
    }

    /* Classifier of the first point seen for each cluster id. */
    std::map<int, char> cluster_2_classifier;

    std::vector<int>::const_iterator cluster_it = cluster_map.begin();
    for (std::vector<Point>::const_iterator point_it = points.begin(), point_it_end = points.end();
            point_it != point_it_end; ++point_it, ++cluster_it) {
        std::map<int, char>::const_iterator map_it = cluster_2_classifier.find(*cluster_it);
        if (map_it == cluster_2_classifier.end()) {
            /*
             * First point of this cluster id: remember its classifier and
             * verify the noise <-> cluster 0 correspondence.
             */
            cluster_2_classifier[*cluster_it] = point_it->classifier;

            if (point_it->classifier == '.' && *cluster_it != 0) {
                return testing::AssertionFailure() << "Noise point " << *point_it << " was mapped to non-noise cluster " << *cluster_it << ".";
            }

            if (*cluster_it == 0 && point_it->classifier != '.') {
                return testing::AssertionFailure() << "Non-noise point " << *point_it << " was mapped to noise cluster (0).";
            }
        } else {
            /*
             * Cluster id seen before: the classifier has to match the one
             * recorded for it.
             */
            if (map_it->second != point_it->classifier) {
                return testing::AssertionFailure() << "Point " << *point_it << " was mapped to cluster '" << map_it->second << "'.";
            }
        }
    }

    return testing::AssertionSuccess() << "All points were mapped to clusters as expected.";
}
/*
 * Clusters the points drawn in test1_map and compares the result with the
 * classifiers drawn in the map.
 */
TEST(DBSCAN, manual_test_1) {
std::vector<Point> points;
parse_points(test1_map, std::back_inserter(points));
std::vector<int> clusters;
// Exactly three clusters are expected for epsilon = 4.0001 and n_min = 3.
EXPECT_EQ(3,
ACG::Algorithm::DBSCAN(points.begin(), points.end(), Point::DistanceFunc(),
std::back_inserter(clusters), 4.0001, 3));
// Every point has to end up in the cluster (or noise) its classifier names.
EXPECT_TRUE(checkClusterConsistency(points, clusters));
}
}
/*
* main.cc
*
* Created on: May 18, 2012
* Author: ebke
*/
#include "gtest/gtest.h"
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment