$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
From: franklin.jonathan_at_[hidden]
Date: 2008-05-08 13:40:52
Author: jfranklin
Date: 2008-05-08 13:40:52 EDT (Thu, 08 May 2008)
New Revision: 45221
URL: http://svn.boost.org/trac/boost/changeset/45221
Log:
Moved the cluster_data template into its own header.
Cleaned up the code slightly, and added some doc comments.
Added:
   sandbox/boost/algorithm/cluster/cluster_data.hpp   (contents, props changed)
Text files modified: 
   sandbox/boost/algorithm/cluster/dbscan.hpp |    80 ++++++++++++--------------------------- 
   1 files changed, 25 insertions(+), 55 deletions(-)
Added: sandbox/boost/algorithm/cluster/cluster_data.hpp
==============================================================================
--- (empty file)
+++ sandbox/boost/algorithm/cluster/cluster_data.hpp	2008-05-08 13:40:52 EDT (Thu, 08 May 2008)
@@ -0,0 +1,63 @@
+#if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
+#define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
+
+#include <boost/shared_ptr.hpp>
+#include <vector>
+
+namespace boost
+{
+namespace algorithm
+{
+namespace cluster
+{
+
+/*! TODO: Document this type.
+ */
+template<typename Cluster>
+struct cluster_data
+{
+  typedef Cluster value_type;
+  typedef std::vector<value_type> clusters;
+  cluster_data() : m_pClusters(new clusters) {}
+  ~cluster_data() {}
+
+  cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
+  cluster_data const & cluster_data::operator=(cluster_data const & rhs)
+  { m_pClusters = rhs.m_pClusters; }
+
+  typedef typename clusters::iterator iterator;
+  typedef typename clusters::const_iterator const_iterator;
+  typedef typename clusters::reverse_iterator reverse_iterator;
+
+  iterator begin() { return m_pClusters->begin(); }
+  iterator end() { return m_pClusters->end(); }
+
+  const_iterator begin() const { return m_pClusters->begin(); }
+  const_iterator end() const { return m_pClusters->end(); }
+
+  iterator rbegin() { return m_pClusters->rbegin(); }
+  iterator rend() { return m_pClusters->rend(); }
+
+  iterator insert(iterator loc, value_type const & val)
+  { return m_pClusters->insert(loc, val); }
+
+  void push_back(value_type const & v) { m_pClusters->push_back(v); }
+  void pop_back() { m_pClusters->pop_back(); }
+
+  value_type & back() { return m_pClusters->back(); }
+  value_type const & back() const { return m_pClusters->back(); }
+
+private:
+  boost::shared_ptr<clusters> m_pClusters;
+};
+
+} // End of namespace cluster
+
+// TODO: Should we be exporting this?
+using namespace cluster;
+
+} // End of namespace algorithm
+
+} // End of namespace boost
+
+#endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
Modified: sandbox/boost/algorithm/cluster/dbscan.hpp
==============================================================================
--- sandbox/boost/algorithm/cluster/dbscan.hpp	(original)
+++ sandbox/boost/algorithm/cluster/dbscan.hpp	2008-05-08 13:40:52 EDT (Thu, 08 May 2008)
@@ -1,11 +1,8 @@
 #if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
 #define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
 
-#include <boost/range/begin.hpp>
-#include <boost/range/end.hpp>
-#include <boost/shared_ptr.hpp>
+#include <boost/algorithm/cluster/cluster_data.hpp>
 #include <vector>
-#include <list>
 
 namespace boost
 {
@@ -18,13 +15,13 @@
 {
 
 // TODO: Replace this naive query function w/ R*-tree or fractional cascading.
-// It makes the runtime quadratic.
+// This query mechanism makes the runtime quadratic.
 template<typename NTupleIter, typename DistFun>
 static void query(
   NTupleIter const & query_pt,
   NTupleIter const & begin,
   NTupleIter const & end,
-  float eps,
+  typename NTupleIter::difference_type eps,
   DistFun const & d,
   std::vector<NTupleIter> & v)
 {
@@ -40,7 +37,7 @@
   }
 }
 
-// TODO: Replace this so we don't have to store the cluster info for each tuple.
+// TODO: Replace this so we don't have to store the cluster info for each tuple?
 template<typename NTupleIter>
 struct node
 {
@@ -52,46 +49,14 @@
 
 } // End of namespace detail.
 
-// TODO: Document this type.
-template<typename Cluster>
-struct cluster_data
-{
-  typedef Cluster value_type;
-  typedef std::vector<value_type> clusters;
-  cluster_data() : m_pClusters(new clusters) {}
-  ~cluster_data() {}
-
-  cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
-  cluster_data const & cluster_data::operator=(cluster_data const & rhs)
-  { m_pClusters = rhs.m_pClusters; }
-
-  typedef typename clusters::iterator iterator;
-  typedef typename clusters::const_iterator const_iterator;
-  typedef typename clusters::reverse_iterator reverse_iterator;
-
-  iterator begin() { return m_pClusters->begin(); }
-  iterator end() { return m_pClusters->end(); }
-
-  const_iterator begin() const { return m_pClusters->begin(); }
-  const_iterator end() const { return m_pClusters->end(); }
-
-  iterator rbegin() { return m_pClusters->rbegin(); }
-  iterator rend() { return m_pClusters->rend(); }
-
-  iterator insert(iterator loc, value_type const & val)
-  { return m_pClusters->insert(loc, val); }
-
-  void push_back(value_type const & v) { m_pClusters->push_back(v); }
-  void pop_back() { m_pClusters->pop_back(); }
-
-  value_type & back() { return m_pClusters->back(); }
-  value_type const & back() const { return m_pClusters->back(); }
-
-private:
-  boost::shared_ptr<clusters> m_pClusters;
-};
-
-/**
+/*! DBSCAN density-based clustering algorithm.
+ * TODO: Document this function.
+ * \param[in] begin
+ * \param[in] end
+ * \param[in] eps
+ * \param[in] min_points
+ * \param[in] d
+ * \return The cluster data (partitioning of the tuples).
  */
 template<typename Cluster, typename NTupleIter, typename DistFun>
 cluster_data<Cluster>
@@ -101,7 +66,10 @@
        size_t min_points,
        DistFun const & d)
 {
-  // TODO: Rework the algorithm to NOT make this extra collection.
+  int const UNCLASSIFIED = -1;
+  int const NOISE = 0;
+
+  // TODO: Rework the algorithm to NOT make this extra collection?
   typedef detail::node<NTupleIter> node;
   typedef std::vector<node> ntuple_nodes;
   ntuple_nodes tuples;
@@ -111,24 +79,25 @@
   for(NTupleIter it = begin; it != end; ++it)
   {
     //++num_elems;
-    //it->cluster = UNCLASSIFIED;
     tuples.push_back(node(it));
   }
 
   typedef cluster_data<std::vector<NTupleIter> > cluster_data;
   cluster_data p;
 
-  // Do it...
+  // TODO: We should try to make cluster_num go away.
   int cluster_num = 0;
   for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
   {
-    if (it->cluster != UNCLASSIFIED) // Been classified.
+    // Skip this tuple if its already been classified as a cluster or noise.
+    if (it->cluster != UNCLASSIFIED)
       continue;
 
     // Expand cluster.
 
     std::vector<ntuple_nodes::iterator> seeds;
     detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds);
+    // If the neighborhood of this tuple is too small, then mark it as noise.
     if (seeds.size() < min_points)
     {
       it->cluster = NOISE;
@@ -137,20 +106,20 @@
 
     // Start the next cluster.
     ++cluster_num;
-    p.push_back(Cluster());
+    p.push_back(Cluster()); // TODO: This is goofy.
     Cluster & cur_cluster = p.back();
 
-    // Mark entire neighborhood as part of current cluster.
+    // Mark entire neighborhood as part of the current cluster.
     it->cluster = cluster_num;
     cur_cluster.push_back(it->tuple);
-    // TODO: Remove it from noise.
     for (size_t n = 0; n < seeds.size(); ++n)
     {
       seeds[n]->cluster = cluster_num;
       cur_cluster.push_back(seeds[n]->tuple);
-      // TODO: Remove it from noise.
     }
 
+    // Keep adding seeds and processing them until we find all points that
+    // are Density Reachable.
     while (! seeds.empty())
     {
       ntuple_nodes::iterator cur = seeds.back();
@@ -181,6 +150,7 @@
 
 } // End of namespace cluster
 
+// TODO: Should we be exporting this?
 using namespace cluster;
 
 } // End of namespace algorithm