$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r64003 - in sandbox/SOC/2010/stringalgos: boost/algorithm/string boost/algorithm/string/detail boost/algorithm/string/string_search boost/algorithm/string/string_search/detail libs/algorithm/string/test
From: mstefanro_at_[hidden]
Date: 2010-07-13 21:24:21
Author: mstefanro
Date: 2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
New Revision: 64003
URL: http://svn.boost.org/trac/boost/changeset/64003
Log:
[GSoC2010][StringAlgo] Boyer-Moore and Suffix Array Search (improvement required on both, latter needs different algorithm for more efficiency); some bugfixes.
Added:
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search.hpp   (contents, props changed)
Text files modified: 
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp                    |    46 +++++++                                 
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp                           |   243 ++++++++++++++++++--------------------- 
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp        |   122 +++++++++++++++++++                     
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/detail/rabin_karp.hpp  |     2                                         
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp |     6                                         
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp       |    97 +++++++--------                         
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp         |    53 +-------                                
   sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp       |   199 ++++++++++++++++++++++++++++++++        
   sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp                  |    23 +++                                     
   9 files changed, 552 insertions(+), 239 deletions(-)
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/detail/finder.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -21,9 +21,49 @@
 #include <boost/range/empty.hpp>
 #include <boost/range/as_literal.hpp>
 
-namespace boost {
-    namespace algorithm {
-        namespace detail {
+#include <boost/iterator/iterator_traits.hpp>
+
+#include <boost/mpl/and.hpp>
+#include <boost/type_traits/remove_cv.hpp>
+
+namespace boost { namespace algorithm { namespace detail {
+    template <class T, class U> struct is_pointer_to :
+        boost::mpl::and_<
+            typename boost::is_pointer<T>,
+            typename boost::is_same<
+                typename boost::remove_cv<typename boost::remove_pointer<T>::type>::type,
+                U>
+        > {};
+    template <class Range1T, class Range2T, class ComparatorT, class AllocatorT>
+    struct finder_typedefs
+    {
+        //! The type of the substring
+        typedef Range1T substring_type;
+        //! The type of the string
+        typedef Range2T string_type;
+        //! The type of the comparator
+        typedef ComparatorT comparator_type;
+        //! The type of the allocator
+        typedef AllocatorT allocator_type;
+        //! The type of the substring's iterator
+        typedef typename boost::range_const_iterator<substring_type>::type
+            substring_iterator_type;
+        //! The type of the string's iterator
+        typedef typename boost::range_const_iterator<string_type>::type
+            string_iterator_type;
+        //! The character type of the substring
+        typedef typename boost::iterator_value<substring_iterator_type>::type
+            substring_char_type;
+        //! The character type of the string
+        typedef typename boost::iterator_value<string_iterator_type>::type
+            string_char_type;
+        typedef typename boost::iterator_range<substring_iterator_type>
+            substring_range_type;
+        typedef typename boost::iterator_range<string_iterator_type>
+            string_range_type;
+        typedef typename boost::iterator_difference<string_iterator_type>::type
+            string_difference_type;
+    };
 
 
 //  find first functor -----------------------------------------------//
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/finder.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -45,66 +45,116 @@
     for finders provided in this library.
 */
 
-//! \todo Move this to an appropriate header in detail/
-namespace boost { namespace detail {
-    template <class T, class U> struct is_pointer_to :
-        boost::mpl::and_<
-            typename boost::is_pointer<T>,
-            typename boost::is_same<
-                typename boost::remove_cv<typename boost::remove_pointer<T>::type>::type,
-                U>
-        > {};
-    template <class Range1T, class Range2T, class ComparatorT, class AllocatorT>
-    struct finder_typedefs
-    {
-        //! The type of the substring
-        typedef Range1T substring_type;
-        //! The type of the string
-        typedef Range2T string_type;
-        //! The type of the comparator
-        typedef ComparatorT comparator_type;
-        //! The type of the allocator
-        typedef AllocatorT allocator_type;
-        //! The type of the substring's iterator
-        typedef typename boost::range_const_iterator<substring_type>::type
-            substring_iterator_type;
-        //! The type of the string's iterator
-        typedef typename boost::range_const_iterator<string_type>::type
-            string_iterator_type;
-        //! The character type of the substring
-        typedef typename boost::iterator_value<substring_iterator_type>::type
-            substring_char_type;
-        //! The character type of the string
-        typedef typename boost::iterator_value<string_iterator_type>::type
-            string_char_type;
-        typedef typename boost::iterator_range<substring_iterator_type>
-            substring_range_type;
-        typedef typename boost::iterator_range<string_iterator_type>
-            string_range_type;
-        typedef typename boost::iterator_difference<string_iterator_type>::type
-            string_difference_type;
-    };
-} // namespace detail
-} // namespace boost
-
 namespace boost { namespace algorithm {
 
 
 //  Finder types ---------------------------------------------//
 
-        template <class,class,class,class> struct finder_no_additional_behavior;
-        template <class,class,class,class> class finder_profiling_behavior;
-        template <class,class,class,class> class finder_functor_first_finder;
-        template <class,class,class,class> class finder_functor_last_finder;
+        template <class,class,class,class,class,class> struct finder_no_additional_behavior;
+        template <class,class,class,class,class,class> class finder_profiling_behavior;
+        template <class,class,class,class,class,class> class finder_functor_first_finder;
+        template <class,class,class,class,class,class> class finder_functor_last_finder;
+        template <class,class,class,class,class,class> class finder_functor_nth_finder;
+
+
+        template <class Range1T, class Range2T, class AlgorithmT,
+            class ComparatorT = ::boost::algorithm_is_equal,
+            class AllocatorT = typename AlgorithmT::default_allocator_type,
+            template <class,class,class,class,class,class> class AdditionalBehaviorT =
+                boost::algorithm::finder_no_additional_behavior
+        >
+        class simplified_finder_t :
+            public boost::algorithm::detail::finder_typedefs<Range1T,Range2T,ComparatorT,AllocatorT>,
+            private AlgorithmT::algorithm<
+                simplified_finder_t<Range1T, Range2T, AlgorithmT, ComparatorT, AllocatorT, AdditionalBehaviorT>,
+                typename boost::range_const_iterator<Range1T>::type,
+                typename boost::range_const_iterator<Range2T>::type,
+                ComparatorT,AllocatorT>
+        {
+            //! \todo Add concept assertions here.
+
+        public:
+            simplified_finder_t(Range1T const &substr, Range2T const &str,
+                ComparatorT comparator = ComparatorT(), AllocatorT allocator = AllocatorT())
+                : substring_range_(substr), string_range_(str),
+                comparator_(comparator), allocator_(allocator),
+                substring_has_changed_(true), string_has_changed_(true),
+                algorithm()
+            { }
+
+            void set_substring (Range1T const &substr)
+            { substring_range_ = substr; substring_has_changed_ = true; }
+
+            void set_string (Range2T const &str)
+            { string_range_ = str; string_has_changed_ = true; }
+
+            void find_reset ()
+            { start_offset_ = boost::begin(string_range_); }
+            
+            string_range_type find_first ()
+            {
+                find_reset();
+                return find_next();
+            }
+
+            string_range_type find_next()
+            {
+                apply_changes();
+                if (start_offset_ == boost::end(string_range_))
+                    return string_range_type(start_offset_, start_offset_);
+                string_range_type ret =
+                    algorithm_type::find(start_offset_);
+                if (boost::begin(ret) == boost::end(string_range_))
+                {
+                    start_offset_ = boost::end(string_range_);
+                    return ret;
+                }
+                else
+                {
+                    start_offset_ = boost::begin(ret);
+                    ++start_offset_;
+                    return ret;
+                }
+            }
+
+            substring_range_type get_substring_range() const { return substring_range_; }
+            string_range_type get_string_range() const { return string_range_; }
+            
+            typename boost::call_traits<comparator_type>::reference get_comparator()
+            { return comparator_; }
+
+            typename boost::call_traits<comparator_type>::const_reference get_comparator() const
+            { return comparator_;  }
+       
+            //! Gets a reference to the current allocator
+            typename boost::call_traits<allocator_type>::reference get_allocator()
+            { return allocator_; }
+            
+            /*!
+                \overload
+            */
+            typename boost::call_traits<allocator_type>::const_reference get_allocator() const
+            { return allocator_; }
+
+        private:
+            substring_range_type substring_range_;
+            string_range_type string_range_;
+            bool substring_has_changed_, string_has_changed_;
 
-        //! \todo copyable finder types?
+            comparator_type comparator_;
+            allocator_type allocator_;
+
+            string_iterator_type start_offset_;
+        };
+
+        //! \todo copyable finder types
 
         //! A generic finder type
         /*!
             Allows simple use of various string searching algorithms.
             \tparam Sequence1T The type of the substring
             \tparam Sequence2T The type of the string
-            \tparam Algorithm An algorithm policy class
+            \tparam Algorithm An algorithm class
                 which will be used for performing the searches. \see string_search.hpp
             \tparam Comparator Optional template parameter passed to the algorithm.
                 Used for comparing individual characters for equality.
@@ -116,7 +166,8 @@
             class Algorithm,
             typename Comparator = ::boost::algorithm::is_equal,
             class Allocator = typename Algorithm::default_allocator_type,
-            template <class,class,class,class> class AdditionalBehavior = boost::algorithm::finder_no_additional_behavior
+            template <class,class,class,class,class,class> class AdditionalBehavior =
+                boost::algorithm::finder_no_additional_behavior
         >
         class finder_t : private Algorithm::algorithm</*typename boost::range_const_iterator<Sequence1T>::type,
                         typename boost::range_const_iterator<Sequence2T>::type,
@@ -125,7 +176,9 @@
                 typename boost::range_const_iterator<Sequence1T>::type,
                 typename boost::range_const_iterator<Sequence2T>::type,
                 Comparator, Allocator>,
-            private AdditionalBehavior<Sequence1T,Sequence2T,Algorithm,Comparator>
+            private AdditionalBehavior<
+                typename finder_t<Sequence1T, Sequence2T, Algorithm, Comparator, Allocator, AdditionalBehavior>,
+                Sequence1T,Sequence2T,Algorithm,Comparator,Allocator>
         {
             //! \todo: Maybe write a String concept?
             //! \todo: Currently, there's a SGI Sequence Concept implemented by Boost.ConceptCheck,
@@ -191,7 +244,7 @@
             template <class Range2T>
             finder_t (const Sequence1T *const substring, const Range2T &string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
-                typename boost::disable_if<typename ::boost::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0)
+                typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0)
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(), substring_range_(substring?*substring:substring_optional_copy_),
                 string_optional_copy_(), string_range_(),
@@ -207,7 +260,7 @@
             template <class Range1T>
             explicit finder_t (const Range1T &substring, const Sequence2T *const string = 0,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
-                typename boost::disable_if<typename ::boost::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
+                typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0)
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(), substring_range_(),
                 string_optional_copy_(), string_range_(string?boost::as_literal(*string):string_optional_copy_),
@@ -223,8 +276,8 @@
             template <class Range1T, class Range2T>
             finder_t (const Range1T &substring, const Range2T &string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
-                typename boost::disable_if<boost::mpl::or_<typename ::boost::detail::is_pointer_to<Range1T,Sequence1T>,
-                        typename ::boost::detail::is_pointer_to<Range2T,Sequence2T> > >::type* = 0) 
+                typename boost::disable_if<boost::mpl::or_<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T>,
+                        typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> > >::type* = 0) 
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(), substring_range_(),
                 string_optional_copy_(), string_range_(),
@@ -257,7 +310,7 @@
                 Sequence1T const &&substring,
                 const Range2T &string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
-                typename boost::disable_if<typename ::boost::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0) 
+                typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range2T,Sequence2T> >::type* = 0) 
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(std::move(substring)), substring_range_(substring_optional_copy_),
                 string_optional_copy_(), string_range_(),
@@ -296,7 +349,7 @@
             finder_t (const Range1T &substring,
                 Sequence2T const &&string,
                 Comparator comparator = Comparator(), Allocator allocator = Allocator(),
-                typename boost::disable_if<typename ::boost::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0) 
+                typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<Range1T,Sequence1T> >::type* = 0) 
                 : comparator_(comparator), allocator_(allocator),
                 substring_optional_copy_(), substring_range_(),
                 string_optional_copy_(std::move(string)), string_range_(string_optional_copy_),
@@ -350,7 +403,7 @@
             template <typename RangeT>
             void set_substring(RangeT const &substring,
                 typename boost::disable_if<
-                    typename ::boost::detail::is_pointer_to<RangeT,Sequence1T> >::type* = 0)
+                    typename ::boost::algorithm::detail::is_pointer_to<RangeT,Sequence1T> >::type* = 0)
             {
                 boost::iterator_range<typename boost::range_const_iterator<RangeT>::type> substring_range = 
                     boost::as_literal(substring);
@@ -389,7 +442,7 @@
              */
             template <typename RangeT>
             void set_string(RangeT const &string,
-                typename boost::disable_if<typename ::boost::detail::is_pointer_to<RangeT,Sequence2T> >::type* = 0)
+                typename boost::disable_if<typename ::boost::algorithm::detail::is_pointer_to<RangeT,Sequence2T> >::type* = 0)
             {
                 boost::iterator_range<typename boost::range_const_iterator<RangeT>::type> string_range = 
                     boost::as_literal(string);
@@ -421,32 +474,6 @@
             }
 #           endif
 
-            //! Notify the finder that the pointees have changed
-            /*! If the substring or the string have been passed as pointers, the finder
-                cannot know when the pointees have changed. Therefore, this function
-                must be called before any call to find().
-                \todo Give it more thought
-            */
-            void refresh()
-            {
-                find_reset();
-                on_substring_change(); //! \todo should only be called if the substring is a pointer
-                on_string_change(); //! \todo should only be called if the string is a pointer
-            }
-
-            //! \todo doc et al
-            void refresh_substring()
-            {
-                on_substring_change();
-                find_reset();
-            }
-            
-            void refresh_string()
-            {
-                on_string_change();
-                find_reset();
-            }
-
             //! \todo Change the object's substring or just use a temporary one?
             //! \todo Maybe this shouldn't be a part of finder_t, but a part of a certain AdditionalBehaviorT
             //! Perform a search...
@@ -553,63 +580,19 @@
                     }
                 }
             }
-            //detail::make_properly_copyable<substring_range_type> substring_optional_copy_;
-            //detail::make_properly_copyable<string_range_type> string_optional_copy_;
-            /*typename std::basic_string<substring_char_type, std::char_traits<substring_char_type>, allocator_type>
-                substring_optional_copy_;*/
+
             substring_type substring_optional_copy_;
             string_type string_optional_copy_;
-            /*typename std::basic_string<string_char_type, std::char_traits<string_char_type>, allocator_type>
-                string_optional_copy_;*/
             comparator_type comparator_;
             allocator_type allocator_;
-            
-            //! \TODO make it so that pointers are stored here when appropriate (or must be 0 otherwise)
-            //!         these will be used when refresh_substring()/refresh_string() are called
-            //!         and are used to update substring_range_ and string_range_ via begin()+end()
-            //!         then those functions will update the ranges of the algorithm and call
-            //!         on_substring_change()/on_string_change().
-            //! \TODO think if it wouldn't be better if the algorithms could somehow use pointers to ranges or some stuff like that.
-
-            Sequence1T *substring_pointer_;
-            Sequence2T *string_pointer_;
-            
-            //! \todo Somehow fix this. This makes us able to
-            //!       only store ranges of type substring_iterator_type, and not of std::basic_string.
             substring_range_type substring_range_;
             string_range_type string_range_;
             string_iterator_type start_offset_;
             bool substring_has_changed_, string_has_changed_;
         };
 
-        template <class,class,class,class> struct finder_no_additional_behavior { };
+        template <class,class,class,class,class,class> struct finder_no_additional_behavior { };
 
-        /*
-        template <class Range1T, class Range2T, class Algorithm,
-            class Comparator = ::boost::algorithm_is_equal,
-            class Allocator = 
-                typename Algorithm::algorithm<
-                    typename boost::range_const_iterator<Range1T>::type,
-                    typename boost::range_const_iterator<Range2T>::type,
-                    Comparator>::allocator_type
-        >
-        class simplified_finder_t :
-            public boost::algorithm::detail::finder_typedefs<Range1T,Range2T,ComparatorT,AllocatorT>,
-            private Algorithm::algorithm<
-                typename boost::range_const_iterator<Sequence1T>::type,
-                typename boost::range_const_iterator<Sequence2T>::type,
-                Comparator,Allocator>
-        {
-            //! \todo Add concept assertions here.
-
-        public:
-            simplified_finder_t(Range1T const *const substr = 0, Range2T const *const str = 0)
-                : substr_(substr), str_(str), algorithm() { }
-        private:
-            Range1T const *const substr_;
-            Range2T const *const str_;
-        };
-        */
 
 //  Finder generators ------------------------------------------//
         
Added: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -0,0 +1,12 @@
+#ifndef BOOST_ALGORITHM_STRING_SEARCH_HPP
+#define BOOST_ALGORITHM_STRING_SEARCH_HPP
+
+//! \todo Make sure this header is included by boost/algorithm/string.hpp
+
+#include <boost/algorithm/string/string_search/naive_search.hpp>
+#include <boost/algorithm/string/string_search/rabin_karp.hpp>
+#include <boost/algorithm/string/string_search/knuth_morris_pratt.hpp>
+#include <boost/algorithm/string/string_search/boyer_moore.hpp>
+#include <boost/algorithm/string/string_search/suffix_array.hpp>
+
+#endif
\ No newline at end of file
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/boyer_moore.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -3,7 +3,17 @@
 
 #include <iterator>
 #include <allocators>
+#include <utility>
 #include <vector>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+#include <boost/range/adaptor/reversed.hpp>
+#include <boost/range/algorithm/for_each.hpp>
+#include <boost/range/distance.hpp>
+#include <boost/call_traits.hpp>
+#include <boost/type_traits/is_same.hpp>
+#include <boost/static_assert.hpp>
+#include <map>
 
 namespace boost { namespace algorithm {
     struct boyer_moore
@@ -17,11 +27,18 @@
         public:
             typedef ForwardIterator1T substring_iterator_type;
                 typedef ForwardIterator2T string_iterator_type;
-            typedef typename std::iterator_traits<substring_iterator_type>::value_type substring_char_type;
+            typedef typename
+                std::iterator_traits<substring_iterator_type>::value_type substring_char_type;
             typedef typename std::iterator_traits<string_iterator_type>::value_type string_char_type;
             typedef typename boost::iterator_range<substring_iterator_type> substring_range_type;
             typedef typename boost::iterator_range<string_iterator_type> string_range_type;
+            typedef Comparator comparator_type;
+            typedef Allocator allocator_type;
         protected:
+            algorithm () {
+                BOOST_STATIC_ASSERT((boost::is_same<substring_char_type,string_char_type>::value));
+            }
+
             string_range_type find(string_iterator_type start)
             {
                 return find(start, std::iterator_traits<ForwardIterator2T>::iterator_category());
@@ -34,25 +51,120 @@
             }
             //No precomputation to be done on the string
             inline void on_string_change()
-            {
-
-            }
+            { }
         private:
-            void on_substring_change(std::random_access_iterator_tag)
+            struct compute_first_table_functor {
+                void operator()(typename boost::call_traits<substring_char_type>::param_type c)
+                {
+                    //alg_.table1.insert( std::make_pair(c, alg_.substr_size_ - 1 - idx_) );
+                    if (idx_ != 0) alg_.table1.insert(std::make_pair(c, idx_));
+
+                    ++idx_;
+                }
+                compute_first_table_functor (algorithm &alg) : idx_(0), alg_(alg)
+                { alg_.table1.clear(); }
+            private:
+                std::size_t idx_;
+                algorithm &alg_;
+            };
+
+            struct compute_second_table_functor {
+                void operator()(typename boost::call_traits<substring_char_type>::param_type c)
+                {
+
+                }
+                compute_second_table_functor (algorithm &alg) : idx_(0), alg_(alg)
+                { /*alg_.table2.clear();*/ }
+            private:
+                std::size_t idx_;
+                algorithm &alg_;
+            };
+
+            //precomputation on pattern=bidirectional range
+            void on_substring_change(std::bidirectional_iterator_tag)
             {
                 substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
                 comparator_type const &comp = static_cast<Finder*>(this)->get_comparator();
+                
+                substr_size_ = boost::distance(substr);
 
+                //Compute the first table
+                boost::for_each(substr | boost::adaptors::reversed,
+                    compute_first_table_functor(*this));
+                
+                //Compute the second table
+                boost::for_each(substr | boost::adaptors::reversed,
+                    compute_second_table_functor(*this));
             }
 
+            //finding in text=random access range
             string_range_type find(string_iterator_type start, std::random_access_iterator_tag)
             {
                 string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
                 substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
                 comparator_type const &comp = static_cast<Finder*>(this)->get_comparator();
+
+                std::size_t str_idx, substr_idx, str_size;
+
+                if (substr_size_ == 0)
+                    return string_range_type(start, start);
+
+                str_size = boost::end(str) - start;
+                for (str_idx = substr_size_ - 1, substr_idx = substr_size_ - 1; str_idx < str_size;)
+                {
+                    if (comp(start[str_idx],
+                        boost::begin(substr)[substr_idx]))
+                    {
+                        if (substr_idx == 0)
+                        {
+                            return string_range_type(start+str_idx,start+str_idx+substr_size_);
+                        }
+                        else { assert(str_idx > 0); --substr_idx; --str_idx; }
+                    }
+                    else
+                    {
+                        table1_type::const_iterator iter = table1.find(start[str_idx]);
+                        if (iter == table1.end())
+                        {
+                            str_idx += substr_size_ - substr_idx;
+                            substr_idx = substr_size_ - 1;
+                        }
+                        else if (iter->second > substr_idx)
+                        {
+                            str_idx += iter->second;
+                            substr_idx = substr_size_ - 1;
+                            //str_idx += iter->second - substr_idx + substr_size_ - 1 - substr_idx;
+                            //substr_idx = substr_size_ - 1;
+                            //substr_idx = substr_size_ - 1 - iter->second;
+                            //str_idx += substr_size_ - 1 - substr_idx;
+                            //substr_idx = substr_size_ - 1;
+                        }
+                        else
+                        {
+                            assert(substr_size_ >= substr_idx);
+                            str_idx += substr_size_-substr_idx;
+                            substr_idx = substr_size_ - 1;
+                        }
+                    }
+                }
+                return string_range_type(boost::end(str), boost::end(str));
             }
+
+            //!\todo Get a better data structure here (hash table?)
+            //!\todo Find a better way for custom allocators here
+            typedef typename std::map<substring_char_type, std::size_t,
+                std::less<substring_char_type>,
+                typename std::allocator<std::pair<const substring_char_type, std::size_t> >
+            > table1_type;
+
+            table1_type table1;
+
+            std::size_t substr_size_;
+
         };
     };
 } }
 
+namespace boost { using boost::algorithm::boyer_moore; }
+
 #endif
\ No newline at end of file
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/detail/rabin_karp.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/detail/rabin_karp.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/detail/rabin_karp.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -9,6 +9,8 @@
 #include <boost/range/iterator.hpp>
 #include <boost/call_traits.hpp>
 #include <iterator>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
 
 namespace boost { namespace algorithm { namespace detail {
 
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/knuth_morris_pratt.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -3,6 +3,8 @@
 
 #include <iterator>
 #include <boost/range/iterator_range.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
 #include <vector>
 #include <allocators>
 
@@ -80,6 +82,8 @@
                 substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
                 comparator_type const &comp = static_cast<Finder*>(this)->get_comparator();
 
+                //!\todo clear the container first
+                failure_func.clear();
                 failure_func.reserve(boost::end(substr) - boost::begin(substr));
                 std::size_t idx, q, substr_size = boost::end(substr) - boost::begin(substr);
                 failure_func.push_back(0); failure_func.push_back(0);
@@ -107,4 +111,6 @@
     };
 } }
 
+namespace boost { using boost::algorithm::knuth_morris_pratt; }
+
 #endif
\ No newline at end of file
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/naive_search.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -7,68 +7,63 @@
 #include <boost/range/begin.hpp>
 #include <boost/range/end.hpp>
 
-namespace boost
-{
-	namespace algorithm
+namespace boost { namespace algorithm {
+    //! \todo Copyable
+	struct naive_search
         {
-        //! \todo Copyable
-		struct naive_search
+        typedef boost::mpl::void_ default_allocator_type;
+
+		template <class Finder, class Iterator1T, class Iterator2T, class ComparatorT, class AllocatorT>
+        struct algorithm
                 {
-            typedef boost::mpl::void_ default_allocator_type;
+		protected:
+            algorithm () { }
 
-			template <class Finder, class Iterator1T, class Iterator2T, class ComparatorT, class AllocatorT>
-            struct algorithm
-			{
-			protected:
-                algorithm () { }
 
+			typename boost::iterator_range<Iterator2T> find(Iterator2T start)
+			{
+                typedef typename Finder::string_iterator_type string_iterator_type;
+                typedef typename Finder::substring_iterator_type substring_iterator_type;
+                typedef typename Finder::substring_range_type substring_range_type;
+                typedef typename Finder::string_range_type string_range_type;
+                typedef typename Finder::comparator_type comparator_type;
+                string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
+                substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
+                comparator_type comparator = static_cast<Finder*>(this)->get_comparator();
 
-				typename boost::iterator_range<Iterator2T> find(Iterator2T start)
+				for (;
+					start != boost::end(str); ++start)
                                 {
-                    typedef typename Finder::string_iterator_type string_iterator_type;
-                    typedef typename Finder::substring_iterator_type substring_iterator_type;
-                    typedef typename Finder::substring_range_type substring_range_type;
-                    typedef typename Finder::string_range_type string_range_type;
-                    typedef typename Finder::comparator_type comparator_type;
-                    string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
-                    substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
-                    comparator_type comparator = static_cast<Finder*>(this)->get_comparator();
-
-					for (;
-						start != boost::end(str); ++start)
+					string_iterator_type str_iter(start);
+					substring_iterator_type substr_iter;
+					for (substr_iter = boost::begin(substr);
+						substr_iter != boost::end(substr) && str_iter != boost::end(str);
+                        ++substr_iter, ++str_iter)
                                         {
-						string_iterator_type str_iter(start);
-						substring_iterator_type substr_iter;
-						for (substr_iter = boost::begin(substr);
-							substr_iter != boost::end(substr) && str_iter != boost::end(str);
-                            ++substr_iter, ++str_iter)
-						{
-							if (!comparator(*str_iter, *substr_iter)) break;
-						}
-						if (substr_iter == boost::end(substr))
-							return boost::iterator_range<string_iterator_type>(start, str_iter);
+						if (!comparator(*str_iter, *substr_iter)) break;
                                         }
-					return boost::iterator_range<string_iterator_type>(
-                        boost::end(str),boost::end(str));
+					if (substr_iter == boost::end(substr))
+						return boost::iterator_range<string_iterator_type>(start, str_iter);
                                 }
-                //! It is guaranteed that each of these two functions will get called at least once before find()
-                //! is used.
-                //No precomputation to be done on the substring
-                inline void on_substring_change()
-                {
-                }
-                //No precomputation to be done on the string
-                inline void on_string_change()
-                {
-                }
-			};
-
+				return boost::iterator_range<string_iterator_type>(
+                    boost::end(str),boost::end(str));
+			}
+            //! It is guaranteed that each of these two functions will get called at least once before find()
+            //! is used.
+            //No precomputation to be done on the substring
+            inline void on_substring_change()
+            {
+            }
+            //No precomputation to be done on the string
+            inline void on_string_change()
+            {
+            }
                 };
+
+	};
                 
-	}
+} }
         
-	using algorithm::naive_search;
-
-}
+namespace boost { using boost::algorithm::naive_search; }
 
 #endif
\ No newline at end of file
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/rabin_karp.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -9,6 +9,8 @@
 #include <boost/mpl/void.hpp>
 #include <cstdlib>
 #include <boost/static_assert.hpp>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
 
 
 #include <boost/algorithm/string/string_search/detail/rabin_karp.hpp>
@@ -42,59 +44,22 @@
                 protected:
             //construct the algorithm given iterator ranges for the substring and the string
             algorithm () {
+                //!\todo add more assertions here
                 BOOST_STATIC_ASSERT(
                     sizeof(boost::iterator_value<Iterator1T>::type)*2 <= sizeof(HashType)
                 );
             }
-            /*string_range_type find(string_iterator_type start)
-            {
-                if (start == boost::begin(string) && string_computed_start_offset_) on_string_change();
-
-            }
-
-            void on_substring_change()
-            {
-                first_substring_hash_ = static_cast<HashType>(0);
-                second_substring_hash_ = static_cast<HashType>(0);
-                substring_size_ = 0;
-                for (substring_iterator_type it = boost::begin(substring_); it != boost::end(substring_); ++it)
-                {
-                    substring_size_++;
-                    first_substring_hash_  = (first_substring_hash_*FirstBase + *it) % FirstModulo;
-                    second_substring_hash_ = (second_substring_hash_*SecondBase + *it) % SecondModulo;
-                }
-                if (string_computed_start_offset_ > 0) on_string_change();
-            }
-
-            void on_string_change()
-            {
-                first_string_hash_ = static_cast<HashType>(0);
-                second_string_hash_ = static_cast<HashType>(0);
-                string_computed_start_offset_ = 0;
-                string_computed_end_offset_ = 0;
-                for (string_iterator_type it = boost::begin(string_);
-                    it != boost::end(string_) && string_computed_end_offset_ < substring_size; ++it)
-                {
-                    ++string_computed_end_offset_;
-                    first_string_hash_ = (first_string_hash_ * FirstBase + *it) % FirstModulo;
-                    second_string_hash_ = (second_string_hash_ * SecondBase + *it) % SecondModulo;
-                }
-
-                //string_computed_upto = 
-            }*/
-
-
-			
+	
         };
     };
 
-    //1/3732152659 odds of collision. used with char
+    //1/3732152659 odds of collision. useful with char
     typedef rabin_karp_algorithm<boost::uint32_t,257,64433,277,57923> rabin_karp32;
-    //1/(N*M) odds of collision. used with wchar_t
-    typedef rabin_karp_algorithm<boost::uint64_t,1,2,3,4> rabin_karp64;
+    //1/150167080229379589 odds of collision. useful with wchar_t
+    typedef rabin_karp_algorithm<boost::uint64_t,337515847,373587883,255150899,401959183> rabin_karp64;
 
-} // namespace algorithm
-} // namespace boost
+} } // namespace algorithm, namespace boost
 
+namespace boost { using boost::algorithm::rabin_karp32; using boost::algorithm::rabin_karp64; }
 
 #endif
\ No newline at end of file
Modified: sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp	(original)
+++ sandbox/SOC/2010/stringalgos/boost/algorithm/string/string_search/suffix_array.hpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -1,14 +1,209 @@
 #ifndef BOOST_ALGORITHM_SUFFIX_ARRAY_HPP
 #define BOOST_ALGORITHM_SUFFIX_ARRAY_HPP
 
+#include <iterator>
+#include <vector>
+#include <algorithm>
+#include <boost/range/begin.hpp>
+#include <boost/range/end.hpp>
+#include <boost/range/iterator_range.hpp>
+#include <boost/range/algorithm/sort.hpp>
+
 namespace boost { namespace algorithm {
-    struct suffix_array
+    struct suffix_array_search
     {
-        template <class
+        typedef std::allocator<std::size_t> default_allocator_type;
+        template <class Finder,class RandomAccessIterator1T,
+            class RandomAccessIterator2T,class Comparator,class Allocator>
         class algorithm
         {
+        public:
+            typedef RandomAccessIterator1T substring_iterator_type;
+	        typedef RandomAccessIterator2T string_iterator_type;
+            typedef typename
+                std::iterator_traits<substring_iterator_type>::value_type substring_char_type;
+            typedef typename std::iterator_traits<string_iterator_type>::value_type string_char_type;
+            typedef typename boost::iterator_range<substring_iterator_type> substring_range_type;
+            typedef typename boost::iterator_range<string_iterator_type> string_range_type;
+            typedef Comparator comparator_type;
+            typedef Allocator allocator_type;
+        protected:
+            algorithm () : found_matches_(false), pos_(), matches_() { }
+            
+            void on_substring_change()
+            { on_substring_change(std::iterator_traits<substring_iterator_type>::iterator_category()); }
+            
+            void on_string_change()
+            { on_string_change(std::iterator_traits<string_iterator_type>::iterator_category()); }
+
+            string_range_type find (string_iterator_type start)
+            {
+                substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
+                string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
+                std::size_t start_offset = start - boost::begin(str),
+                    substr_size = boost::end(substr) - boost::begin(substr),
+                    str_size = boost::end(str) - boost::begin(str);
+
+                if (found_matches_)
+                    return find_first_match(start_offset);
+                
+                if (boost::begin(str) == boost::end(str)) return string_range_type(boost::end(str), boost::end(str));
+                
+                std::size_t suffix_left, suffix_right;
+                std::size_t firstsuffix_end = pos_[0] + substr_size, lastsuffix_end = pos_.back()+substr_size;
+                //the end position of the smallest lexicographic suffix
+                //if (firstsuffix_end > str.size()) firstsuffix_end = str.size();
+                //the end position of the biggest lexicographic suffix
+                if (lastsuffix_end > str_size) lastsuffix_end = str_size;
+                //the substring is smaller than the smallest lexicographic suffix, therefore no matches
+                //if (std::lexicographical_compare(substr.begin(), substr.end(),str.begin()+pos[0],str.begin()+firstsuffix_end))
+                if (suffix_less(substr, str, 0) ||
+                    std::lexicographical_compare(str.begin()+pos_.back(),str.begin()+lastsuffix_end,substr.begin(),substr.end())
+                    )
+                {
+                    found_matches_ = true;
+                    matches_.clear();
+                    return string_range_type( boost::end(str), boost::end(str) );
+                }
+                //the substring is bigger than the biggest lexicographic suffix, therefore no matches
+                //else if ()
+                //    return;
+                //perform binary search in the array of suffixes, find a range of matching suffixes
+                else
+                {
+                    std::size_t left = 0, right=str_size, middle, result = 0;
+                    while (right > left) // nonsingular range [left,right)
+                    {
+                        middle = (left+right)/2;
+                        //does the substring match the prefix of the suffix `middle'?
+                        //if (std::equal(substr.begin(), substr.end(), str.begin()+pos[middle]))
+                        if (suffix_equal(substr, str, middle))
+                        { result = middle; break; }
+                        //is the substring lexicographically less than the prefix of the suffix `middle'?
+                        else if (suffix_less(substr, str, middle))
+                        //else if (std::lexicographical_compare(substr.begin(), substr.end(),
+                        //    str.begin()+pos[middle], str.end()))
+                        { right = middle; }
+                        //it must be greater.
+                        else { left = middle+1; }
+                    }
+                    //empty interval, no matches
+                    if (left == right) return string_range_type(boost::end(str), boost::end(str));
+                    
+
+                    //suffix_left = lower bound of the matches
+                    if (result > 0)
+                    {
+                        suffix_left = result;
+                        //while (suffix_left >= 1 && std::equal(substr.begin(), substr.end(), str.begin()+pos[suffix_left-1]))
+                        while (suffix_left >= 1 && suffix_equal(substr, str, suffix_left-1))
+                            --suffix_left;
+                        //++suffix_left;
+                    } else suffix_left = result;
+
+                    //suffix_right = upper bound of the matches
+                    suffix_right = result+1;
+                    //while (suffix_right < str.size() && std::equal(substr.begin(), substr.end(), str.begin()+pos[suffix_right]))
+                    while (suffix_right < str_size && suffix_equal(substr, str, suffix_right))
+                        ++suffix_right;
+
+                    //there are (suffix_right - suffix_left) matches, corresponding to the positions:
+                    // { pos[k] | k \in [suffix_left,suffix_right) }
+                    found_matches_ = true;
+                    matches_.clear();
+                    matches_.reserve(suffix_right - suffix_left);
+                    for (std::size_t k = suffix_left; k < suffix_right; ++k)
+                        matches_.push_back(pos_[k]);
+                    boost::sort(matches_);
+                    return find_first_match(start_offset);
+                }
+            }
+        private:
+            inline string_range_type find_first_match (std::size_t start_offset)
+            {
+                string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
+                substring_range_type const &substr = static_cast<Finder*>(this)->get_substring_range();
+
+                std::vector<std::size_t, Allocator>::const_iterator first_match = 
+                            std::lower_bound(matches_.begin(), matches_.end(), start_offset);
+                if (first_match == matches_.end())
+                    return string_range_type(
+                        boost::end(str), boost::end(str));
+                std::size_t substr_size = boost::end(substr) - boost::begin(substr);
+                assert(*first_match + substr_size <= static_cast<std::size_t>(str.size()) );
+                return string_range_type(
+                    boost::begin(str) + (*first_match),
+                    boost::begin(str) + (*first_match) + substr_size
+                    );
+            }
+            void on_substring_change(std::random_access_iterator_tag)
+            {
+                found_matches_ = false;
+            }
+
+            void on_string_change(std::random_access_iterator_tag)
+            {
+                string_range_type const &str = static_cast<Finder*>(this)->get_string_range();
+
+                // compute the suffix array
+                std::size_t str_size = boost::end(str) - boost::begin(str);
+                pos_.clear();
+                pos_.reserve(str_size);
+                std::generate_n(std::back_inserter(pos_), str_size, increasing_generator());
+                std::sort(pos_.begin(), pos_.end(), suffix_array_sort_comparator(str));
+
+                found_matches_ = false;
+            }
+
+            inline bool suffix_less(substring_range_type const &substr,
+                string_range_type const &str, std::size_t suffix)
+            {
+                std::size_t start_offset = pos_[suffix],
+                    end_offset = start_offset + (boost::end(substr) - boost::begin(substr)),
+                    str_size = boost::end(str) - boost::begin(str);
+                if (end_offset > str_size) end_offset = str_size;
+                return std::lexicographical_compare(
+                    boost::begin(substr), boost::end(substr),
+                    boost::begin(str) + start_offset, boost::begin(str) + end_offset);
+            }
+
+            inline bool suffix_equal(substring_range_type const &substr,
+                string_range_type const &str, std::size_t suffix)
+            {
+                std::size_t start_offset = pos_[suffix],
+                    end_offset = start_offset + (boost::end(substr) - boost::begin(substr)),
+                    str_size = boost::end(str) - boost::begin(str);
+                //the substr is longer than the suffix, they can't be equal.
+                if (end_offset > str_size) return false;
+                return std::equal(substr.begin(), substr.end(), boost::begin(str)+start_offset);
+            }
+
+            std::vector<std::size_t, Allocator> pos_;
+            bool found_matches_;
+            std::vector<std::size_t, Allocator> matches_;
+
+
+            struct increasing_generator
+            {
+                increasing_generator () : idx_(0) { }
+                inline std::size_t operator()() { return idx_++; }
+                std::size_t idx_;
+            };
+
+            struct suffix_array_sort_comparator
+            {
+                suffix_array_sort_comparator (string_range_type const &str) : str_(str) { }
+                bool operator()(std::size_t lhs, std::size_t rhs)
+                {
+                    return std::lexicographical_compare(str_.begin()+lhs, str_.end(), str_.begin()+rhs, str_.end());
+                }
+                string_range_type const &str_;
+            };
+
         };
     };
 } }
 
+namespace boost { using boost::algorithm::suffix_array_search; }
+
 #endif
\ No newline at end of file
Modified: sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp
==============================================================================
--- sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp	(original)
+++ sandbox/SOC/2010/stringalgos/libs/algorithm/string/test/finder_test.cpp	2010-07-13 21:24:19 EDT (Tue, 13 Jul 2010)
@@ -7,12 +7,14 @@
 
 //  See http://www.boost.org for updates, documentation, and revision history.
 
+
 #include <boost/algorithm/string/find.hpp>
 #include <boost/algorithm/string/finder.hpp>
 #include <boost/algorithm/string/string_search/naive_search.hpp>
 #include <boost/algorithm/string/string_search/rabin_karp.hpp>
 #include <boost/algorithm/string/string_search/knuth_morris_pratt.hpp>
 #include <boost/algorithm/string/string_search/boyer_moore.hpp>
+#include <boost/algorithm/string/string_search/suffix_array.hpp>
 #include <boost/algorithm/string/classification.hpp>
 #include <boost/config.hpp>
 
@@ -23,6 +25,11 @@
 #include <sstream>
 #include <list>
 
+
+#if !defined(BOOST_HAS_RVALUE_REFS) && !defined(BOOST_NO_RVALUE_REFS)
+#define BOOST_HAS_RVALUE_REFS
+#endif
+
 //#define BOOST_TEST_ALTERNATIVE_INIT_API
 //#define BOOST_TEST_DYN_LINK
 #include <boost/test/unit_test.hpp>
@@ -186,7 +193,6 @@
 
     //!\todo fix
 #   ifdef BOOST_HAS_RVALUE_REFS
-    BOOST_CHECK(false);
     f_t f6( (Sequence1T(s1)), &S1); // rvalue, ptr
     BOOST_CHECK(boost::equal(f6.get_substring_range(),s1));
     BOOST_CHECK(superficial_range_equal(f6.get_string_range(),S1));
@@ -584,13 +590,22 @@
 
 boost::unit_test::test_suite* init_unit_test_suite( int argc, char* argv[] )
 {
-    // Note: we should test with other allocators and comparators
+
+    /*!\todo test with other allocators and comparators
+    \todo test with different iterator types, depending on what each algorithm supports
+    \todo test matches of empty substring in nonempty string (should return empty range but iterators with proper offset)
+    \todo test the nonmatch of nonempty substring in empty string
+    \todo test the unique match of empty substring in empty string
+    \todo test more strings with consecutive substring matches
+    */
+
     typedef boost::mpl::list<
         boost::algorithm::naive_search,
         boost::algorithm::rabin_karp32,
-        boost::algorithm::knuth_morris_pratt/*,
+        boost::algorithm::rabin_karp64,
+        boost::algorithm::knuth_morris_pratt,
         boost::algorithm::boyer_moore,
-        boost::algorithm::suffix_array*/
+        boost::algorithm::suffix_array_search
     > algorithm_list;
     boost::unit_test::framework::master_test_suite().add(
         BOOST_TEST_CASE_TEMPLATE(test_finders, algorithm_list)