$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
From: eric_at_[hidden]
Date: 2008-01-26 14:38:46
Author: eric_niebler
Date: 2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
New Revision: 42986
URL: http://svn.boost.org/trac/boost/changeset/42986
Log:
optimize repeated searches with patterns that have leading repeats
Text files modified: 
   trunk/boost/xpressive/detail/core/finder.hpp                        |    22 +++++++++++++                           
   trunk/boost/xpressive/detail/core/linker.hpp                        |    26 ++++++++++++++++                        
   trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp |    26 ++++++++++++++++                        
   trunk/boost/xpressive/detail/core/optimize.hpp                      |     9 ++++                                    
   trunk/boost/xpressive/detail/core/peeker.hpp                        |    65 +++++++++++++++++++++------------------ 
   trunk/boost/xpressive/detail/core/state.hpp                         |     2 +                                       
   trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp    |     6 +-                                      
   trunk/boost/xpressive/regex_algorithms.hpp                          |     4 +-                                      
   trunk/boost/xpressive/regex_iterator.hpp                            |     9 +++-                                    
   trunk/boost/xpressive/regex_token_iterator.hpp                      |    12 ++++---                                 
   10 files changed, 137 insertions(+), 44 deletions(-)
Modified: trunk/boost/xpressive/detail/core/finder.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/finder.hpp	(original)
+++ trunk/boost/xpressive/detail/core/finder.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -185,6 +185,28 @@
     bool bits_[256];
 };
 
+///////////////////////////////////////////////////////////////////////////////
+// leading_simple_repeat_finder
+//
+template<typename BidiIter>
+struct leading_simple_repeat_finder
+  : finder<BidiIter>
+{
+    leading_simple_repeat_finder()
+      : finder<BidiIter>()
+    {}
+
+    bool operator ()(match_state<BidiIter> &state) const
+    {
+        state.cur_ = state.next_search_;
+        return true;
+    }
+
+private:
+    leading_simple_repeat_finder(leading_simple_repeat_finder const &);
+    leading_simple_repeat_finder &operator =(leading_simple_repeat_finder const &);
+};
+
 }}}
 
 #if defined(_MSC_VER) && (_MSC_VER >= 1020)
Modified: trunk/boost/xpressive/detail/core/linker.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/linker.hpp	(original)
+++ trunk/boost/xpressive/detail/core/linker.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -145,6 +145,7 @@
       : back_stack_()
       , traits_(&traits)
       , traits_type_(&typeid(Traits))
+      , has_backrefs_(false)
     {
     }
 
@@ -154,6 +155,24 @@
         // no-op
     }
 
+    template<typename Traits, typename ICase>
+    void accept(mark_matcher<Traits, ICase> const &, void const *)
+    {
+        this->has_backrefs_ = true;
+    }
+
+    template<typename Action>
+    void accept(action_matcher<Action> const &, void const *)
+    {
+        this->has_backrefs_ = true;
+    }
+
+    template<typename Predicate>
+    void accept(predicate_matcher<Predicate> const &, void const *)
+    {
+        this->has_backrefs_ = true;
+    }
+
     void accept(repeat_begin_matcher const &, void const *next)
     {
         this->back_stack_.push(next);
@@ -217,6 +236,12 @@
         matcher.xpr_.link(*this);
     }
 
+    // accessors
+    bool has_backrefs() const
+    {
+        return this->has_backrefs_;
+    }
+
     // for use by alt_link_pred below
     template<typename Xpr>
     void alt_branch_link(Xpr const &xpr, void const *next, xpression_peeker<Char> *peeker)
@@ -292,6 +317,7 @@
     std::stack<void const *> back_stack_;
     void const *traits_;
     std::type_info const *traits_type_;
+    bool has_backrefs_;
 };
 
 }}} // namespace boost::xpressive::detail
Modified: trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp	(original)
+++ trunk/boost/xpressive/detail/core/matcher/simple_repeat_matcher.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -16,6 +16,7 @@
 #include <boost/assert.hpp>
 #include <boost/mpl/if.hpp>
 #include <boost/mpl/bool.hpp>
+#include <boost/next_prior.hpp>
 #include <boost/xpressive/detail/detail_fwd.hpp>
 #include <boost/xpressive/detail/core/quant_style.hpp>
 #include <boost/xpressive/detail/core/state.hpp>
@@ -65,12 +66,14 @@
         Xpr xpr_;
         unsigned int min_, max_;
         std::size_t width_;
+        mutable bool leading_;
 
         simple_repeat_matcher(Xpr const &xpr, unsigned int min, unsigned int max, std::size_t width)
           : xpr_(xpr)
           , min_(min)
           , max_(max)
           , width_(width)
+          , leading_(false)
         {
             // it is the job of the parser to make sure this never happens
             BOOST_ASSERT(min <= max);
@@ -101,6 +104,16 @@
                 ++matches;
             }
 
+            // If this repeater is at the front of the pattern, note
+            // how much of the input we consumed so that a repeated search
+            // doesn't have to cover the same ground again.
+            if(this->leading_)
+            {
+                state.next_search_ = (matches && matches < this->max_)
+                                   ? state.cur_
+                                   : (tmp == state.end_) ? tmp : boost::next(tmp);
+            }
+
             if(this->min_ > matches)
             {
                 state.cur_ = tmp;
@@ -126,6 +139,7 @@
         template<typename BidiIter, typename Next>
         bool match_(match_state<BidiIter> &state, Next const &next, non_greedy_tag) const
         {
+            BOOST_ASSERT(!this->leading_);
             BidiIter const tmp = state.cur_;
             unsigned int matches = 0;
 
@@ -161,12 +175,24 @@
             // is there enough room?
             if(this->min_ > diff_to_end)
             {
+                if(this->leading_)
+                {
+                    // BUGBUG
+                    state.next_search_ = boost::next(tmp);
+                }
                 return false;
             }
 
             BidiIter const min_iter = tmp + this->min_;
             state.cur_ += (std::min)((std::size_t)this->max_, diff_to_end);
 
+            if(this->leading_)
+            {
+                state.next_search_ = (diff_to_end && diff_to_end < this->max_)
+                                   ? state.cur_
+                                   : (tmp == state.end_) ? tmp : boost::next(tmp);
+            }
+
             for(;; --state.cur_)
             {
                 if(next.match(state))
Modified: trunk/boost/xpressive/detail/core/optimize.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/optimize.hpp	(original)
+++ trunk/boost/xpressive/detail/core/optimize.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -39,6 +39,13 @@
             new line_start_finder<BidiIter, Traits>(traits)
         );
     }
+    else if(peeker.leading_simple_repeat())
+    {
+        return intrusive_ptr<finder<BidiIter> >
+        (
+            new leading_simple_repeat_finder<BidiIter>()
+        );
+    }
     else if(256 != peeker.bitset().count())
     {
         return intrusive_ptr<finder<BidiIter> >
@@ -96,7 +103,7 @@
 
     // "peek" into the compiled regex to see if there are optimization opportunities
     hash_peek_bitset<char_type> bset;
-    xpression_peeker<char_type> peeker(bset, traits);
+    xpression_peeker<char_type> peeker(bset, traits, linker.has_backrefs());
     regex->peek(peeker);
 
     // optimization: get the peek chars OR the boyer-moore search string
Modified: trunk/boost/xpressive/detail/core/peeker.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/peeker.hpp	(original)
+++ trunk/boost/xpressive/detail/core/peeker.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -31,28 +31,7 @@
 {
 
 ///////////////////////////////////////////////////////////////////////////////
-// peek_next
-//   tell whether or not to keep looking for a peek optimization
-template<typename Matcher>
-struct peek_next
-  : mpl::bool_<Matcher::width == 0>
-{
-};
-
-template<>
-struct peek_next<mark_begin_matcher>
-  : mpl::true_
-{
-};
-
-template<>
-struct peek_next<repeat_begin_matcher>
-  : mpl::true_
-{
-};
-
-///////////////////////////////////////////////////////////////////////////////
-// xpression_peeker
+// peeker_string
 //
 template<typename Char>
 struct peeker_string
@@ -91,12 +70,14 @@
 struct xpression_peeker
 {
     template<typename Traits>
-    xpression_peeker(hash_peek_bitset<Char> &bset, Traits const &traits)
+    xpression_peeker(hash_peek_bitset<Char> &bset, Traits const &traits, bool has_backrefs = false)
       : bset_(bset)
       , str_()
       , line_start_(false)
       , traits_(0)
       , traits_type_(0)
+      , leading_simple_repeat_(0)
+      , has_backrefs_(has_backrefs)
     {
         this->set_traits(traits);
     }
@@ -113,6 +94,11 @@
         return this->line_start_;
     }
 
+    bool leading_simple_repeat() const
+    {
+        return 0 < this->leading_simple_repeat_;
+    }
+
     hash_peek_bitset<Char> const &bitset() const
     {
         return this->bset_;
@@ -120,19 +106,31 @@
 
     ///////////////////////////////////////////////////////////////////////////////
     // modifiers
-    void fail(bool do_fail = true)
+    void fail()
     {
-        if(do_fail)
+        this->bset_.set_all();
+    }
+
+    template<typename Matcher>
+    mpl::false_ accept(Matcher const &)
+    {
+        this->fail();
+        return mpl::false_();
+    }
+
+    mpl::true_ accept(mark_begin_matcher const &)
+    {
+        if(this->has_backrefs_)
         {
-            this->bset_.set_all();
+            --this->leading_simple_repeat_;
         }
+        return mpl::true_();
     }
 
-    template<typename Matcher>
-    peek_next<Matcher> accept(Matcher const &)
+    mpl::true_ accept(repeat_begin_matcher const &)
     {
-        this->fail(!peek_next<Matcher>::value);
-        return peek_next<Matcher>();
+        --this->leading_simple_repeat_;
+        return mpl::true_();
     }
 
     template<typename Traits>
@@ -228,6 +226,11 @@
     template<typename Xpr, typename Greedy>
     mpl::false_ accept(simple_repeat_matcher<Xpr, Greedy> const &xpr)
     {
+        if(Greedy() && 1U == xpr.width_)
+        {
+            ++this->leading_simple_repeat_;
+            xpr.leading_ = this->leading_simple_repeat();
+        }
         0 != xpr.min_ ? xpr.xpr_.peek(*this) : this->fail(); // could be a union of xpr and next
         return mpl::false_();
     }
@@ -268,6 +271,8 @@
     bool line_start_;
     void const *traits_;
     std::type_info const *traits_type_;
+    int leading_simple_repeat_;
+    bool has_backrefs_;
 };
 
 }}} // namespace boost::xpressive::detail
Modified: trunk/boost/xpressive/detail/core/state.hpp
==============================================================================
--- trunk/boost/xpressive/detail/core/state.hpp	(original)
+++ trunk/boost/xpressive/detail/core/state.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -127,6 +127,7 @@
     actionable const **action_list_tail_;
     action_args_type *action_args_;
     attr_context attr_context_;
+    BidiIter next_search_;
 
     ///////////////////////////////////////////////////////////////////////////////
     //
@@ -151,6 +152,7 @@
       , action_list_tail_(&action_list_.next)
       , action_args_(&core_access<BidiIter>::get_action_args(what))
       , attr_context_() // zero-initializes the fields of attr_context_
+      , next_search_(begin)
     {
         // reclaim any cached memory in the match_results struct
         this->extras_->sub_match_stack_.unwind();
Modified: trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp
==============================================================================
--- trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp	(original)
+++ trunk/boost/xpressive/detail/static/transforms/as_quantifier.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -88,7 +88,7 @@
             typedef typename Expr::proto_tag tag;
 
             xpr_type const &xpr = Grammar()(proto::arg(expr), detail::true_xpression(), visitor);
-            matcher_type matcher(xpr, min_type<tag>(), max_type<tag>(), xpr.get_width().value());
+            matcher_type matcher(xpr, (uint_t)min_type<tag>(), (uint_t)max_type<tag>(), xpr.get_width().value());
             return proto::terminal<matcher_type>::type::make(matcher);
         }
     };
@@ -174,8 +174,8 @@
             int mark_number = proto::arg(proto::left(marked_sub)).mark_number_;
             BOOST_ASSERT(0 != mark_number);
 
-            unsigned min_ = min_type<typename Expr::proto_tag>();
-            unsigned max_ = max_type<typename Expr::proto_tag>();
+            uint_t min_ = (uint_t)min_type<typename Expr::proto_tag>();
+            uint_t max_ = (uint_t)max_type<typename Expr::proto_tag>();
 
             detail::repeat_begin_matcher begin(mark_number);
             detail::repeat_end_matcher<Greedy> end(mark_number, min_, max_);
Modified: trunk/boost/xpressive/regex_algorithms.hpp
==============================================================================
--- trunk/boost/xpressive/regex_algorithms.hpp	(original)
+++ trunk/boost/xpressive/regex_algorithms.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -537,7 +537,7 @@
         }
 
         out = what.format(out, fmt, flags);
-        cur = state.cur_ = what[0].second;
+        cur = state.cur_ = state.next_search_ = what[0].second;
 
         if(0 == (flags & format_first_only))
         {
@@ -552,7 +552,7 @@
 
                 access::set_prefix_suffix(what, begin, end);
                 out = what.format(out, fmt, flags);
-                cur = state.cur_ = what[0].second;
+                cur = state.cur_ = state.next_search_ = what[0].second;
                 not_null = (0 == what.length());
                 state.reset(what, *access::get_regex_impl(re));
             }
Modified: trunk/boost/xpressive/regex_iterator.hpp
==============================================================================
--- trunk/boost/xpressive/regex_iterator.hpp	(original)
+++ trunk/boost/xpressive/regex_iterator.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -39,6 +39,7 @@
         BidiIter begin
       , BidiIter cur
       , BidiIter end
+      , BidiIter next_search
       , basic_regex<BidiIter> const *rex
       , regex_constants::match_flag_type flags
       , bool not_null = false
@@ -50,6 +51,7 @@
       , not_null_(not_null)
     {
         this->state_.cur_ = cur;
+        this->state_.next_search_ = next_search;
     }
 
     bool next()
@@ -63,7 +65,7 @@
         // Report position() correctly by setting the base different from prefix().first
         access::set_base(this->what_, this->state_.begin_);
 
-        this->state_.cur_ = this->what_[0].second;
+        this->state_.cur_ = this->state_.next_search_ = this->what_[0].second;
         this->not_null_ = (0 == this->what_.length());
 
         return true;
@@ -116,7 +118,7 @@
       , basic_regex<BidiIter> const &rex
       , regex_constants::match_flag_type flags = regex_constants::match_default
     )
-      : impl_(new impl_type_(begin, begin, end, &rex, flags))
+      : impl_(new impl_type_(begin, begin, end, begin, &rex, flags))
     {
         this->next_();
     }
@@ -130,7 +132,7 @@
       , detail::let_<LetExpr> const &args
       , regex_constants::match_flag_type flags = regex_constants::match_default
     )
-      : impl_(new impl_type_(begin, begin, end, &rex, flags))
+      : impl_(new impl_type_(begin, begin, end, begin, &rex, flags))
     {
         detail::bind_args(args, this->impl_->what_);
         this->next_();
@@ -222,6 +224,7 @@
                 that->state_.begin_
               , that->state_.cur_
               , that->state_.end_
+              , that->state_.next_search_
               , that->rex_
               , that->flags_
               , that->not_null_
Modified: trunk/boost/xpressive/regex_token_iterator.hpp
==============================================================================
--- trunk/boost/xpressive/regex_token_iterator.hpp	(original)
+++ trunk/boost/xpressive/regex_token_iterator.hpp	2008-01-26 14:38:44 EST (Sat, 26 Jan 2008)
@@ -39,13 +39,14 @@
         BidiIter begin
       , BidiIter cur
       , BidiIter end
+      , BidiIter next_search
       , basic_regex<BidiIter> const *rex
       , regex_constants::match_flag_type flags = regex_constants::match_default
       , std::vector<int> subs = std::vector<int>(1, 0)
       , int n = -2
       , bool not_null = false
     )
-      : iter_(begin, cur, end, rex, flags, not_null)
+      : iter_(begin, cur, end, next_search, rex, flags, not_null)
       , result_()
       , n_((-2 == n) ? (int)subs.size() - 1 : n)
       , subs_()
@@ -158,7 +159,7 @@
       , BidiIter end
       , basic_regex<BidiIter> const &rex
     )
-      : impl_(new impl_type_(begin, begin, end, &rex))
+      : impl_(new impl_type_(begin, begin, end, begin, &rex))
     {
         this->next_();
     }
@@ -176,7 +177,7 @@
       , basic_regex<BidiIter> const &rex
       , detail::let_<LetExpr> const &args
     )
-      : impl_(new impl_type_(begin, begin, end, &rex))
+      : impl_(new impl_type_(begin, begin, end, begin, &rex))
     {
         detail::bind_args(args, this->impl_->iter_.what_);
         this->next_();
@@ -198,7 +199,7 @@
       , Subs const &subs
       , regex_constants::match_flag_type flags = regex_constants::match_default
     )
-      : impl_(new impl_type_(begin, begin, end, &rex, flags, detail::to_vector(subs)))
+      : impl_(new impl_type_(begin, begin, end, begin, &rex, flags, detail::to_vector(subs)))
     {
         this->next_();
     }
@@ -221,7 +222,7 @@
       , detail::let_<LetExpr> const &args
       , regex_constants::match_flag_type flags = regex_constants::match_default
     )
-      : impl_(new impl_type_(begin, begin, end, &rex, flags, detail::to_vector(subs)))
+      : impl_(new impl_type_(begin, begin, end, begin, &rex, flags, detail::to_vector(subs)))
     {
         detail::bind_args(args, this->impl_->iter_.what_);
         this->next_();
@@ -307,6 +308,7 @@
                 this->impl_->iter_.state_.begin_
               , this->impl_->iter_.state_.cur_
               , this->impl_->iter_.state_.end_
+              , this->impl_->iter_.state_.next_search_
               , this->impl_->iter_.rex_
               , this->impl_->iter_.flags_
               , this->impl_->subs_