$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
From: eric_at_[hidden]
Date: 2008-01-14 15:26:59
Author: eric_niebler
Date: 2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
New Revision: 42776
URL: http://svn.boost.org/trac/boost/changeset/42776
Log:
add skip(), for specifying a skip regex
Added:
   trunk/libs/xpressive/test/test_skip.cpp   (contents, props changed)
Text files modified: 
   trunk/boost/xpressive/regex_primitives.hpp |   129 ++++++++++++++++++++++++++++++++++++++++
   trunk/libs/xpressive/test/Jamfile.v2       |     1                                         
   2 files changed, 130 insertions(+), 0 deletions(-)
Modified: trunk/boost/xpressive/regex_primitives.hpp
==============================================================================
--- trunk/boost/xpressive/regex_primitives.hpp	(original)
+++ trunk/boost/xpressive/regex_primitives.hpp	2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
@@ -115,6 +115,93 @@
     #undef minus_one
     #endif
 
+    // Proto callable transform: replace "Expr" with "keep(*State) >> Expr" (State is the skip expression)
+    struct skip_primitives : proto::callable
+    {
+        template<typename Sig>
+        struct result;
+        // result<This(Expr, State, Visitor)>::type is the type of "keep(*State) >> Expr"
+        template<typename This, typename Expr, typename State, typename Visitor>
+        struct result<This(Expr, State, Visitor)>
+          : proto::shift_right<
+                typename proto::unary_expr<
+                    keeper_tag
+                  , typename proto::dereference<State>::type
+                >::type
+              , Expr
+            >
+        {};
+        // Builds the rewritten node from the primitive (expr) and the skip expression (state)
+        template<typename Expr, typename State, typename Visitor>
+        typename result<void(Expr, State, Visitor)>::type
+        operator ()(Expr const &expr, State const &state, Visitor &visitor) const  // visitor is not used
+        {
+            typedef typename result<void(Expr, State, Visitor)>::type type;
+            type that = {{{state}}, expr};  // braces, inside out: dereference node {state}, keeper node, shift_right {lhs, rhs}
+            return that;
+        }
+    };
+
+    struct Primitives  // grammar for the expressions that skip_primitives is applied to as a unit
+      : proto::or_<
+            proto::terminal<proto::_>  // any terminal
+          , proto::comma<proto::_, proto::_>  // any comma expression, e.g. (set='a','b')
+          , proto::subscript<proto::terminal<set_initializer>, proto::_>  // set[...]
+          , proto::assign<proto::terminal<set_initializer>, proto::_>  // (set= ...)
+          , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_>  // (aN= ...)
+          , proto::complement<Primitives>  // ~primitive
+        >
+    {};
+
+    struct SkipGrammar  // grammar + transform used by skip_directive to rewrite a regex expression
+      : proto::or_<
+            proto::when<Primitives, skip_primitives>                          // primitives become "keep(*skip) >> primitive"
+          , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar>   // don't "skip" mark tags, only what is assigned to them
+          , proto::subscript<SkipGrammar, proto::_>                         // don't put skips in actions
+          , proto::binary_expr<modifier_tag, proto::_, SkipGrammar>         // don't skip modifiers, only the modified expression
+          , proto::unary_expr<lookbehind_tag, proto::_>                     // don't skip lookbehinds
+          , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> >         // everything else is fair game: apply SkipGrammar to each child
+        >
+    {};
+
+    template<typename Skip>  // Skip: type of the argument passed to skip()
+    struct skip_directive
+    {
+        typedef typename proto::result_of::as_expr<Skip>::type skip_type;  // Skip as a proto expression
+
+        skip_directive(Skip const &skip)
+          : skip_(proto::as_expr(skip))  // the skip expression is stored as a member of type skip_type
+        {}
+        // Function-object protocol: result<This(Expr)>::type is the return type of operator()
+        template<typename Sig>
+        struct result;
+        // The result is "SkipGrammar-rewritten Expr >> *skip"
+        template<typename This, typename Expr>
+        struct result<This(Expr)>
+          : proto::shift_right<
+                typename SkipGrammar::result<void(
+                    typename proto::result_of::as_expr<Expr>::type
+                  , skip_type
+                  , mpl::void_
+                )>::type
+              , typename proto::dereference<skip_type>::type
+            >
+        {};
+        // Rewrites expr through SkipGrammar (state = skip_) and appends a final *skip_
+        template<typename Expr>
+        typename result<skip_directive(Expr)>::type
+        operator ()(Expr const &expr) const
+        {
+            mpl::void_ ignore;  // placeholder visitor argument; skip_primitives does not read it
+            typedef typename result<skip_directive(Expr)>::type result_type;
+            result_type result = {SkipGrammar()(proto::as_expr(expr), this->skip_, ignore), {skip_}};  // {rewritten, {skip_}}: lhs >> *skip_
+            return result;
+        }
+
+    private:
+        skip_type skip_;  // the expression describing what to skip
+    };
+
 /*
 ///////////////////////////////////////////////////////////////////////////////
 /// INTERNAL ONLY
@@ -637,6 +724,48 @@
 proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}};
 proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}};
 
+///////////////////////////////////////////////////////////////////////////////
+/// \brief Specify which characters to skip when matching a regex.
+///
+/// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching
+/// a regex. It is most useful for writing regexes that ignore whitespace.
+/// For instance, the following specifies a regex that skips whitespace and
+/// punctuation:
+///
+/// \code
+/// // A sentence is one or more words separated by whitespace
+/// // and punctuation.
+/// sregex word = +alpha;
+/// sregex sentence = skip(set[_s | punct])( +word );
+/// \endcode
+///
+/// The way it works in the above example is to insert
+/// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex.
+/// A "primitive" includes terminals like strings, character sets and nested
+/// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the
+/// regex. The regex <tt>sentence</tt> specified above is equivalent to
+/// the following:
+///
+/// \code
+/// sregex sentence = +( keep(*set[_s | punct]) >> word )
+///                        >> *set[_s | punct];
+/// \endcode
+///
+/// \attention Skipping does not affect how nested regexes are handled because
+/// they are treated atomically. String literals are also treated
+/// atomically; that is, no skipping is done within a string literal. So
+/// <tt>skip(_s)("this that")</tt> is not the same as
+/// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match
+/// when there is only one space between "this" and "that". The second will
+/// skip any and all whitespace between "this" and "that".
+///
+/// \param skip A regex that specifies which characters to skip.
+template<typename Skip>
+detail::skip_directive<Skip> skip(Skip const &skip)
+{
+    return detail::skip_directive<Skip>(skip);  // a function object: calling it with a regex, skip(x)(rx), yields the rewritten regex
+}
+
 namespace detail
 {
     inline void ignore_unused_regex_primitives()
Modified: trunk/libs/xpressive/test/Jamfile.v2
==============================================================================
--- trunk/libs/xpressive/test/Jamfile.v2	(original)
+++ trunk/libs/xpressive/test/Jamfile.v2	2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
@@ -57,6 +57,7 @@
          [ run test_symbols.cpp ]
          [ run test_dynamic.cpp ]
          [ run test_dynamic_grammar.cpp ]
+         [ run test_skip.cpp ]
          [ link multiple_defs1.cpp multiple_defs2.cpp : : multiple_defs ]
          [ compile test_basic_regex.cpp ]
          [ compile test_match_results.cpp ]
Added: trunk/libs/xpressive/test/test_skip.cpp
==============================================================================
--- (empty file)
+++ trunk/libs/xpressive/test/test_skip.cpp	2008-01-14 15:26:58 EST (Mon, 14 Jan 2008)
@@ -0,0 +1,96 @@
+///////////////////////////////////////////////////////////////////////////////
+// test_skip.cpp
+//
+//  Copyright 2004 Eric Niebler. Distributed under the Boost
+//  Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <map>
+#include <iostream>
+#include <boost/xpressive/xpressive.hpp>
+#include <boost/xpressive/regex_actions.hpp>
+#include <boost/test/unit_test.hpp>
+
+using namespace boost::unit_test;
+using namespace boost::xpressive;
+
+void test1()  // runtime checks: regexes built with skip() must match the sample strings
+{
+    std::string s = "a a b b c c";
+
+    sregex rx =
+        "a a" >>
+        skip(_s)
+        (
+             (s1= as_xpr('b')) >>
+             as_xpr('b') >>
+            *as_xpr('c')    // causes backtracking
+        ) >> 
+        "c c";
+    // the literals "a a" and "c c" sit outside skip(_s)(...); only the middle part skips whitespace
+    smatch what;
+    BOOST_CHECK( regex_match(s, what, rx) );
+    // skip a single character: commas between the digits are ignored
+    s = "123,456,789";
+    sregex rx2 = skip(',')(+_d);
+    BOOST_CHECK( regex_match(s, what, rx2) );
+    // a lookbehind, after("fo"), used inside skip()
+    s = "foo";
+    sregex rx3 = skip(_s)(after("fo") >> 'o');
+    BOOST_CHECK( regex_search(s, what, rx3) );
+}
+
+template<typename Expr>
+void test_skip_aux(Expr const &expr)  // helper: checks that skip(_s)(expr) converts to an sregex
+{
+    sregex rx = skip(_s)(expr);  // the regex is only constructed; nothing is matched against it
+}
+
+void test_skip()  // passes many kinds of static regex expressions through skip(_s)
+{
+    int i=0;  // target of the semantic actions in the last two cases
+    std::map<std::string, int> syms;  // used by the (a1= syms) case
+    std::locale loc;  // used by the imbue(loc) case
+    // one call per expression form; see test_skip_aux
+    test_skip_aux( 'a' );
+    test_skip_aux( _ );
+    test_skip_aux( +_ );
+    test_skip_aux( -+_ );
+    test_skip_aux( !_ );
+    test_skip_aux( -!_ );
+    test_skip_aux( repeat<0,42>(_) );
+    test_skip_aux( -repeat<0,42>(_) );
+    test_skip_aux( _ >> 'a' );
+    test_skip_aux( _ >> 'a' | _ );
+    test_skip_aux( _ >> 'a' | _ >> 'b' );
+    test_skip_aux( s1= _ >> 'a' | _ >> 'b' );
+    test_skip_aux( icase(_ >> 'a' | _ >> 'b') );
+    test_skip_aux( imbue(loc)(_ >> 'a' | _ >> 'b') );
+    test_skip_aux( (set='a') );
+    test_skip_aux( (set='a','b') );
+    test_skip_aux( ~(set='a') );
+    test_skip_aux( ~(set='a','b') );
+    test_skip_aux( range('a','b') );
+    test_skip_aux( ~range('a','b') );
+    test_skip_aux( set['a' | alpha] );
+    test_skip_aux( ~set['a' | alpha] );
+    test_skip_aux( before(_) );
+    test_skip_aux( ~before(_) );
+    test_skip_aux( after(_) );
+    test_skip_aux( ~after(_) );
+    test_skip_aux( keep(*_) );
+    test_skip_aux( (*_)[ref(i) = as<int>(_) + 1] );
+    test_skip_aux( (a1= syms)[ref(i) = a1 + 1] );
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// init_unit_test_suite
+//   Creates the "test skip()" suite and registers its test cases.
+test_suite* init_unit_test_suite( int argc, char* argv[] )  // argc and argv are not used
+{
+    test_suite *test = BOOST_TEST_SUITE("test skip()");
+
+    test->add(BOOST_TEST_CASE(&test1));
+    // NOTE(review): test_skip() is never added here, so it is only compile-checked -- confirm this is intended
+    return test;
+}