<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r54676 - in trunk/boost/spirit/home/lex: . lexer lexer/lexertl
From: hartmut.kaiser_at_[hidden]
Date: 2009-07-05 14:50:31
Author: hkaiser
Date: 2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
New Revision: 54676
URL: http://svn.boost.org/trac/boost/changeset/54676
Log:
Spirit: Added support for accessing the token value from a lexer semantic action
Text files modified: 
   trunk/boost/spirit/home/lex/argument.hpp                   |    41 ++++++++++++++++++++++++++++++++++++++  
   trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp      |    22 +++++++++++++++----                     
   trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp |    43 +++++++++++++++++++++++++++++---------- 
   trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp        |     3 -                                       
   trunk/boost/spirit/home/lex/lexer/lexertl/token.hpp        |    18 ++++++++++++++-                         
   trunk/boost/spirit/home/lex/lexer/pass_flags.hpp           |     7 +++--                                   
   trunk/boost/spirit/home/lex/tokenize_and_parse.hpp         |    25 ++++++++++++++++++++++                  
   7 files changed, 135 insertions(+), 24 deletions(-)
Modified: trunk/boost/spirit/home/lex/argument.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/argument.hpp	(original)
+++ trunk/boost/spirit/home/lex/argument.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -133,6 +133,43 @@
     };
 
     ///////////////////////////////////////////////////////////////////////////
+    //  The value_getter is used to create the _value placeholder, which is a 
+    //  Phoenix actor used to access or change the value of the current token.
+    //
+    //  This actor is invoked whenever the placeholder '_value' is used in a
+    //  lexer semantic action:
+    //
+    //      lex::token_def<> identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
+    //      this->self = identifier 
+    //          [ _value = construct_<std::string>(_start, _end) ];
+    //
+    //  The example shows how to use _value to set the identifier name as the 
+    //  token value.
+    struct value_getter
+    {
+        typedef mpl::true_ no_nullary;
+
+        template <typename Env>
+        struct result
+        {
+            typedef typename
+                remove_const<
+                    typename mpl::at_c<typename Env::args_type, 4>::type
+                >::type
+            context_type;
+
+            typedef typename context_type::token_value_type& type;
+        };
+
+        template <typename Env>
+        typename result<Env>::type 
+        eval(Env const& env) const
+        {
+            return fusion::at_c<4>(env.args()).value();
+        }
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
     //  The eoi_getter is used to create the _eoi placeholder, which is a 
     //  Phoenix actor used to access the end of input iterator pointing to the 
     //  end of the underlying input sequence.
@@ -184,6 +221,10 @@
     // token
     phoenix::actor<phoenix::argument<3> > const _tokenid = phoenix::argument<3>();
 
+    // '_value' may be used to access and change the token value of the current
+    // token
+    phoenix::actor<value_getter> const _value = value_getter();
+
     // _state may be used to access and change the name of the current lexer 
     // state
     phoenix::actor<state_context> const _state = state_context();
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -58,7 +58,7 @@
     //
     ///////////////////////////////////////////////////////////////////////////
     template <typename Token
-      , template <typename, typename, typename> class FunctorData
+      , template <typename, typename, typename, typename> class FunctorData
       , typename Iterator = typename Token::iterator_type
       , typename SupportsActors = mpl::false_
       , typename SupportsState = typename Token::has_state>
@@ -73,7 +73,9 @@
         // Needed by compilers not implementing the resolution to DR45. For
         // reference, see
         // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
-        friend class FunctorData<Iterator, SupportsActors, SupportsState>;
+        typedef typename Token::token_value_type token_value_type;
+        friend class FunctorData<Iterator, SupportsActors, SupportsState
+          , token_value_type>;
 
         // Helper template allowing to assign a value on exit
         template <typename T>
@@ -110,7 +112,8 @@
         // interface to the iterator_policies::split_functor_input policy
         typedef Token result_type;
         typedef functor unique;
-        typedef FunctorData<Iterator, SupportsActors, SupportsState> shared;
+        typedef FunctorData<Iterator, SupportsActors, SupportsState
+          , token_value_type> shared;
 
         BOOST_SPIRIT_EOF_PREFIX result_type const eof;
 
@@ -190,6 +193,13 @@
                     assign_on_exit<Iterator> on_exit(data.get_first(), end);
                     return result = result_type(id, state, data.get_first(), end);
                 }
+                else if (pass_flags::pass_use_value == pass) {
+                    // return matched token using the token value as set before
+                    // using data.set_value(), advancing 'data.first_' past the 
+                    // matched sequence
+                    assign_on_exit<Iterator> on_exit(data.get_first(), end);
+                    return result = result_type(id, state, data.value());
+                }
                 else if (pass_flags::pass_fail == pass) {
                     // if the data.first_ got adjusted above, revert this adjustment
                     if (adjusted)
@@ -199,7 +209,9 @@
                     return result = result_type(0); 
                 }
 
-            // if this token needs to be ignored, just repeat the matching
+            // if this token needs to be ignored, just repeat the matching,
+            // while starting right after the current match
+                data.get_first() = end;
 
             } while (true);
         }
@@ -238,7 +250,7 @@
     //  eof token
     ///////////////////////////////////////////////////////////////////////////
     template <typename Token
-      , template <typename, typename, typename> class FunctorData
+      , template <typename, typename, typename, typename> class FunctorData
       , typename Iterator, typename SupportsActors, typename SupportsState>
     typename functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::result_type const
         functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof = 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -23,13 +23,14 @@
     namespace detail
     {
         ///////////////////////////////////////////////////////////////////////
-        template <typename Iterator, typename HasActors, typename HasState>
+        template <typename Iterator, typename HasActors, typename HasState
+          , typename TokenValue>
         struct data;    // no default specialization
 
         ///////////////////////////////////////////////////////////////////////
         //  neither supports state, nor actors
-        template <typename Iterator>
-        struct data<Iterator, mpl::false_, mpl::false_>
+        template <typename Iterator, typename TokenValue>
+        struct data<Iterator, mpl::false_, mpl::false_, TokenValue>
         {
         protected:
             typedef typename 
@@ -38,6 +39,7 @@
 
         public:
             typedef Iterator base_iterator_type;
+            typedef unused_type token_value_type;
             typedef std::size_t state_type;
             typedef char_type const* state_name_type;
             typedef unused_type semantic_actions_type;
@@ -146,6 +148,8 @@
             Iterator const& get_first() const { return first_; }
             Iterator const& get_last() const { return last_; }
 
+            unused_type value() const { return unused; }
+
         protected:
             Iterator& first_;
             Iterator last_;
@@ -156,16 +160,17 @@
 
         ///////////////////////////////////////////////////////////////////////
         //  doesn't support lexer semantic actions
-        template <typename Iterator>
-        struct data<Iterator, mpl::false_, mpl::true_>
-          : data<Iterator, mpl::false_, mpl::false_>
+        template <typename Iterator, typename TokenValue>
+        struct data<Iterator, mpl::false_, mpl::true_, TokenValue>
+          : data<Iterator, mpl::false_, mpl::false_, TokenValue>
         {
         protected:
-            typedef data<Iterator, mpl::false_, mpl::false_> base_type;
+            typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
             typedef typename base_type::char_type char_type;
 
         public:
             typedef Iterator base_iterator_type;
+            typedef unused_type token_value_type;
             typedef typename base_type::state_type state_type;
             typedef typename base_type::state_name_type state_name_type;
             typedef typename base_type::semantic_actions_type 
@@ -221,22 +226,23 @@
 
         ///////////////////////////////////////////////////////////////////////
         //  does support lexer semantic actions, may support state
-        template <typename Iterator, typename HasState>
-        struct data<Iterator, mpl::true_, HasState> 
-          : data<Iterator, mpl::false_, HasState>
+        template <typename Iterator, typename HasState, typename TokenValue>
+        struct data<Iterator, mpl::true_, HasState, TokenValue> 
+          : data<Iterator, mpl::false_, HasState, TokenValue>
         {
         public:
             typedef semantic_actions<Iterator, HasState, data> 
                 semantic_actions_type;
 
         protected:
-            typedef data<Iterator, mpl::false_, HasState> base_type;
+            typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
             typedef typename base_type::char_type char_type;
             typedef typename semantic_actions_type::functor_wrapper_type
                 functor_wrapper_type;
 
         public:
             typedef Iterator base_iterator_type;
+            typedef TokenValue token_value_type;
             typedef typename base_type::state_type state_type;
             typedef typename base_type::state_name_type state_name_type;
 
@@ -310,10 +316,25 @@
                 has_hold_ = true;
             }
 
+            TokenValue const& value() const 
+            {
+                return value_;
+            }
+            TokenValue& value()
+            {
+                return value_;
+            }
+            template <typename Value>
+            void set_value(Value const& val)
+            {
+                value_ = val;
+            }
+
         protected:
             semantic_actions_type const& actions_;
             Iterator hold_;     // iterator needed to support lex::more()
             bool has_hold_;     // 'true' if hold_ is valid
+            TokenValue value_;
         };
     }
 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -205,7 +205,7 @@
     ///////////////////////////////////////////////////////////////////////////
     template <typename Token = token<>
       , typename Iterator = typename Token::iterator_type
-      , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::false_>
+      , typename Functor = functor<Token, lexertl::detail::data, Iterator>
       , typename TokenSet = lex::token_set<token_set<Token, Iterator> > >
     class lexer 
     {
@@ -362,7 +362,6 @@
     private:
         // lexertl specific data
         mutable boost::lexer::basic_state_machine<char_type> state_machine_;
-        std::size_t unique_ids_;
         boost::lexer::regex_flags flags_;
         boost::lexer::basic_rules<char_type> rules_;
 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/token.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/token.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/token.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -117,6 +117,7 @@
         typedef Iterator iterator_type;
         typedef mpl::false_ has_state;
         typedef std::size_t id_type;
+        typedef unused_type token_value_type;
 
         //  default constructed tokens correspond to EOI tokens
         token() : id_(boost::lexer::npos) {}
@@ -126,6 +127,9 @@
 
         token(id_type id, std::size_t) : id_(id) {}
 
+        token(id_type id, std::size_t, token_value_type)
+          : id_(id) {}
+
         token(id_type id, std::size_t, Iterator const& first
               , Iterator const& last)
           : id_(id) 
@@ -204,6 +208,10 @@
         token(id_type id, std::size_t state)
           : base_type(id, boost::lexer::npos), state_(state) {}
 
+        token(id_type id, std::size_t state, token_value_type)
+          : base_type(id, boost::lexer::npos, unused)
+          , state_(state) {}
+
         token(id_type id, std::size_t state
               , Iterator const& first, Iterator const& last)
           : base_type(id, boost::lexer::npos, first, last)
@@ -292,11 +300,13 @@
         //  from the iterator pair to the required data type is done when it is
         //  accessed for the first time.
         typedef iterator_range<Iterator> iterpair_type;
+
+    public:
+        typedef typename base_type::id_type id_type;
         typedef typename detail::token_value_type<
             iterpair_type, AttributeTypes
         >::type token_value_type;
 
-    public:
         typedef Iterator iterator_type;
 
         //  default constructed tokens correspond to EOI tokens
@@ -307,7 +317,11 @@
           : base_type(0)
           , value_(iterpair_type(iterator_type(), iterator_type())) {}
 
-        token(std::size_t id, std::size_t state, Iterator const& first
+        token(id_type id, std::size_t state, token_value_type const& value)
+          : base_type(id, state, value)
+          , value_(value) {}
+
+        token(id_type id, std::size_t state, Iterator const& first
               , Iterator const& last)
           : base_type(id, state, first, last)
           , value_(iterpair_type(first, last)) {}
Modified: trunk/boost/spirit/home/lex/lexer/pass_flags.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/pass_flags.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/pass_flags.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -17,9 +17,10 @@
     ///////////////////////////////////////////////////////////////////////////
     BOOST_SCOPED_ENUM_START(pass_flags) 
     { 
-        pass_fail = 0,       // make the current match fail in retrospective
-        pass_normal = 1,     // continue normal token matching, that's the default 
-        pass_ignore = 2      // ignore the current token and start matching the next
+        pass_fail = 0,        // make the current match fail in retrospect
+        pass_normal = 1,      // continue normal token matching, that's the default 
+        pass_ignore = 2,      // ignore the current token and start matching the next
+        pass_use_value = 3    // continue normal matching but use token value as set before
     };
     BOOST_SCOPED_ENUM_END
 
Modified: trunk/boost/spirit/home/lex/tokenize_and_parse.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/tokenize_and_parse.hpp	(original)
+++ trunk/boost/spirit/home/lex/tokenize_and_parse.hpp	2009-07-05 14:50:29 EDT (Sun, 05 Jul 2009)
@@ -249,6 +249,29 @@
     //                  in its 'INITIAL' state.
     //
     ///////////////////////////////////////////////////////////////////////////
+    namespace detail
+    {
+        template <typename Token>
+        bool tokenize_callback(Token const& t, void (*f)(Token const&))
+        {
+            f(t);
+            return true;
+        }
+
+        template <typename Token, typename Eval>
+        bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
+        {
+            f(t);
+            return true;
+        }
+
+        template <typename Token>
+        bool tokenize_callback(Token const& t, bool (*f)(Token const&))
+        {
+            return f(t);
+        }
+    }
+
     template <typename Iterator, typename Lexer, typename F>
     inline bool
     tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
@@ -260,7 +283,7 @@
         iterator_type end = lex.end();
         for (/**/; iter != end; ++iter) 
         {
-            if (!f(*iter))
+            if (!detail::tokenize_callback(*iter, f))
                 return false;
         }
         return (iter == end) ? true : false;