<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r53178 - in trunk: boost/spirit/home/lex boost/spirit/home/lex/detail boost/spirit/home/lex/lexer boost/spirit/home/lex/lexer/lexertl boost/spirit/home/support/detail/lexer boost/spirit/home/support/detail/lexer/conversion boost/spirit/home/support/detail/lexer/parser boost/spirit/home/support/detail/lexer/parser/tree libs/spirit/example/lex/static_lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-05-22 12:34:32
Author: hkaiser
Date: 2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
New Revision: 53178
URL: http://svn.boost.org/trac/boost/changeset/53178
Log:
Spirit: Updated Spirit.Lex to support unique ids provided by the underlying lexertl implementation. Speeds up things quite a bit...
Text files modified: 
   trunk/boost/spirit/home/lex/detail/sequence_function.hpp                       |    22 +++++                                   
   trunk/boost/spirit/home/lex/lexer/action.hpp                                   |     7 ++                                      
   trunk/boost/spirit/home/lex/lexer/char_token_def.hpp                           |     9 +                                       
   trunk/boost/spirit/home/lex/lexer/lexer.hpp                                    |    12 +++                                     
   trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp                          |   128 +++++++++++++++------------------------ 
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp                  |   111 +++++++++++++++++-----------------      
   trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp               |    88 ++++-----------------------             
   trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp                            |    89 +++++++++++++++------------             
   trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp                   |    76 ++++++++++++-----------                 
   trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp                     |     9 ++                                      
   trunk/boost/spirit/home/lex/lexer/sequence.hpp                                 |    11 ++                                      
   trunk/boost/spirit/home/lex/lexer/string_token_def.hpp                         |     3                                         
   trunk/boost/spirit/home/lex/lexer/token_def.hpp                                |     7 +                                       
   trunk/boost/spirit/home/lex/lexer/token_set.hpp                                |    36 +++++++---                              
   trunk/boost/spirit/home/lex/reference.hpp                                      |     8 ++                                      
   trunk/boost/spirit/home/support/detail/lexer/consts.hpp                        |     8 +-                                      
   trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp |     2                                         
   trunk/boost/spirit/home/support/detail/lexer/debug.hpp                         |    12 +++                                     
   trunk/boost/spirit/home/support/detail/lexer/file_input.hpp                    |    31 +++++++--                               
   trunk/boost/spirit/home/support/detail/lexer/generator.hpp                     |    27 +++++---                                
   trunk/boost/spirit/home/support/detail/lexer/input.hpp                         |    70 +++++++++++++++++----                   
   trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp                 |    10 +-                                      
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp          |    10 ++                                      
   trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp              |     5 +                                       
   trunk/boost/spirit/home/support/detail/lexer/rules.hpp                         |   127 +++++++++++++++++++++++++++++++++------ 
   trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp                 |     6 +                                       
   trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp               |   101 +++++++++++++++++--------------         
   27 files changed, 607 insertions(+), 418 deletions(-)
Modified: trunk/boost/spirit/home/lex/detail/sequence_function.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/detail/sequence_function.hpp	(original)
+++ trunk/boost/spirit/home/lex/detail/sequence_function.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -16,22 +16,38 @@
 namespace boost { namespace spirit { namespace lex { namespace detail
 {
     template <typename LexerDef, typename String>
-    struct sequence_function
+    struct sequence_collect_function
     {
-        sequence_function(LexerDef& def_, String const& state_)
+        sequence_collect_function(LexerDef& def_, String const& state_)
           : def(def_), state(state_) {}
 
         template <typename Component>
         bool operator()(Component const& component) const
         {
             component.collect(def, state);
-            return false;   // execute for all sequence elements
+            return false;     // execute for all sequence elements
         }
 
         LexerDef& def;
         String const& state;
     };
 
+    template <typename LexerDef>
+    struct sequence_add_actions_function
+    {
+        sequence_add_actions_function(LexerDef& def_)
+          : def(def_) {}
+
+        template <typename Component>
+        bool operator()(Component const& component) const
+        {
+            component.add_actions(def);
+            return false;     // execute for all sequence elements
+        }
+
+        LexerDef& def;
+    };
+
 }}}}
 
 #endif
Modified: trunk/boost/spirit/home/lex/lexer/action.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/action.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/action.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -32,6 +32,13 @@
             // first collect the token definition information for the token_def 
             // this action is attached to
             subject.collect(lexdef, state);
+        }
+
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const
+        {
+            // call to add all actions attached further down the hierarchy 
+            subject.add_actions(lexdef);
 
             // retrieve the id of the associated token_def and register the 
             // given semantic action with the lexer instance
Modified: trunk/boost/spirit/home/lex/lexer/char_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/char_token_def.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/char_token_def.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -61,18 +61,21 @@
     {
         typedef typename CharEncoding::char_type char_type;
 
-        char_token_def(char_type ch) : ch (ch) {}
+        char_token_def(char_type ch) : ch(ch), unique_id_(~0) {}
 
         template <typename LexerDef, typename String>
         void collect(LexerDef& lexdef, String const& state) const
         {
-            lexdef.add_token (state.c_str(), ch, 
-                static_cast<std::size_t>(ch));
+            lexdef.add_token (state.c_str(), ch, static_cast<std::size_t>(ch));
         }
 
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const {}
+
         std::size_t id() const { return static_cast<std::size_t>(ch); }
 
         char_type ch;
+        mutable std::size_t unique_id_;
     };
 
     ///////////////////////////////////////////////////////////////////////////
Modified: trunk/boost/spirit/home/lex/lexer/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexer.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -197,12 +197,22 @@
             };
             friend struct pattern_adder;
 
+        private:
+            // Helper function to invoke the necessary 2 step compilation
+            // process on token definition expressions
+            template <typename TokenExpr>
+            void compile2pass(TokenExpr const& expr) 
+            {
+                expr.collect(def, state);
+                expr.add_actions(def);
+            }
+
         public:
             ///////////////////////////////////////////////////////////////////
             template <typename Expr>
             void define(Expr const& expr)
             {
-                compile<lex::domain>(expr).collect(def, state);
+                compile2pass(compile<lex::domain>(expr));
             }
 
             lexer_def_(LexerDef& def_, string_type const& state_)
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -57,14 +57,15 @@
               , rules(data_.rules_)
               , first(first_), last(last_) {}
 
-            std::size_t next(Iterator& end)
+            std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
-                return tokenizer::next(state_machine, first, end, last);
+                return tokenizer::next(state_machine, first, end, last, unique_id);
             }
 
             // nothing to invoke, so this is empty
-            bool invoke_actions(std::size_t, std::size_t, Iterator const&) 
+            bool invoke_actions(std::size_t, std::size_t, std::size_t
+              , Iterator const&) 
             {
                 return true;    // always accept
             }
@@ -96,11 +97,11 @@
             data (IterData const& data_, Iterator& first_, Iterator const& last_)
               : base_type(data_, first_, last_), state(0) {}
 
-            std::size_t next(Iterator& end)
+            std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
                 return tokenizer::next(this->state_machine, state, 
-                    this->first, end, this->last);
+                    this->first, end, this->last, unique_id);
             }
 
             std::size_t& get_state() { return state; }
@@ -135,60 +136,36 @@
 
             typedef void functor_type(iterpair_type, std::size_t, bool&, data&);
             typedef boost::function<functor_type> functor_wrapper_type;
-            struct action_key
-            {
-                action_key(std::size_t id, std::size_t state)
-                  : id_(id), state_(state) {}
-                action_key(std::pair<std::size_t, std::size_t> const& k)
-                  : id_(k.first), state_(k.second) {}
-
-                friend bool operator<(action_key const& lhs, action_key const& rhs)
-                {
-                    return lhs.id_ < rhs.id_ || 
-                          (lhs.id_ == rhs.id_ && lhs.state_ < rhs.state_);
-                }
-
-                std::size_t id_;
-                std::size_t state_;
-            };
-            typedef std::multimap<action_key, functor_wrapper_type> 
+            typedef std::vector<std::vector<functor_wrapper_type> >
                 semantic_actions_type;
 
-            typedef detail::wrap_action<functor_wrapper_type, iterpair_type, data>
-                wrap_action_type;
+            typedef detail::wrap_action<functor_wrapper_type
+              , iterpair_type, data> wrap_action_type;
 
             template <typename IterData>
             data (IterData const& data_, Iterator& first_, Iterator const& last_)
               : base_type(data_, first_, last_)
-              , actions(data_.actions_) {}
+              , actions_(data_.actions_) {}
 
             // invoke attached semantic actions, if defined
-            bool invoke_actions(std::size_t id, std::size_t state
-              , Iterator const& end)
+            bool invoke_actions(std::size_t state, std::size_t id
+              , std::size_t unique_id, Iterator const& end)
             {
-                if (actions.empty()) 
-                    return true;  // nothing to invoke, continue with 'match'
+                if (state >= actions_.size())
+                    return true;    // no action defined for this state
 
-                iterpair_type itp(this->first, end);
-                bool match = true;
+                std::vector<functor_wrapper_type> const& actions = actions_[state];
 
-                typedef typename semantic_actions_type::const_iterator 
-                    iterator_type;
+                if (unique_id >= actions.size() || !actions[unique_id]) 
+                    return true;    // nothing to invoke, continue with 'match'
 
-                std::pair<iterator_type, iterator_type> p = 
-                    actions.equal_range(action_key(id, state));
-
-                while (p.first != p.second)
-                {
-                    ((*p.first).second)(itp, id, match, *this);
-                    if (!match)
-                        return false;   // return a 'no-match'
-                    ++p.first;
-                }
-                return true;    // normal execution
+                iterpair_type itp(this->first, end);
+                bool match = true;
+                actions[unique_id](itp, id, match, *this);
+                return match;
             }
 
-            semantic_actions_type const& actions;
+            semantic_actions_type const& actions_;
         };
     }
 
@@ -297,7 +274,8 @@
 #endif
 
             Iterator end = data.first;
-            std::size_t id = data.next(end);
+            std::size_t unique_id = boost::lexer::npos;
+            std::size_t id = data.next(end, unique_id);
 
             if (boost::lexer::npos == id) {   // no match
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
@@ -309,47 +287,43 @@
                 std::cerr << "Not matched, in state: " << data.state 
                           << ", lookahead: >" << next << "<" << std::endl;
 #endif
-                result = result_type(0);
+                return result = result_type(0);
             }
             else if (0 == id) {         // EOF reached
 #if defined(BOOST_SPIRIT_STATIC_EOF)
-                result = eof;
+                return result = eof;
 #else
-                result = mp.ftor.eof;
+                return result = mp.ftor.eof;
 #endif
             }
-            else {
+
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
-                {
-                    std::string next;
-                    Iterator it = end;
-                    for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
-                        next += *it;
-
-                    std::cerr << "Matched: " << id << ", in state: " 
-                              << data.state << ", string: >" 
-                              << std::basic_string<char_type>(data.first, end) << "<"
-                              << ", lookahead: >" << next << "<" << std::endl;
-                }
+            {
+                std::string next;
+                Iterator it = end;
+                for (std::size_t i = 0; i < 10 && it != data.last; ++it, ++i)
+                    next += *it;
+
+                std::cerr << "Matched: " << id << ", in state: " 
+                          << data.state << ", string: >" 
+                          << std::basic_string<char_type>(data.first, end) << "<"
+                          << ", lookahead: >" << next << "<" << std::endl;
+            }
 #endif
-                // invoke_actions might change state
-                std::size_t state = data.get_state();
+            // invoke_actions might change state
+            std::size_t state = data.get_state();
 
-                // invoke attached semantic actions, if defined
-                if (!data.invoke_actions(id, state, end))
-                {
-                    // one of the semantic actions signaled no-match
-                    result = result_type(0); 
-                }
-                else 
-                {
-                    // return matched token, advancing 'data.first' past the 
-                    // matched sequence
-                    assign_on_exit<Iterator> on_exit(data.first, end);
-                    result = result_type(id, state, data.first, end);
-                }
+            // invoke attached semantic actions, if defined
+            if (!data.invoke_actions(state, id, unique_id, end))
+            {
+                // one of the semantic actions signaled no-match
+                return result = result_type(0); 
             }
-            return result;
+
+            // return matched token, advancing 'data.first' past the 
+            // matched sequence
+            assign_on_exit<Iterator> on_exit(data.first, end);
+            return result = result_type(id, state, data.first, end);
         }
 
         // set_state are propagated up to the iterator interface, allowing to 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -68,6 +68,10 @@
         }
         os_ << "};\n\n";
 
+        os_ << "// this variable defines the number of lexer states\n";
+        os_ << "std::size_t const lexer_state_count = " 
+            << rules_.statemap().size() << ";\n\n";
+
         return true;
     }
 
@@ -137,11 +141,12 @@
             os_ << "\n    ";
         }
 
-        os_ << "Iterator &start_token_, Iterator const& end_)\n";
+        os_ << "Iterator &start_token_, Iterator const& end_, ";
+        os_ << "std::size_t& unique_id_)\n";
         os_ << "{\n";
-        os_ << "    enum {end_state_index, id_index, state_index, bol_index, "
-          "eol_index,\n";
-        os_ << "        dead_state_index, dfa_offset};\n";
+        os_ << "    enum {end_state_index, id_index, unique_id_index, "
+          "state_index, bol_index,\n";
+        os_ << "        eol_index, dead_state_index, dfa_offset};\n";
         os_ << "    static const std::size_t npos = static_cast"
           "<std::size_t>(~0);\n";
 
@@ -156,7 +161,7 @@
                 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
 
                 os_ << "    static const std::size_t lookup" << state_ 
-                    << "_[" << lookups_ << "] = {";
+                    << "_[" << lookups_ << "] = {\n        ";
 
                 for (/**/; i_ < count_; ++i_)
                 {
@@ -171,16 +176,16 @@
 
                     if (i_ < count_ - 1)
                     {
-                        os_ << "," << std::endl << "        ";
+                        os_ << ",\n        ";
                     }
 
                     j_ = 1;
                 }
 
-                os_ << "};\n";
+                os_ << " };\n";
                 count_ = sm_.data()._dfa[state_]->size ();
                 os_ << "    static const std::size_t dfa" << state_ << "_[" <<
-                    count_ << "] = {";
+                    count_ << "] = {\n        ";
                 count_ /= 8;
 
                 for (i_ = 0; i_ < count_; ++i_)
@@ -196,7 +201,7 @@
 
                     if (i_ < count_ - 1)
                     {
-                        os_ << "," << std::endl << "        ";
+                        os_ << ",\n        ";
                     }
                 }
 
@@ -218,25 +223,23 @@
                     }
                 }
 
-                os_ << "};\n";
+                os_ << " };\n";
             }
 
             std::size_t count_ = sm_.data()._dfa_alphabet.size();
             std::size_t i_ = 1;
 
             os_ << "    static const std::size_t *lookup_arr_[" << count_ <<
-                "] = {";
-
-            os_ << "lookup0_";
+                "] = { lookup0_";
 
             for (i_ = 1; i_ < count_; ++i_)
             {
                 os_ << ", " << "lookup" << i_ << "_";
             }
 
-            os_ << "};\n";
+            os_ << " };\n";
             os_ << "    static const std::size_t dfa_alphabet_arr_[" << 
-                count_ << "] = {";
+                count_ << "] = { ";
 
             os_ << sm_.data()._dfa_alphabet.front ();
 
@@ -245,9 +248,9 @@
                 os_ << ", " << sm_.data()._dfa_alphabet[i_];
             }
 
-            os_ << "};\n";
+            os_ << " };\n";
             os_ << "    static const std::size_t *dfa_arr_[" << count_ <<
-                "] = {";
+                "] = { ";
             os_ << "dfa0_";
 
             for (i_ = 1; i_ < count_; ++i_)
@@ -255,7 +258,7 @@
                 os_ << ", " << "dfa" << i_ << "_";
             }
 
-            os_ << "};\n";
+            os_ << " };\n";
         }
         else
         {
@@ -266,7 +269,7 @@
             std::size_t count_ = lookups_ / 8;
 
             os_ << "    static const std::size_t lookup_[";
-            os_ << sm_.data()._lookup[0]->size() << "] = {";
+            os_ << sm_.data()._lookup[0]->size() << "] = {\n        ";
 
             for (; i_ < count_; ++i_)
             {
@@ -281,17 +284,17 @@
 
                 if (i_ < count_ - 1)
                 {
-                    os_ << "," << std::endl << "        ";
+                    os_ << ",\n        ";
                 }
 
                 j_ = 1;
             }
 
-            os_ << "};\n";
+            os_ << " };\n";
             os_ << "    static const std::size_t dfa_alphabet_ = " <<
               sm_.data()._dfa_alphabet.front () << ";\n";
             os_ << "    static const std::size_t dfa_[" <<
-              sm_.data()._dfa[0]->size () << "] = {";
+              sm_.data()._dfa[0]->size () << "] = {\n        ";
                 count_ = sm_.data()._dfa[0]->size () / 8;
 
             for (i_ = 0; i_ < count_; ++i_)
@@ -307,7 +310,7 @@
 
                 if (i_ < count_ - 1)
                 {
-                    os_ << "," << std::endl << "        ";
+                    os_ << ",\n        ";
                 }
             }
 
@@ -330,7 +333,7 @@
                 }
             }
 
-            os_ << "};\n";
+            os_ << " };\n";
         }
 
         os_ << "\n    if (start_token_ == end_) return 0;\n\n";
@@ -347,24 +350,20 @@
         os_ << "    Iterator curr_ = start_token_;\n";
         os_ << "    bool end_state_ = *ptr_ != 0;\n";
         os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
-        os_ << "    Iterator end_token_ = start_token_;\n";
-        os_ << '\n';
+        os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+        os_ << "    Iterator end_token_ = start_token_;\n\n";
+
         os_ << "    while (curr_ != end_)\n";
         os_ << "    {\n";
 
         if (sm_.data()._seen_BOL_assertion)
         {
-            os_ << "        std::size_t const BOL_state_ = ptr_[bol_index];\n";
+            os_ << "        std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
         }
 
         if (sm_.data()._seen_EOL_assertion)
         {
-            os_ << "        std::size_t const EOL_state_ = ptr_[eol_index];\n";
-        }
-
-        if (sm_.data()._seen_BOL_assertion || sm_.data()._seen_EOL_assertion)
-        {
-            os_ << '\n';
+            os_ << "        std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
         }
 
         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
@@ -396,7 +395,7 @@
             os_ << "            if (state_ == 0) break;\n";
             os_ << '\n';
             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
-            os_ << "        }\n";
+            os_ << "        }\n\n";
         }
         else if (sm_.data()._seen_BOL_assertion)
         {
@@ -411,8 +410,8 @@
 
             if (lookups_ == 256)
             {
-                os_ << "                ptr_[lookup_[static_cast<unsigned char>\n";
-                os_ << "                (*curr_++)]];\n";
+                os_ << "                ptr_[lookup_[static_cast<unsigned char>"
+                       "(*curr_++)]];\n";
             }
             else
             {
@@ -423,7 +422,7 @@
             os_ << "            if (state_ == 0) break;\n";
             os_ << '\n';
             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
-            os_ << "        }\n";
+            os_ << "        }\n\n";
         }
         else if (sm_.data()._seen_EOL_assertion)
         {
@@ -437,8 +436,8 @@
 
             if (lookups_ == 256)
             {
-                os_ << "                ptr_[lookup_[static_cast<unsigned char>\n";
-                os_ << "                    (*curr_++)]];\n";
+                os_ << "                ptr_[lookup_[static_cast<unsigned char>"
+                       "(*curr_++)]];\n";
             }
             else
             {
@@ -449,7 +448,7 @@
             os_ << "            if (state_ == 0) break;\n";
             os_ << '\n';
             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
-            os_ << "        }\n";
+            os_ << "        }\n\n";
         }
         else
         {
@@ -457,8 +456,8 @@
 
             if (lookups_ == 256)
             {
-                os_ << "            ptr_[lookup_[static_cast<unsigned char>\n";
-                os_ << "                (*curr_++)]];\n";
+                os_ << "            ptr_[lookup_[static_cast<unsigned char>"
+                       "(*curr_++)]];\n";
             }
             else
             {
@@ -468,14 +467,14 @@
             os_ << '\n';
             os_ << "        if (state_ == 0) break;\n";
             os_ << '\n';
-            os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+            os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
         }
 
-        os_ << '\n';
         os_ << "        if (*ptr_)\n";
         os_ << "        {\n";
         os_ << "            end_state_ = true;\n";
         os_ << "            id_ = *(ptr_ + id_index);\n";
+        os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
 
         if (dfas_ > 1)
         {
@@ -484,21 +483,21 @@
 
         os_ << "            end_token_ = curr_;\n";
         os_ << "        }\n";
-        os_ << "    }\n";
-        os_ << '\n';
+        os_ << "    }\n\n";
 
         if (sm_.data()._seen_EOL_assertion)
         {
-            os_ << "    const std::size_t EOL_state_ = ptr_[eol_index];\n";
-            os_ << '\n';
+            os_ << "    const std::size_t EOL_state_ = ptr_[eol_index];\n\n";
+
             os_ << "    if (EOL_state_ && curr_ == end_)\n";
             os_ << "    {\n";
-            os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
-            os_ << '\n';
+            os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
+
             os_ << "        if (*ptr_)\n";
             os_ << "        {\n";
             os_ << "            end_state_ = true;\n";
             os_ << "            id_ = *(ptr_ + id_index);\n";
+            os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
 
             if (dfas_ > 1)
             {
@@ -507,8 +506,7 @@
 
             os_ << "            end_token_ = curr_;\n";
             os_ << "        }\n";
-            os_ << "    }\n";
-            os_ << '\n';
+            os_ << "    }\n\n";
         }
 
         os_ << "    if (end_state_)\n";
@@ -518,7 +516,6 @@
 
         if (dfas_ > 1)
         {
-            os_ << '\n';
             os_ << "        if (id_ == 0) goto again;\n";
         }
 
@@ -533,8 +530,10 @@
         }
 
         os_ << "        id_ = npos;\n";
-        os_ << "    }\n";
-        os_ << '\n';
+        os_ << "        uid_ = npos;\n";
+        os_ << "    }\n\n";
+
+        os_ << "    unique_id_ = uid_;\n";
         os_ << "    return id_;\n";
         os_ << "}\n\n";
 
@@ -555,7 +554,7 @@
     {
         if (!lexer.init_dfa())
             return false;
-        return detail::generate_cpp(lexer.state_machine, lexer.rules, os
+        return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
           , name_suffix, false, false);
     }
 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -24,82 +24,11 @@
         typedef typename boost::detail::iterator_traits<Iterator>::value_type 
             char_type;
 
-//         static std::size_t next (const std::size_t * const lookup_,
-//             std::size_t const dfa_alphabet_, const std::size_t *  const dfa_,
-//             Iterator const& start_, Iterator &start_token_,
-//             Iterator const& end_)
-//         {
-//             if (start_token_ == end_) return 0;
-// 
-//             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
-//             Iterator curr_ = start_token_;
-//             bool end_state_ = *ptr_ != 0;
-//             std::size_t id_ = *(ptr_ + lexer::id_index);
-//             Iterator end_token_ = start_token_;
-// 
-//             while (curr_ != end_)
-//             {
-//                 std::size_t const BOL_state_ = ptr_[lexer::bol_index];
-//                 std::size_t const EOL_state_ = ptr_[lexer::eol_index];
-// 
-//                 if (BOL_state_ && (start_token_ == start_ ||
-//                     *(start_token_ - 1) == '\n'))
-//                 {
-//                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
-//                 }
-//                 else if (EOL_state_ && *curr_ == '\n')
-//                 {
-//                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-//                 }
-//                 else
-//                 {
-//                     std::size_t const state_ = ptr_[lookup_[*curr_++]];
-// 
-//                     if (state_ == 0)
-//                     {
-//                         break;
-//                     }
-// 
-//                     ptr_ = &dfa_[state_ * dfa_alphabet_];
-//                 }
-// 
-//                 if (*ptr_)
-//                 {
-//                     end_state_ = true;
-//                     id_ = *(ptr_ + lexer::id_index);
-//                     end_token_ = curr_;
-//                 }
-//             }
-// 
-//             const std::size_t EOL_state_ = ptr_[lexer::eol_index];
-// 
-//             if (EOL_state_ && curr_ == end_)
-//             {
-//                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
-// 
-//                 if (*ptr_)
-//                 {
-//                     end_state_ = true;
-//                     id_ = *(ptr_ + lexer::id_index);
-//                     end_token_ = curr_;
-//                 }
-//             }
-// 
-//             if (end_state_) {
-//                 // return longest match
-//                 start_token_ = end_token_;
-//             }
-//             else {
-//                 id_ = lexer::npos;
-//             }
-// 
-//             return id_;
-//         }
-
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
           , std::size_t &dfa_state_, Iterator const& start_
-          , Iterator &start_token_, Iterator const& end_)
+          , Iterator &start_token_, Iterator const& end_
+          , std::size_t& unique_id_)
         {
             if (start_token_ == end_) return 0;
 
@@ -112,6 +41,7 @@
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
+            std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -154,6 +84,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+                    uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     dfa_state_ = *(ptr_ + boost::lexer::state_index);
                     end_token_ = curr_;
                 }
@@ -169,6 +100,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+                    uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     dfa_state_ = *(ptr_ + boost::lexer::state_index);
                     end_token_ = curr_;
                 }
@@ -183,15 +115,18 @@
             }
             else {
                 id_ = boost::lexer::npos;
+                uid_ = boost::lexer::npos;
             }
 
+            unique_id_ = uid_;
             return id_;
         }
 
         ///////////////////////////////////////////////////////////////////////
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
-          , Iterator const& start_, Iterator &start_token_, Iterator const& end_)
+          , Iterator const& start_, Iterator &start_token_, Iterator const& end_
+          , std::size_t& unique_id_)
         {
             if (start_token_ == end_) return 0;
 
@@ -202,6 +137,7 @@
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
+            std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -244,6 +180,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+                    uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     end_token_ = curr_;
                 }
             }
@@ -258,6 +195,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
+                    uid_ = *(ptr_ + boost::lexer::unique_id_index);
                     end_token_ = curr_;
                 }
             }
@@ -268,8 +206,10 @@
             }
             else {
                 id_ = boost::lexer::npos;
+                uid_ = boost::lexer::npos;
             }
 
+            unique_id_ = uid_;
             return id_;
         }
     };
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/lexer.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -117,8 +117,7 @@
 
         std::size_t add_state(char_type const* state)
         {
-            rules.add_state(state);
-            return rules.state(state);
+            return rules.add_state(state);
         }
         string_type initial_state() const 
         { 
@@ -194,7 +193,7 @@
         // operator_bool() is needed for the safe_bool base class
         operator typename safe_bool<lexer>::result_type() const 
         { 
-            return safe_bool<lexer>()(initialized_dfa); 
+            return safe_bool<lexer>()(initialized_dfa_); 
         }
 
         typedef typename boost::detail::iterator_traits<Iterator>::value_type 
@@ -225,7 +224,7 @@
             if (!init_dfa())
                 return iterator_type();
 
-            iterator_data_type iterator_data = { state_machine, rules, actions };
+            iterator_data_type iterator_data = { state_machine_, rules_, actions_ };
             return iterator_type(iterator_data, first, last);
         }
 
@@ -249,10 +248,10 @@
         }
 
         //  Lexer instances can be created by means of a derived class only.
-        lexer(unsigned int flags_) 
-          : initialized_dfa(false), flags(map_flags(flags_)) 
+        lexer(unsigned int flags) 
+          : flags_(map_flags(flags)), initialized_dfa_(false)
         {
-            rules.flags(flags);
+            rules_.flags(flags_);
         }
 
     public:
@@ -261,15 +260,15 @@
             std::size_t token_id)
         {
             add_state(state);
-            rules.add(state, detail::escape(tokendef), token_id, state);
-            initialized_dfa = false;
+            rules_.add(state, detail::escape(tokendef), token_id, state);
+            initialized_dfa_ = false;
         }
         void add_token(char_type const* state, string_type const& tokendef, 
             std::size_t token_id)
         {
             add_state(state);
-            rules.add(state, tokendef, token_id, state);
-            initialized_dfa = false;
+            rules_.add(state, tokendef, token_id, state);
+            initialized_dfa_ = false;
         }
 
         // Allow a token_set to be associated with this lexer instance. This 
@@ -278,8 +277,8 @@
         void add_token(char_type const* state, token_set const& tokset)
         {
             add_state(state);
-            rules.add(state, tokset.get_rules());
-            initialized_dfa = false;
+            rules_.add(state, tokset.get_rules());
+            initialized_dfa_ = false;
         }
 
         // Allow to associate a whole lexer instance with another lexer 
@@ -287,12 +286,12 @@
         // lexer into this instance.
         template <typename Token_, typename Iterator_, typename Functor_
           , typename TokenSet_>
-        void add_token(char_type const* state
+        std::size_t add_token(char_type const* state
           , lexer<Token_, Iterator_, Functor_, TokenSet_> const& lexer_def)
         {
             add_state(state);
-            rules.add(state, lexer_def.get_rules());
-            initialized_dfa = false;
+            rules_.add(state, lexer_def.get_rules());
+            initialized_dfa_ = false;
         }
 
         // interface for pattern definition management
@@ -300,32 +299,31 @@
             string_type const& patterndef)
         {
             add_state(state);
-            rules.add_macro(name.c_str(), patterndef);
-            initialized_dfa = false;
+            rules_.add_macro(name.c_str(), patterndef);
+            initialized_dfa_ = false;
         }
 
-        boost::lexer::rules const& get_rules() const { return rules; }
+        boost::lexer::rules const& get_rules() const { return rules_; }
 
         void clear(char_type const* state)
         {
-            std::size_t s = rules.state(state);
+            std::size_t s = rules_.state(state);
             if (boost::lexer::npos != s)
-                rules.clear(state);
-            initialized_dfa = false;
+                rules_.clear(state);
+            initialized_dfa_ = false;
         }
         std::size_t add_state(char_type const* state)
         {
-            std::size_t stateid = rules.state(state);
+            std::size_t stateid = rules_.state(state);
             if (boost::lexer::npos == stateid) {
-                rules.add_state(state);
-                stateid = rules.state(state);
-                initialized_dfa = false;
+                stateid = rules_.add_state(state);
+                initialized_dfa_ = false;
             }
             return stateid;
         }
         string_type initial_state() const 
         { 
-            return string_type(rules.initial());
+            return string_type(rules_.initial());
         }
 
         //  Register a semantic action with the given id
@@ -341,32 +339,43 @@
                 value_type;
             typedef typename Functor::wrap_action_type wrapper_type;
 
-            actions.insert(value_type(std::make_pair(id, state)
-              , wrapper_type::call(act)));
+            if (actions_.size() <= state)
+                actions_.resize(state + 1); 
+
+            std::size_t unique_id = rules_.retrieve_id(state, id);
+            BOOST_ASSERT(boost::lexer::npos != unique_id);
+
+            value_type& actions (actions_[state]);
+            if (actions.size() <= unique_id)
+                actions.resize(unique_id + 1); 
+
+            actions[unique_id] = wrapper_type::call(act);
         }
 
         bool init_dfa() const
         {
-            if (!initialized_dfa) {
-                state_machine.clear();
+            if (!initialized_dfa_) {
+                state_machine_.clear();
                 typedef boost::lexer::basic_generator<char_type> generator;
-                generator::build (rules, state_machine);
-                generator::minimise (state_machine);
+                generator::build (rules_, state_machine_);
+                generator::minimise (state_machine_);
 
 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
-                boost::lexer::debug::dump(state_machine, std::cerr);
+                boost::lexer::debug::dump(state_machine_, std::cerr);
 #endif
-                initialized_dfa = true;
+                initialized_dfa_ = true;
             }
             return true;
         }
 
     private:
-        mutable boost::lexer::basic_state_machine<char_type> state_machine;
-        boost::lexer::basic_rules<char_type> rules;
-        typename Functor::semantic_actions_type actions;
-        mutable bool initialized_dfa;
-        boost::lexer::regex_flags flags;
+        // lexertl specific data
+        mutable boost::lexer::basic_state_machine<char_type> state_machine_;
+        boost::lexer::basic_rules<char_type> rules_;
+        boost::lexer::regex_flags flags_;
+
+        typename Functor::semantic_actions_type actions_;
+        mutable bool initialized_dfa_;
 
         template <typename Lexer> 
         friend bool generate_static(Lexer const&, std::ostream&, char const*);
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -49,7 +49,7 @@
             char_type;
 
             typedef std::size_t (*next_token_functor)(std::size_t&, 
-                Iterator const&, Iterator&, Iterator const&);
+                Iterator const&, Iterator&, Iterator const&, std::size_t&);
 
             typedef unused_type semantic_actions_type;
 
@@ -62,15 +62,15 @@
               : next_token(data_.next_), first(first_), last(last_)
             {}
 
-            std::size_t next(Iterator& end)
+            std::size_t next(Iterator& end, std::size_t& unique_id)
             {
-                typedef basic_iterator_tokeniser<Iterator> tokenizer;
                 std::size_t state;
-                return next_token(state, first, end, last);
+                return next_token(state, first, end, last, unique_id);
             }
 
             // nothing to invoke, so this is empty
-            bool invoke_actions(std::size_t, Iterator const&) 
+            bool invoke_actions(std::size_t, std::size_t, std::size_t
+              , Iterator const&) 
             {
                 return true;    // always accept
             }
@@ -93,9 +93,8 @@
 
             typedef typename base_type::state_type state_type;
             typedef typename base_type::char_type char_type;
-            typedef 
-                typename base_type::semantic_actions_type 
-            semantic_actions_type;
+            typedef typename base_type::semantic_actions_type 
+                semantic_actions_type;
 
             // initialize the shared data 
             template <typename IterData>
@@ -103,17 +102,24 @@
               : base_type(data_, first_, last_), state(0)
             {}
 
-            std::size_t next(Iterator& end)
+            std::size_t next(Iterator& end, std::size_t& unique_id)
             {
-                typedef basic_iterator_tokeniser<Iterator> tokenizer;
-                return this->next_token(state, this->first, end, this->last);
+                return this->next_token(state, this->first, end, this->last
+                  , unique_id);
             }
 
             std::size_t& get_state() { return state; }
             void set_state_name (char_type const* new_state) 
             { 
-                std::size_t state_id = this->rules.state(new_state);
+                this->rules.state(new_state);
+                for (std::size_t state_id = 0; 
+                     state_id < sizeof(lexer_state_names)/sizeof(lexer_state_names[0]); ++state_id)
+
+                // if the following assertion fires you've probably been using 
+                // a lexer state name which was not defined in your token 
+                // definition
                 BOOST_ASSERT(state_id != boost::lexer::npos);
+
                 if (state_id != boost::lexer::npos)
                     state = state_id;
             }
@@ -135,7 +141,7 @@
 
             typedef void functor_type(iterpair_type, std::size_t, bool&, static_data&);
             typedef boost::function<functor_type> functor_wrapper_type;
-            typedef std::multimap<std::size_t, functor_wrapper_type> 
+            typedef std::vector<std::vector<functor_wrapper_type> >
                 semantic_actions_type;
 
             typedef detail::wrap_action<functor_wrapper_type
@@ -143,34 +149,31 @@
 
             template <typename IterData>
             static_data (IterData const& data_, Iterator& first_, Iterator const& last_)
-              : base_type(data_, first_, last_),
-                actions(data_.actions_)
-            {}
+              : base_type(data_, first_, last_)
+              , actions(data_.actions_), state_names_(data_.state_names_)
+              , state_count_(data_.state_count_) {}
 
             // invoke attached semantic actions, if defined
-            bool invoke_actions(std::size_t id, Iterator const& end)
+            bool invoke_actions(std::size_t state, std::size_t id
+              , std::size_t unique_id, Iterator const& end)
             {
-                if (actions.empty()) 
-                    return true;  // nothing to invoke, continue with 'match'
+                if (state >= actions_.size())
+                    return true;    // no action defined for this state
 
-                iterpair_type itp(this->first, end);
-                bool match = true;
+                std::vector<functor_wrapper_type> const& actions = actions_[state];
 
-                typedef typename semantic_actions_type::const_iterator 
-                    iterator_type;
+                if (unique_id >= actions.size() || !actions[unique_id]) 
+                    return true;    // nothing to invoke, continue with 'match'
 
-                std::pair<iterator_type, iterator_type> p = actions.equal_range(id);
-                while (p.first != p.second)
-                {
-                    ((*p.first).second)(itp, id, match, *this);
-                    if (!match)
-                        return false;   // return a 'no-match'
-                    ++p.first;
-                }
-                return true;    // normal execution
+                iterpair_type itp(this->first, end);
+                bool match = true;
+                actions[unique_id](itp, id, match, *this);
+                return match;
             }
 
-            semantic_actions_type const& actions;
+            semantic_actions_type const& actions_;
+            std::size_t const state_count_;
+            const char* const* state_names_;
         };
     }
 
@@ -279,8 +282,9 @@
 #endif
 
             Iterator end = data.first;
-            std::size_t id = data.next(end);
-            
+            std::size_t unique_id = boost::lexer::npos;
+            std::size_t id = data.next(end, unique_id);
+
             if (boost::lexer::npos == id) {   // no match
 #if defined(BOOST_SPIRIT_DEBUG)
                 std::string next;
@@ -318,7 +322,7 @@
             std::size_t state = data.get_state();
 
             // invoke attached semantic actions, if there are any defined
-            if (!data.invoke_actions(id, end))
+            if (!data.invoke_actions(state, id, unique_id, end))
             {
                 // one of the semantic actions signaled no-match
                 return result = result_type(0); 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -30,6 +30,7 @@
         //  This is a forward declaration for the generated static table of 
         //  valid state names
         extern char const* const lexer_state_names[];
+        extern std::size_t const lexer_state_count;
 
         //  This is the forward declaration of the generated function to be 
         //  called to get the next token. 
@@ -158,6 +159,8 @@
         {
             typename Functor::next_token_functor next_;
             typename Functor::semantic_actions_type const& actions_;
+            std::size_t const state_count_;
+            const char* const* state_names_;
         };
 
     public:
@@ -167,7 +170,8 @@
         template <typename F>
         iterator_type begin(Iterator& first, Iterator const& last, F next) const
         { 
-            iterator_data_type iterator_data = { next, actions };
+            iterator_data_type iterator_data = { next, actions
+              , static_::lexer_state_count, static_::lexer_state_names };
             return iterator_type(iterator_data, first, last);
         }
 
@@ -178,7 +182,8 @@
         iterator_type begin(Iterator_& first, Iterator_ const& last) const
         { 
             iterator_data_type iterator_data = 
-                { &lex::lexertl::static_::next_token<Iterator_>, actions };
+                { &lex::lexertl::static_::next_token<Iterator_>, actions,
+                  static_::lexer_state_count, static_::lexer_state_names };
             return iterator_type(iterator_data, first, last);
         }
 
Modified: trunk/boost/spirit/home/lex/lexer/sequence.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/sequence.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/sequence.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -40,9 +40,16 @@
           : elements(elements) {}
 
         template <typename LexerDef, typename String>
-        void collect(LexerDef& lexdef, String const& state)
+        void collect(LexerDef& lexdef, String const& state) const
         {
-            detail::sequence_function<LexerDef, String> f (lexdef, state);
+            detail::sequence_collect_function<LexerDef, String> f (lexdef, state);
+            fusion::any(elements, f);
+        }
+
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const 
+        {
+            detail::sequence_add_actions_function<LexerDef> f (lexdef);
             fusion::any(elements, f);
         }
 
Modified: trunk/boost/spirit/home/lex/lexer/string_token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/string_token_def.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/string_token_def.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -69,6 +69,9 @@
             lexdef.add_token (state.c_str(), str_, id_);
         }
 
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const {}
+
         std::size_t id() const { return id_; }
 
         string_type str_;
Modified: trunk/boost/spirit/home/lex/lexer/token_def.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_def.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/token_def.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -168,6 +168,9 @@
                 lexdef.add_token(state.c_str(), get<char_type>(def), token_id);
         }
 
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const {}
+
     public:
         typedef Char char_type;
         typedef Idtype id_type;
@@ -180,8 +183,7 @@
 
         explicit token_def(char_type def_, Idtype id_ = Idtype())
           : proto_base_type(terminal_type::make(alias()))
-          , def(def_)
-          , token_id(Idtype() == id_ ? def_ : id_)
+          , def(def_), token_id(Idtype() == id_ ? def_ : id_)
           , token_state(~0) {}
 
         explicit token_def(string_type const& def_, Idtype id_ = Idtype())
@@ -207,6 +209,7 @@
         // general accessors 
         Idtype id() const { return token_id; }
         void id(Idtype id) { token_id = id; }
+
         string_type definition() const 
         { 
             return (0 == def.which()) 
Modified: trunk/boost/spirit/home/lex/lexer/token_set.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/token_set.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/token_set.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -91,10 +91,10 @@
 
                 //  If the following assertion fires you probably forgot to
                 //  associate this token set definition with a lexer instance.
-                BOOST_ASSERT(~0 != token_state);
+                BOOST_ASSERT(~0 != token_state_);
 
                 token_type &t = *first;
-                if (token_is_valid(t) && token_state == t.state()) {
+                if (token_is_valid(t) && token_state_ == t.state()) {
                 // any of the token definitions matched
                     qi::detail::assign_to(t, attr);
                     ++first;
@@ -123,12 +123,15 @@
             // is not possible. Please create a separate token_set instance 
             // from the same set of regular expressions for each lexer state it 
             // needs to be associated with.
-            BOOST_ASSERT(~0 == token_state || state_id == token_state);
+            BOOST_ASSERT(~0 == token_state_ || state_id == token_state_);
 
-            token_state = state_id;
+            token_state_ = state_id;
             lexdef.add_token (state.c_str(), *this);
         }
 
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const {}
+
     private:
         // allow to use the tokset.add("regex1", id1)("regex2", id2);
         // syntax
@@ -176,15 +179,15 @@
                     tokdef.id(token_id);
                 }
 
-                def.add_token (def.initial_state().c_str(), tokdef.definition(),
-                    token_id);
+                def.add_token(def.initial_state().c_str(), tokdef.definition()
+                  , token_id);
                 return *this;
             }
 
             template <typename TokenSet_>
             adder const& operator()(token_set<TokenSet_> const& tokset) const
             {
-                def.add_token (def.initial_state().c_str(), tokset);
+                def.add_token(def.initial_state().c_str(), tokset);
                 return *this;
             }
 
@@ -209,18 +212,27 @@
         };
         friend struct pattern_adder;
 
+    private:
+        // Helper function to invoke the necessary 2 step compilation process 
+        // on token definition expressions
+        template <typename TokenExpr>
+        void compile2pass(TokenExpr const& expr) 
+        {
+            expr.collect(*this, base_token_set::initial_state());
+            expr.add_actions(*this);
+        }
+
     public:
         ///////////////////////////////////////////////////////////////////
         template <typename Expr>
         void define(Expr const& expr)
         {
-            compile<lex::domain>(expr).collect(
-                *this, base_token_set::initial_state());
+            compile2pass(compile<lex::domain>(expr));
         }
 
         token_set()
           : proto_base_type(terminal_type::make(alias()))
-          , add(this_()), add_pattern(this_()), token_state(~0) {}
+          , add(this_()), add_pattern(this_()), token_state_(~0) {}
 
         // allow to assign a token definition expression
         template <typename Expr>
@@ -240,10 +252,10 @@
         adder add;
         pattern_adder add_pattern;
 
-        std::size_t state() const { return token_state; }
+        std::size_t state() const { return token_state_; }
 
     private:
-        mutable std::size_t token_state;
+        mutable std::size_t token_state_;
     };
 
     // allow to assign a token definition expression
Modified: trunk/boost/spirit/home/lex/reference.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/reference.hpp	(original)
+++ trunk/boost/spirit/home/lex/reference.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -37,7 +37,13 @@
         template <typename LexerDef, typename String>
         void collect(LexerDef& lexdef, String const& state) const
         {
-            return this->ref.get().collect(lexdef, state);
+            this->ref.get().collect(lexdef, state);
+        }
+
+        template <typename LexerDef>
+        void add_actions(LexerDef& lexdef) const 
+        {
+            this->ref.get().add_actions(lexdef);
         }
     };
 
Modified: trunk/boost/spirit/home/support/detail/lexer/consts.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/consts.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/consts.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -15,10 +15,10 @@
 namespace lexer
 {
     enum regex_flags {none = 0, icase = 1, dot_not_newline = 2};
-    // 0 = end state, 1 = id, 2 = lex state, 3 = bol, 4 = eol,
-    // 5 = dead_state_index
-    enum {end_state_index, id_index, state_index, bol_index, eol_index,
-        dead_state_index, dfa_offset};
+    // 0 = end state, 1 = id, 2 = unique_id, 3 = lex state, 4 = bol, 5 = eol,
+    // 6 = dead_state_index
+    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+        eol_index, dead_state_index, dfa_offset};
 
     const std::size_t max_macro_len = 30;
     const std::size_t num_chars = 256;
Modified: trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/conversion/char_state_machine.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -29,6 +29,7 @@
 
         bool _end_state;
         std::size_t _id;
+        std::size_t _unique_id;
         std::size_t _state;
         std::size_t _bol_index;
         std::size_t _eol_index;
@@ -37,6 +38,7 @@
         state () :
             _end_state (false),
             _id (0),
+            _unique_id (npos),
             _state (0),
             _bol_index (npos),
             _eol_index (npos)
Modified: trunk/boost/spirit/home/support/detail/lexer/debug.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/debug.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/debug.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -165,6 +165,8 @@
             {
                 end_state (stream_);
                 stream_ << iter_->id;
+                unique_id (stream_);
+                stream_ << iter_->unique_id;
                 dfa (stream_);
                 stream_ << iter_->goto_dfa;
                 stream_ << std::endl;
@@ -281,6 +283,16 @@
         stream_ << L"  END STATE, Id = ";
     }
 
+    static void unique_id (std::ostream &stream_)
+    {
+        stream_ << ", Unique Id = ";
+    }
+
+    static void unique_id (std::wostream &stream_)
+    {
+        stream_ << L", Unique Id = ";
+    }
+
     static void any (std::ostream &stream_)
     {
         stream_ << "  . -> ";
Modified: trunk/boost/spirit/home/support/detail/lexer/file_input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/file_input.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/file_input.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -33,6 +33,7 @@
         struct data
         {
             std::size_t id;
+            std::size_t unique_id;
             const CharT *start;
             const CharT *end;
             std::size_t state;
@@ -40,14 +41,16 @@
             // Construct in end() state.
             data () :
                 id (0),
+                unique_id (npos),
                 state (npos)
             {
             }
 
             bool operator == (const data &rhs_) const
             {
-                return id == rhs_.id && start == rhs_.start &&
-                    end == rhs_.end && state == rhs_.state;
+                return id == rhs_.id && unique_id == rhs_.unique_id &&
+                    start == rhs_.start && end == rhs_.end &&
+                    state == rhs_.state;
             }
         };
 
@@ -105,12 +108,12 @@
                 _data.id = _input->next (&internals_._lookup->front ()->
                     front (), internals_._dfa_alphabet.front (),
                     &internals_._dfa->front ()->front (), _data.start,
-                    _data.end);
+                    _data.end, _data.unique_id);
             }
             else
             {
                 _data.id = _input->next (internals_, _data.state, _data.start,
-                    _data.end);
+                    _data.end, _data.unique_id);
             }
 
             if (_data.id == 0)
@@ -156,6 +159,7 @@
         iterator iter_;
 
         iter_._input = this;
+        // Over-ride default of 0 (EOF)
         iter_._data.id = npos;
         iter_._data.start = 0;
         iter_._data.end = 0;
@@ -200,7 +204,8 @@
     CharT *_end_buffer;
 
     std::size_t next (const detail::internals &internals_,
-        std::size_t &start_state_, const CharT * &start_, const CharT * &end_)
+        std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
+        std::size_t &unique_id_)
     {
         _start_token = _end_token;
 
@@ -213,6 +218,7 @@
         const CharT *curr_ = _start_token;
         bool end_state_ = *ptr_ != 0;
         std::size_t id_ = *(ptr_ + id_index);
+        std::size_t uid_ = *(ptr_ + unique_id_index);
         const CharT *end_token_ = curr_;
 
         for (;;)
@@ -255,6 +261,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+                uid_ = *(ptr_ + unique_id_index);
                 start_state_ = *(ptr_ + state_index);
                 end_token_ = curr_;
             }
@@ -263,6 +270,7 @@
         if (_start_token >= _end_buffer)
         {
             // No more tokens...
+            unique_id_ = npos;
             return 0;
         }
 
@@ -276,6 +284,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+                uid_ = *(ptr_ + unique_id_index);
                 start_state_ = *(ptr_ + state_index);
                 end_token_ = curr_;
             }
@@ -293,23 +302,26 @@
             // No match causes char to be skipped
             _end_token = _start_token + 1;
             id_ = npos;
+            uid_ = npos;
         }
 
         start_ = _start_token;
         end_ = _end_token;
+        unique_id_ = uid_;
         return id_;
     }
 
     std::size_t next (const std::size_t * const lookup_,
         const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
-        const CharT * &start_, const CharT * &end_)
+        const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
     {
         _start_token = _end_token;
 
         const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
         const CharT *curr_ = _start_token;
         bool end_state_ = *ptr_ != 0;
-        std::size_t id_ = id_ = *(ptr_ + id_index);
+        std::size_t id_ = *(ptr_ + id_index);
+        std::size_t uid_ = *(ptr_ + unique_id_index);
         const CharT *end_token_ = curr_;
 
         for (;;)
@@ -352,6 +364,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+                uid_ = *(ptr_ + unique_id_index);
                 end_token_ = curr_;
             }
         }
@@ -359,6 +372,7 @@
         if (_start_token >= _end_buffer)
         {
             // No more tokens...
+            unique_id_ = npos;
             return 0;
         }
 
@@ -372,6 +386,7 @@
             {
                 end_state_ = true;
                 id_ = *(ptr_ + id_index);
+                uid_ = *(ptr_ + unique_id_index);
                 end_token_ = curr_;
             }
         }
@@ -386,10 +401,12 @@
             // No match causes char to be skipped
             _end_token = _start_token + 1;
             id_ = npos;
+            uid_ = npos;
         }
 
         start_ = _start_token;
         end_ = _end_token;
+        unique_id_ = uid_;
         return id_;
     }
 
Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -148,6 +148,7 @@
         const typename rules::string_deque_deque &regexes_ =
             rules_.regexes ();
         const typename rules::id_vector_deque &ids_ = rules_.ids ();
+        std::size_t unique_id_ = 0;
         const typename rules::id_vector_deque &states_ = rules_.states ();
         typename rules::string_deque::const_iterator regex_iter_ =
             regexes_[state_].begin ();
@@ -170,9 +171,9 @@
             seen_BOL_assertion_, seen_EOL_assertion_);
 
         detail::node *root_ = parser::parse (regex_.c_str (),
-            regex_.c_str () + regex_.size (), *ids_iter_, *states_iter_,
-            rules_.flags (), rules_.locale (), node_ptr_vector_, macromap_,
-            token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
+            regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
+            *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
+            macromap_, token_map_, seen_BOL_assertion_, seen_EOL_assertion_);
 
         ++regex_iter_;
         ++ids_iter_;
@@ -186,7 +187,7 @@
             const typename rules::string &regex_ = *regex_iter_;
 
             root_ = parser::parse (regex_.c_str (),
-                regex_.c_str () + regex_.size (), *ids_iter_,
+                regex_.c_str () + regex_.size (), *ids_iter_, unique_id_++,
                 *states_iter_, rules_.flags (), rules_.locale (),
                 node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
@@ -277,7 +278,7 @@
             const typename rules::string &name_ = iter_->first;
             const typename rules::string &regex_ = iter_->second;
             detail::node *node_ = parser::parse (regex_.c_str (),
-                regex_.c_str () + regex_.size (), 0, 0, flags_,
+                regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
                 locale_, node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
             macro_iter_pair map_iter_ = macromap_.
@@ -362,6 +363,7 @@
     {
         bool end_state_ = false;
         std::size_t id_ = 0;
+        std::size_t unique_id_ = npos;
         std::size_t state_ = 0;
         std::size_t hash_ = 0;
 
@@ -375,8 +377,8 @@
             followpos_->begin (), end_ = followpos_->end ();
             iter_ != end_; ++iter_)
         {
-            closure_ex (*iter_, end_state_, id_, state_, set_ptr_.get (),
-                vector_ptr_.get (), hash_);
+            closure_ex (*iter_, end_state_, id_, unique_id_, state_,
+                set_ptr_.get (), vector_ptr_.get (), hash_);
         }
 
         bool found_ = false;
@@ -413,6 +415,7 @@
             {
                 dfa_[old_size_] |= end_state;
                 dfa_[old_size_ + id_index] = id_;
+                dfa_[old_size_ + unique_id_index] = unique_id_;
                 dfa_[old_size_ + state_index] = state_;
             }
         }
@@ -421,8 +424,8 @@
     }
 
     static void closure_ex (detail::node *node_, bool &end_state_,
-        std::size_t &id_, std::size_t &state_, node_set *set_ptr_,
-        node_vector *vector_ptr_, std::size_t &hash_)
+        std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
+        node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
     {
         const bool temp_end_state_ = node_->end_state ();
 
@@ -432,6 +435,7 @@
             {
                 end_state_ = true;
                 id_ = node_->id ();
+                unique_id_ = node_->unique_id ();
                 state_ = node_->lexer_state ();
             }
         }
@@ -502,7 +506,7 @@
                     }
                     else
                     {
-                        iter_ = lhs_->insert (++iter_, 0);
+                        iter_ = lhs_->insert (++iter_, (charset*)0);
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -644,7 +648,7 @@
                     }
                     else
                     {
-                        iter_ = lhs_->insert (++iter_, 0);
+                        iter_ = lhs_->insert (++iter_, (equivset*)0);
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -816,6 +820,7 @@
 
                 new_ptr_[end_state_index] = ptr_[end_state_index];
                 new_ptr_[id_index] = ptr_[id_index];
+                new_ptr_[unique_id_index] = ptr_[unique_id_index];
                 new_ptr_[state_index] = ptr_[state_index];
                 new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
                 new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
Modified: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/input.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -32,6 +32,7 @@
         struct data
         {
             std::size_t id;
+            std::size_t unique_id;
             FwdIter start;
             FwdIter end;
             bool bol;
@@ -40,6 +41,7 @@
             // Construct in end() state.
             data () :
                 id (0),
+                unique_id (npos),
                 bol (false),
                 state (npos)
             {
@@ -47,8 +49,9 @@
 
             bool operator == (const data &rhs_) const
             {
-                return id == rhs_.id && start == rhs_.start &&
-                    end == rhs_.end && bol == rhs_.bol && state == rhs_.state;
+                return id == rhs_.id && unique_id == rhs_.unique_id &&
+                    start == rhs_.start && end == rhs_.end &&
+                    bol == rhs_.bol && state == rhs_.state;
             }
         };
 
@@ -115,13 +118,14 @@
                         (&internals_._lookup->front ()->front (),
                         internals_._dfa_alphabet.front (),
                         &internals_._dfa->front ()->front (),
-                        _data.bol, _data.end, _input->_end);
+                        _data.bol, _data.end, _input->_end, _data.unique_id);
                 }
                 else
                 {
                     _data.id = next (&internals_._lookup->front ()->front (),
                         internals_._dfa_alphabet.front (), &internals_.
-                        _dfa->front ()->front (), _data.end, _input->_end);
+                        _dfa->front ()->front (), _data.end, _input->_end,
+                        _data.unique_id);
                 }
             }
             else
@@ -130,12 +134,12 @@
                     internals_._seen_EOL_assertion)
                 {
                     _data.id = next (internals_, _data.state,
-                        _data.bol, _data.end, _input->_end);
+                        _data.bol, _data.end, _input->_end, _data.unique_id);
                 }
                 else
                 {
                     _data.id = next (internals_, _data.state,
-                        _data.end, _input->_end);
+                        _data.end, _input->_end, _data.unique_id);
                 }
             }
 
@@ -148,9 +152,14 @@
 
         std::size_t next (const detail::internals &internals_,
             std::size_t &start_state_, bool bol_,
-            FwdIter &start_token_, const FwdIter &end_)
+            FwdIter &start_token_, const FwdIter &end_,
+            std::size_t &unique_id_)
         {
-            if (start_token_ == end_) return 0;
+            if (start_token_ == end_)
+            {
+                unique_id_ = npos;
+                return 0;
+            }
 
         again:
             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
@@ -161,6 +170,7 @@
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+            std::size_t uid_ = *(ptr_ + unique_id_index);
             bool end_bol_ = bol_;
             FwdIter end_token_ = start_token_;
 
@@ -199,6 +209,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+                    uid_ = *(ptr_ + unique_id_index);
                     start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
@@ -215,6 +226,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+                    uid_ = *(ptr_ + unique_id_index);
                     start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
@@ -239,16 +251,22 @@
                 _data.bol = *start_token_ == '\n';
                 ++start_token_;
                 id_ = npos;
+                uid_ = npos;
             }
 
+            unique_id_ = uid_;
             return id_;
         }
 
         std::size_t next (const detail::internals &internals_,
             std::size_t &start_state_, FwdIter &start_token_,
-            FwdIter const &end_)
+            FwdIter const &end_, std::size_t &unique_id_)
         {
-            if (start_token_ == end_) return 0;
+            if (start_token_ == end_)
+            {
+                unique_id_ = npos;
+                return 0;
+            }
 
         again:
             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
@@ -259,6 +277,7 @@
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+            std::size_t uid_ = *(ptr_ + unique_id_index);
             FwdIter end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -277,6 +296,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+                    uid_ = *(ptr_ + unique_id_index);
                     start_state_ = *(ptr_ + state_index);
                     end_token_ = curr_;
                 }
@@ -294,21 +314,29 @@
                 // No match causes char to be skipped
                 ++start_token_;
                 id_ = npos;
+                uid_ = npos;
             }
 
+            unique_id_ = uid_;
             return id_;
         }
 
         std::size_t next (const std::size_t * const lookup_,
             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
-            bool bol_, FwdIter &start_token_, FwdIter const &end_)
+            bool bol_, FwdIter &start_token_, FwdIter const &end_,
+            std::size_t &unique_id_)
         {
-            if (start_token_ == end_) return 0;
+            if (start_token_ == end_)
+            {
+                unique_id_ = npos;
+                return 0;
+            }
 
             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+            std::size_t uid_ = *(ptr_ + unique_id_index);
             bool end_bol_ = bol_;
             FwdIter end_token_ = start_token_;
 
@@ -347,6 +375,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+                    uid_ = *(ptr_ + unique_id_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -362,6 +391,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+                    uid_ = *(ptr_ + unique_id_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -379,21 +409,29 @@
                 _data.bol = *start_token_ == '\n';
                 ++start_token_;
                 id_ = npos;
+                uid_ = npos;
             }
 
+            unique_id_ = uid_;
             return id_;
         }
 
         std::size_t next (const std::size_t * const lookup_,
             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
-            FwdIter &start_token_, FwdIter const &end_)
+            FwdIter &start_token_, FwdIter const &end_,
+            std::size_t &unique_id_)
         {
-            if (start_token_ == end_) return 0;
+            if (start_token_ == end_)
+            {
+                unique_id_ = npos;
+                return 0;
+            }
 
             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
             FwdIter curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
+            std::size_t uid_ = *(ptr_ + unique_id_index);
             FwdIter end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -412,6 +450,7 @@
                 {
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
+                    uid_ = *(ptr_ + unique_id_index);
                     end_token_ = curr_;
                 }
             }
@@ -426,8 +465,10 @@
                 // No match causes char to be skipped
                 ++start_token_;
                 id_ = npos;
+                uid_ = npos;
             }
 
+            unique_id_ = uid_;
             return id_;
         }
     };
@@ -452,6 +493,7 @@
         iterator iter_;
 
         iter_._input = this;
+        // Over-ride default of 0 (EOI)
         iter_._data.id = npos;
         iter_._data.start = _begin;
         iter_._data.end = _begin;
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/parser.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -51,10 +51,10 @@
 <DUPLICATE>  -> '?' | '*' | '+' | '{n[,[m]]}'
 */
     static node *parse (const CharT *start_, const CharT * const end_,
-        const std::size_t id_, const std::size_t dfa_state_,
-        const regex_flags flags_, const std::locale &locale_,
-        node_ptr_vector &node_ptr_vector_, const macro_map &macromap_,
-        typename tokeniser::token_map &map_,
+        const std::size_t id_, const std::size_t unique_id_,
+        const std::size_t dfa_state_, const regex_flags flags_,
+        const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
+        const macro_map &macromap_, typename tokeniser::token_map &map_,
         bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
     {
         node *root_ = 0;
@@ -116,7 +116,7 @@
         {
             node_ptr_vector_->push_back (0);
 
-            node *rhs_node_ = new end_node (id_, dfa_state_);
+            node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_);
 
             node_ptr_vector_->back () = rhs_node_;
             node_ptr_vector_->push_back (0);
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -18,9 +18,11 @@
 class end_node : public node
 {
 public:
-    end_node (const std::size_t id_, const std::size_t lexer_state_) :
+    end_node (const std::size_t id_, const std::size_t unique_id_,
+        const std::size_t lexer_state_) :
         node (false),
         _id (id_),
+        _unique_id (unique_id_),
         _lexer_state (lexer_state_)
     {
         node::_firstpos.push_back (this);
@@ -58,6 +60,11 @@
         return _id;
     }
 
+    virtual std::size_t unique_id () const
+    {
+        return _unique_id;
+    }
+
     virtual std::size_t lexer_state () const
     {
         return _lexer_state;
@@ -65,6 +72,7 @@
 
 private:
     std::size_t _id;
+    std::size_t _unique_id;
     std::size_t _lexer_state;
     node_vector _followpos;
 
Modified: trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -132,6 +132,11 @@
         throw runtime_error ("Internal error node::id()");
     }
 
+    virtual std::size_t unique_id () const
+    {
+        throw runtime_error ("Internal error node::unique_id()");
+    }
+
     virtual std::size_t lexer_state () const
     {
         throw runtime_error ("Internal error node::state()");
Modified: trunk/boost/spirit/home/support/detail/lexer/rules.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/rules.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/rules.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -44,6 +44,27 @@
             return L"INITIAL";
         }
     };
+
+    template <typename CharT>
+    struct dot;
+
+    template<>
+    struct dot<char>
+    {
+        static const char *str ()
+        {
+            return ".";
+        }
+    };
+
+    template<>
+    struct dot<wchar_t>
+    {
+        static const wchar_t *str()
+        {
+            return L".";
+        }
+    };
 }
 
 template<typename CharT>
@@ -60,6 +81,8 @@
     typedef std::deque<string_pair> string_pair_deque;
     typedef std::map<string, std::size_t> string_size_t_map;
     typedef std::pair<string, std::size_t> string_size_t_pair;
+    typedef std::pair<std::size_t, std::size_t> unique_id_key;
+    typedef std::map<unique_id_key, std::size_t> unique_id_map;
 
     basic_rules (const regex_flags flags_ = dot_not_newline) :
         _flags (flags_)
@@ -141,7 +164,7 @@
         }
     }
 
-    void add_state (const CharT *name_)
+    std::size_t add_state (const CharT *name_)
     {
         validate (name_);
 
@@ -157,6 +180,9 @@
                 _lexer_state_names.push_back (name_);
             }
         }
+
+        // Initial is not stored, so no need to - 1.
+        return _lexer_state_names.size();
     }
 
     void add_macro (const CharT *name_, const CharT *regex_)
@@ -198,62 +224,68 @@
         }
     }
 
-    void add (const CharT *regex_, const std::size_t id_)
+    std::size_t add (const CharT *regex_, const std::size_t id_)
     {
-        add (string (regex_), id_);
+        return add (string (regex_), id_);
     }
 
-    void add (const CharT *regex_start_, const CharT *regex_end_,
+    std::size_t add (const CharT *regex_start_, const CharT *regex_end_,
         const std::size_t id_)
     {
-        add (string (regex_start_, regex_end_), id_);
+        return add (string (regex_start_, regex_end_), id_);
     }
 
-    void add (const string &regex_, const std::size_t id_)
+    std::size_t add (const string &regex_, const std::size_t id_)
     {
         check_for_invalid_id (id_);
         _regexes[0].push_back (regex_);
         _ids[0].push_back (id_);
         _states[0].push_back (0);
+        return _regexes[0].size () - 1;
     }
 
     void add (const CharT *curr_state_, const CharT *regex_,
-        const CharT *new_state_)
+        const CharT *new_state_, id_vector *id_vec_ = 0)
     {
-        add (curr_state_, string (regex_), new_state_);
+        add (curr_state_, string (regex_), new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const CharT *regex_start_,
-        const CharT *regex_end_, const CharT *new_state_)
+        const CharT *regex_end_, const CharT *new_state_,
+        id_vector *id_vec_ = 0)
     {
-        add (curr_state_, string (regex_start_, regex_end_), new_state_);
+        add (curr_state_, string (regex_start_, regex_end_),
+            new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const string &regex_,
-        const CharT *new_state_)
+        const CharT *new_state_, id_vector *id_vec_ = 0)
     {
-        add (curr_state_, regex_, 0, new_state_, false);
+        add (curr_state_, regex_, 0, new_state_, false, id_vec_);
     }
 
     void add (const CharT *curr_state_, const CharT *regex_,
-        const std::size_t id_, const CharT *new_state_)
+        const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
     {
-        add (curr_state_, string (regex_), id_, new_state_);
+        add (curr_state_, string (regex_), id_, new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const CharT *regex_start_,
-        const CharT *regex_end_, const std::size_t id_, const CharT *new_state_)
+        const CharT *regex_end_, const std::size_t id_,
+        const CharT *new_state_, id_vector *id_vec_ = 0)
     {
-        add (curr_state_, string (regex_start_, regex_end_), id_, new_state_);
+        add (curr_state_, string (regex_start_, regex_end_), id_,
+            new_state_, id_vec_);
     }
 
     void add (const CharT *curr_state_, const string &regex_,
-        const std::size_t id_, const CharT *new_state_)
+        const std::size_t id_, const CharT *new_state_, id_vector *id_vec_ = 0)
     {
-        add (curr_state_, regex_, id_, new_state_, true);
+        add (curr_state_, regex_, id_, new_state_, true, id_vec_);
     }
 
-    void add (const CharT *curr_state_, const basic_rules &rules_)
+    void add (const CharT *curr_state_, const basic_rules &rules_,
+        id_vector *id_vec_ = 0)
     {
         const string_deque_deque &regexes_ = rules_.regexes ();
         const id_vector_deque &ids_ = rules_.ids ();
@@ -266,6 +298,7 @@
         typename string_deque::const_iterator regex_iter_;
         typename string_deque::const_iterator regex_end_;
         typename id_vector::const_iterator id_iter_;
+        id_vector *temp_id_vec_ = id_vec_;
 
         for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_)
         {
@@ -275,7 +308,16 @@
 
             for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_)
             {
-                add (curr_state_, *regex_iter_, *id_iter_, curr_state_);
+                add (curr_state_, *regex_iter_, *id_iter_, detail::dot<CharT>::str(),
+                    temp_id_vec_);
+
+                if (temp_id_vec_)
+                {
+                    // As suggested by Hartmut, only fill the id_vec_ once.
+                    // The dfa sizes can be examined at the end to get a range
+                    // of ids.
+                    temp_id_vec_ = 0;
+                }
             }
         }
     }
@@ -328,6 +370,19 @@
         return detail::initial<CharT>::str ();
     }
 
+    std::size_t retrieve_id (std::size_t state, std::size_t id) const
+    {
+        unique_id_key key (state, id);
+        typename unique_id_map::const_iterator it = _unique_ids.find (key);
+
+        if (it == _unique_ids.end ())
+        {
+            return npos;
+        }
+
+        return (*it).second;
+    }
+
 private:
     string_size_t_map _statemap;
     string_pair_deque _macrodeque;
@@ -338,13 +393,20 @@
     regex_flags _flags;
     std::locale _locale;
     string_deque _lexer_state_names;
+    unique_id_map _unique_ids;
 
     void add (const CharT *curr_state_, const string &regex_,
-        const std::size_t id_, const CharT *new_state_, const bool check_)
+        const std::size_t id_, const CharT *new_state_, const bool check_,
+        id_vector *id_vec_ = 0)
     {
         const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
         const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
 
+        if (id_vec_)
+        {
+            id_vec_->clear();
+        }
+
         if (check_)
         {
             check_for_invalid_id (id_);
@@ -443,6 +505,13 @@
             _regexes[curr_].push_back (regex_);
             _ids[curr_].push_back (id_);
             _states[curr_].push_back (dot_ ? curr_ : new_);
+
+            if (id_vec_)
+            {
+                id_vec_->push_back (_regexes[curr_].size () - 1);
+            }
+
+            map_id (dot_ ? curr_ : new_, id_, _regexes[curr_].size () - 1);
         }
     }
 
@@ -528,6 +597,22 @@
             break;
         }
     }
+
+    bool map_id (std::size_t state, std::size_t id, std::size_t unique_id)
+    {
+        typedef typename unique_id_map::iterator iterator_type;
+
+        unique_id_key key (state, id);
+        iterator_type it = _unique_ids.find (key);
+        if (it != _unique_ids.end ())
+        {
+            (*it).second = unique_id;
+            return false;
+        }
+
+        typedef typename unique_id_map::value_type value_type;
+        return _unique_ids.insert (value_type (key, unique_id)).second;
+    }
 };
 
 typedef basic_rules<char> rules;
Modified: trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/state_machine.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -45,6 +45,7 @@
             // Current state info
             bool end_state;
             std::size_t id;
+            std::size_t unique_id;
             std::size_t goto_dfa;
             std::size_t bol_index;
             std::size_t eol_index;
@@ -61,6 +62,7 @@
                 transition (npos),
                 end_state (false),
                 id (npos),
+                unique_id (npos),
                 goto_dfa (npos),
                 bol_index (npos),
                 eol_index (npos),
@@ -77,6 +79,7 @@
                     transition == rhs_.transition &&
                     end_state == rhs_.end_state &&
                     id == rhs_.id &&
+                    unique_id == rhs_.unique_id &&
                     goto_dfa == rhs_.goto_dfa &&
                     bol_index == rhs_.bol_index &&
                     eol_index == rhs_.eol_index &&
@@ -197,6 +200,7 @@
                 _transitions = _data.transitions = ptr_->_transitions.size ();
                 _data.end_state = ptr_->_end_state;
                 _data.id = ptr_->_id;
+                _data.unique_id = ptr_->_unique_id;
                 _data.goto_dfa = ptr_->_state;
                 _data.bol_index = ptr_->_bol_index;
                 _data.eol_index = ptr_->_eol_index;
@@ -281,6 +285,7 @@
             iter_._transition = 0;
             iter_._data.end_state = ptr_->front ()._end_state;
             iter_._data.id = ptr_->front ()._id;
+            iter_._data.unique_id = ptr_->front()._unique_id;
             iter_._data.goto_dfa = ptr_->front ()._state;
             iter_._data.bol_index = ptr_->front ()._bol_index;
             iter_._data.eol_index = ptr_->front ()._eol_index;
@@ -368,6 +373,7 @@
 
                 state_->_end_state = *read_ptr_ != 0;
                 state_->_id = *(read_ptr_ + id_index);
+                state_->_unique_id = *(read_ptr_ + unique_id_index);
                 state_->_state = *(read_ptr_ + state_index);
                 state_->_bol_index = *(read_ptr_ + bol_index) - 1;
                 state_->_eol_index = *(read_ptr_ + eol_index) - 1;
Modified: trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp
==============================================================================
--- trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp	(original)
+++ trunk/libs/spirit/example/lex/static_lexer/word_count_static.hpp	2009-05-22 12:34:23 EDT (Fri, 22 May 2009)
@@ -6,8 +6,8 @@
 
 // Auto-generated by boost::lexer, do not edit
 
-#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_14_2009_13_47_08)
-#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_14_2009_13_47_08
+#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02)
+#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_MAY_22_2009_09_41_02
 
 #include <boost/detail/iterator.hpp>
 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
@@ -22,52 +22,58 @@
     "INITIAL"
 };
 
+// this variable defines the number of lexer states
+std::size_t const lexer_state_count = 1;
+
 template<typename Iterator>
 std::size_t next_token (std::size_t &start_state_, Iterator const& start_, 
-    Iterator &start_token_, Iterator const& end_)
+    Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)
 {
-    enum {end_state_index, id_index, state_index, bol_index, eol_index,
-        dead_state_index, dfa_offset};
+    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,
+        eol_index, dead_state_index, dfa_offset};
     static const std::size_t npos = static_cast<std::size_t>(~0);
-    static const std::size_t lookup_[256] = {7, 7, 7, 7, 7, 7, 7, 7,
-        7, 8, 6, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        8, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7,
-        7, 7, 7, 7, 7, 7, 7, 7};
-    static const std::size_t dfa_alphabet_ = 9;
-    static const std::size_t dfa_[45] = {0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 3,
-        2, 4, 1, 65536, 0, 0, 0, 0,
-        0, 2, 0, 1, 10, 0, 0, 0,
-        0, 0, 0, 0, 1, 65537, 0, 0,
-        0, 0, 0, 0, 0};
+    static const std::size_t lookup_[256] = {
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 9, 7, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        9, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8 };
+    static const std::size_t dfa_alphabet_ = 10;
+    static const std::size_t dfa_[50] = {
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 3, 2, 4, 1, 65536, 0, 0,
+        0, 0, 0, 0, 2, 0, 1, 10,
+        1, 0, 0, 0, 0, 0, 0, 0,
+        1, 65537, 2, 0, 0, 0, 0, 0,
+        0, 0 };
 
     if (start_token_ == end_) return 0;
 
@@ -75,13 +81,13 @@
     Iterator curr_ = start_token_;
     bool end_state_ = *ptr_ != 0;
     std::size_t id_ = *(ptr_ + id_index);
+    std::size_t uid_ = *(ptr_ + unique_id_index);
     Iterator end_token_ = start_token_;
 
     while (curr_ != end_)
     {
         std::size_t const state_ =
-            ptr_[lookup_[static_cast<unsigned char>
-                (*curr_++)]];
+            ptr_[lookup_[static_cast<unsigned char>(*curr_++)]];
 
         if (state_ == 0) break;
 
@@ -91,6 +97,7 @@
         {
             end_state_ = true;
             id_ = *(ptr_ + id_index);
+            uid_ = *(ptr_ + unique_id_index);
             end_token_ = curr_;
         }
     }
@@ -103,8 +110,10 @@
     else
     {
         id_ = npos;
+        uid_ = npos;
     }
 
+    unique_id_ = uid_;
     return id_;
 }