<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r57529 - in trunk/boost/spirit/home: lex/lexer/lexertl support/detail/lexer
From: hartmut.kaiser_at_[hidden]
Date: 2009-11-09 21:00:28
Author: hkaiser
Date: 2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
New Revision: 57529
URL: http://svn.boost.org/trac/boost/changeset/57529
Log:
Spirit: implemented forward iterator support for lexer
Text files modified: 
   trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp        |    13 +                                       
   trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp     |   122 ++++++++++++++------                    
   trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp  |    56 ++++++--                                
   trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp |    16 +-                                      
   trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp        |    11 +                                       
   trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp     |   236 ++++++++++++++++++++++----------------- 
   trunk/boost/spirit/home/support/detail/lexer/generator.hpp        |     9 +                                       
   trunk/boost/spirit/home/support/detail/lexer/input.hpp            |    19 ++-                                     
   8 files changed, 303 insertions(+), 179 deletions(-)
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/functor_data.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -54,7 +54,8 @@
             data (IterData const& data_, Iterator& first, Iterator const& last)
               : first_(first), last_(last)
               , state_machine_(data_.state_machine_)
-              , rules_(data_.rules_) {}
+              , rules_(data_.rules_)
+              , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}
 
             // The following functions are used by the implementation of the 
             // placeholder '_state'.
@@ -88,7 +89,7 @@
                 return it; 
             }
 
-            // The function more() is used by the implemention of the support 
+            // The function more() is used by the implementation of the support 
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it 
             // matches a rule, the corresponding token should be appended onto 
@@ -130,7 +131,7 @@
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
-                return tokenizer::next(state_machine_, first_, end, last_
+                return tokenizer::next(state_machine_, bol_, end, last_
                   , unique_id);
             }
 
@@ -162,6 +163,8 @@
             boost::lexer::basic_state_machine<char_type> const& state_machine_;
             boost::lexer::basic_rules<char_type> const& rules_;
 
+            bool bol_;      // helper storing whether last character was \n
+
         private:
             // silence MSVC warning C4512: assignment operator could not be generated
             data& operator= (data const&);
@@ -223,7 +226,7 @@
             {
                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
                 return tokenizer::next(this->state_machine_, state_, 
-                    this->get_first(), end, this->get_eoi(), unique_id);
+                    this->bol_, end, this->get_eoi(), unique_id);
             }
 
             std::size_t& get_state() { return state_; }
@@ -290,7 +293,7 @@
                 return it;
             }
 
-            // The function more() is used by the implemention of the support 
+            // The function more() is used by the implementation of the support 
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it 
             // matches a rule, the corresponding token should be appended onto 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -89,7 +89,8 @@
     }
 
     inline bool 
-    generate_cpp_state_table (std::ostream &os_, char const* name_suffix = "")
+    generate_cpp_state_table (std::ostream &os_, char const* name_suffix
+      , bool bol, bool eol)
     {
         std::string suffix(name_suffix[0] ? "_" : "");
         suffix += name_suffix;
@@ -97,9 +98,13 @@
         generate_delimiter(os_);
         os_ << "// this defines a generic accessors for the information above\n";
         os_ << "struct lexer" << suffix << "\n{\n";
-        os_ << "    // version number of compatible static lexer engine\n";
-        os_ << "    enum { static_version = " 
-            << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << " };\n\n";
+        os_ << "    // version number and feature-set of compatible static lexer engine\n";
+        os_ << "    enum\n";
+        os_ << "    {\n        static_version = "
+            << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << ",\n";
+        os_ << "        supports_bol = " << std::boolalpha << bol << ",\n";
+        os_ << "        supports_eol = " << std::boolalpha << eol << "\n";
+        os_ << "    };\n\n";
         os_ << "    // return the number of lexer states\n";
         os_ << "    static std::size_t const state_count()\n";
         os_ << "    {\n        return lexer_state_count" << suffix << "; \n    }\n\n";
@@ -108,10 +113,10 @@
         os_ << "    {\n        return lexer_state_names" << suffix << "[idx]; \n    }\n\n";
         os_ << "    // return the next matched token\n";
         os_ << "    template<typename Iterator>\n";
-        os_ << "    static std::size_t next(std::size_t &start_state_, Iterator const& start_\n";
+        os_ << "    static std::size_t next(std::size_t &start_state_, bool& bol_\n";
         os_ << "      , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
         os_ << "    {\n        return next_token" << suffix 
-            << "(start_state_, start_, start_token_, end_, unique_id_);\n    }\n";
+            << "(start_state_, bol_, start_token_, end_, unique_id_);\n    }\n";
         os_ << "};\n\n";
         return os_.good();
     }
@@ -184,11 +189,11 @@
 
         if (sm_.data()._seen_BOL_assertion)
         {
-            os_ << "Iterator const& start_, ";
+            os_ << "bool& bol_, ";
         }
         else if (!optimize_parameters)
         {
-            os_ << "Iterator const& /*start_*/, ";
+            os_ << "bool& /*bol_*/, ";
         }
 
         if (dfas_ > 1 || sm_.data()._seen_BOL_assertion || !optimize_parameters)
@@ -391,7 +396,15 @@
             os_ << " };\n";
         }
 
-        os_ << "\n    if (start_token_ == end_) return 0;\n\n";
+        os_ << "\n    if (start_token_ == end_)\n";
+        os_ << "    {\n";
+        os_ << "        unique_id_ = boost::lexer::npos;\n";
+        os_ << "        return 0;\n";
+        os_ << "    }\n\n";
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << "    bool bol = bol_;\n\n";
+        }
 
         if (dfas_ > 1)
         {
@@ -401,11 +414,19 @@
             os_ << "    const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
         }
 
-        os_ << "    const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
+        os_ << "    std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
         os_ << "    Iterator curr_ = start_token_;\n";
         os_ << "    bool end_state_ = *ptr_ != 0;\n";
         os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
         os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+        if (dfas_ > 1)
+        {
+            os_ << "    std::size_t end_start_state_ = start_state_;\n";
+        }
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << "    bool end_bol_ = bol_;\n";
+        }
         os_ << "    Iterator end_token_ = start_token_;\n\n";
 
         os_ << "    while (curr_ != end_)\n";
@@ -423,8 +444,7 @@
 
         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
         {
-            os_ << "        if (BOL_state_ && (start_token_ == start_ ||\n";
-            os_ << "            *(start_token_ - 1) == '\\n'))\n";
+            os_ << "        if (BOL_state_ && bol)\n";
             os_ << "        {\n";
             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
             os_ << "        }\n";
@@ -434,17 +454,18 @@
             os_ << "        }\n";
             os_ << "        else\n";
             os_ << "        {\n";
-            os_ << "            std::size_t const state_ =\n";
-
             if (lookups_ == 256)
             {
-                os_ << "                ptr_[lookup_[<typename Traits::index_type>"
-                    "(*curr_++)]];\n";
+                os_ << "            unsigned char index = \n";
+                os_ << "                static_cast<unsigned char>(*curr_++);\n";
             }
             else
             {
-                os_ << "                ptr_[lookup_[*curr_++]];\n";
+                os_ << "            std::size_t index = *curr_++\n";
             }
+            os_ << "            bol = (index == '\n') ? true : false;\n";
+            os_ << "            std::size_t const state_ = ptr_[\n";
+            os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
 
             os_ << '\n';
             os_ << "            if (state_ == 0) break;\n";
@@ -454,24 +475,24 @@
         }
         else if (sm_.data()._seen_BOL_assertion)
         {
-            os_ << "        if (BOL_state_ && (start_token_ == start_ ||\n";
-            os_ << "            *(start_token_ - 1) == '\\n'))\n";
+            os_ << "        if (BOL_state_ && bol)\n";
             os_ << "        {\n";
             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
             os_ << "        }\n";
             os_ << "        else\n";
             os_ << "        {\n";
-            os_ << "            std::size_t const state_ =\n";
-
             if (lookups_ == 256)
             {
-                os_ << "                ptr_[lookup_[static_cast<unsigned char>"
-                       "(*curr_++)]];\n";
+                os_ << "            unsigned char index = \n";
+                os_ << "                static_cast<unsigned char>(*curr_++);\n";
             }
             else
             {
-                os_ << "                ptr_[lookup_[*curr_++]];\n";
+                os_ << "            std::size_t index = *curr_++\n";
             }
+            os_ << "            bol = (index == '\n') ? true : false;\n";
+            os_ << "            std::size_t const state_ = ptr_[\n";
+            os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
 
             os_ << '\n';
             os_ << "            if (state_ == 0) break;\n";
@@ -487,17 +508,18 @@
             os_ << "        }\n";
             os_ << "        else\n";
             os_ << "        {\n";
-            os_ << "            std::size_t const state_ =\n";
-
             if (lookups_ == 256)
             {
-                os_ << "                ptr_[lookup_[static_cast<unsigned char>"
-                       "(*curr_++)]];\n";
+                os_ << "            unsigned char index = \n";
+                os_ << "                static_cast<unsigned char>(*curr_++);\n";
             }
             else
             {
-                os_ << "                ptr_[lookup_[*curr_++]];\n";
+                os_ << "            std::size_t index = *curr_++\n";
             }
+            os_ << "            bol = (index == '\n') ? true : false;\n";
+            os_ << "            std::size_t const state_ = ptr_[\n";
+            os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
 
             os_ << '\n';
             os_ << "            if (state_ == 0) break;\n";
@@ -530,12 +552,14 @@
         os_ << "            end_state_ = true;\n";
         os_ << "            id_ = *(ptr_ + id_index);\n";
         os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
-
         if (dfas_ > 1)
         {
-            os_ << "            start_state_ = *(ptr_ + state_index);\n";
+            os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
+        }
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << "            end_bol_ = bol;\n";
         }
-
         os_ << "            end_token_ = curr_;\n";
         os_ << "        }\n";
         os_ << "    }\n\n";
@@ -553,12 +577,14 @@
             os_ << "            end_state_ = true;\n";
             os_ << "            id_ = *(ptr_ + id_index);\n";
             os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
-
             if (dfas_ > 1)
             {
-                os_ << "            start_state_ = *(ptr_ + state_index);\n";
+                os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
+            }
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << "            end_bol_ = bol;\n";
             }
-
             os_ << "            end_token_ = curr_;\n";
             os_ << "        }\n";
             os_ << "    }\n\n";
@@ -567,17 +593,36 @@
         os_ << "    if (end_state_)\n";
         os_ << "    {\n";
         os_ << "        // return longest match\n";
+        os_ << "        start_state_ = end_start_state_;\n";
         os_ << "        start_token_ = end_token_;\n";
 
         if (dfas_ > 1)
         {
-            os_ << "        if (id_ == 0) goto again;\n";
+            os_ << "        if (id_ == 0)\n";
+            os_ << "        {\n";
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << "            bol_ = end_bol_;\n";
+            }
+            os_ << "            goto again;\n";
+            os_ << "        }\n";
+            if (sm_.data()._seen_BOL_assertion)
+            {
+                os_ << "        else\n";
+                os_ << "        {\n";
+                os_ << "            bol_ = end_bol_;\n";
+                os_ << "        }\n";
+            }
         }
 
         os_ << "    }\n";
         os_ << "    else\n";
         os_ << "    {\n";
 
+        if (sm_.data()._seen_BOL_assertion)
+        {
+            os_ << "        bol_ = (*start_token_ == '\n') ? true : false;\n";
+        }
         if (skip_on_nomatch)
         {
             os_ << "        // No match causes char to be skipped\n";
@@ -592,8 +637,11 @@
         os_ << "    return id_;\n";
         os_ << "}\n\n";
 
-        if (!generate_cpp_state_table(os_, name_suffix))
+        if (!generate_cpp_state_table(os_, name_suffix
+            , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
+        {
             return false;
+        }
 
         os_ << "}}}}}  // namespace boost::spirit::lex::lexertl::static_\n\n";
 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -30,9 +30,8 @@
 
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
-          , std::size_t &dfa_state_, Iterator const& start_
-          , Iterator &start_token_, Iterator const& end_
-          , std::size_t& unique_id_)
+          , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
+          , Iterator const& end_, std::size_t& unique_id_)
         {
             if (start_token_ == end_) 
             {
@@ -40,16 +39,21 @@
                 return 0;
             }
 
+            bool bol = bol_;
+
         again:
             std::size_t const* lookup_ = &state_machine_.data()._lookup[dfa_state_]->
                 front ();
             std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[dfa_state_];
             std::size_t const* dfa_ = &state_machine_.data()._dfa[dfa_state_]->front ();
+
             std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
             std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
+            std::size_t end_start_state_ = dfa_state_;
+            bool end_bol_ = bol_;
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -57,8 +61,7 @@
                 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
                 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
 
-                if (BOL_state_ && (start_token_ == start_ ||
-                    *(start_token_ - 1) == '\n'))
+                if (BOL_state_ && bol)
                 {
                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
                 }
@@ -77,6 +80,7 @@
 
                     index_type index = 
                         boost::lexer::char_traits<value_type>::call(*curr_++);
+                    bol = (index == '\n') ? true : false;
                     std::size_t const state_ = ptr_[
                         lookup_[static_cast<std::size_t>(index)]];
 
@@ -93,7 +97,8 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
-                    dfa_state_ = *(ptr_ + boost::lexer::state_index);
+                    end_start_state_ = *(ptr_ + boost::lexer::state_index);
+                    end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
@@ -109,19 +114,29 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
-                    dfa_state_ = *(ptr_ + boost::lexer::state_index);
+                    end_start_state_ = *(ptr_ + boost::lexer::state_index);
+                    end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
 
             if (end_state_) {
                 // return longest match
+                dfa_state_ = end_start_state_;
                 start_token_ = end_token_;
 
-                if (id_ == 0) 
+                if (id_ == 0)
+                {
+                    bol = end_bol_;
                     goto again;
+                }
+                else
+                {
+                    bol_ = end_bol_;
+                }
             }
             else {
+                bol_ = (*start_token_ == '\n') ? true : false;
                 id_ = boost::lexer::npos;
                 uid_ = boost::lexer::npos;
             }
@@ -133,19 +148,26 @@
         ///////////////////////////////////////////////////////////////////////
         static std::size_t next (
             boost::lexer::basic_state_machine<char_type> const& state_machine_
-          , Iterator const& start_, Iterator &start_token_, Iterator const& end_
+          , bool& bol_, Iterator &start_token_, Iterator const& end_
           , std::size_t& unique_id_)
         {
-            if (start_token_ == end_) return 0;
+            if (start_token_ == end_)
+            {
+                unique_id_ = boost::lexer::npos;
+                return 0;
+            }
 
+            bool bol = bol_;
             std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
             std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
             std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
             std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
+
             Iterator curr_ = start_token_;
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + boost::lexer::id_index);
             std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
+            bool end_bol_ = bol_;
             Iterator end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -153,8 +175,7 @@
                 std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
                 std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
 
-                if (BOL_state_ && (start_token_ == start_ ||
-                    *(start_token_ - 1) == '\n'))
+                if (BOL_state_ && bol)
                 {
                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
                 }
@@ -170,9 +191,10 @@
                     typedef typename 
                         boost::lexer::char_traits<value_type>::index_type 
                     index_type;
-                    
+
                     index_type index = 
                         boost::lexer::char_traits<value_type>::call(*curr_++);
+                    bol = (index == '\n') ? true : false;
                     std::size_t const state_ = ptr_[
                         lookup_[static_cast<std::size_t>(index)]];
 
@@ -189,6 +211,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
+                    end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
@@ -204,15 +227,18 @@
                     end_state_ = true;
                     id_ = *(ptr_ + boost::lexer::id_index);
                     uid_ = *(ptr_ + boost::lexer::unique_id_index);
+                    end_bol_ = bol;
                     end_token_ = curr_;
                 }
             }
 
             if (end_state_) {
                 // return longest match
+                bol_ = end_bol_;
                 start_token_ = end_token_;
             }
             else {
+                bol_ = *start_token_ == '\n';
                 id_ = boost::lexer::npos;
                 uid_ = boost::lexer::npos;
             }
@@ -222,10 +248,6 @@
         }
     };
 
-    ///////////////////////////////////////////////////////////////////////////
-    typedef basic_iterator_tokeniser<char const *> tokeniser;
-    typedef basic_iterator_tokeniser<wchar_t const *> wtokeniser;
-
 }}}}
 
 #endif
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -61,7 +61,8 @@
               , std::size_t> wrap_action_type;
  
             typedef std::size_t (*next_token_functor)(std::size_t&, 
-                Iterator const&, Iterator&, Iterator const&, std::size_t&);
+as                 Iterator const&, Iterator&, Iterator const&, std::size_t&);
+                bool&, Iterator&, Iterator const&, std::size_t&);
             typedef char_type const* (*get_state_name_type)(std::size_t);
 
             // initialize the shared data 
@@ -70,7 +71,8 @@
                   , Iterator const& last)
               : first_(first), last_(last) 
               , next_token_(data.next_)
-              , get_state_name_(data.get_state_name_){}
+              , get_state_name_(data.get_state_name_)
+              , bol_(data.bol_) {}
 
             // The following functions are used by the implementation of the 
             // placeholder '_state'.
@@ -107,7 +109,7 @@
                 return it; 
             }
 
-            // The function more() is used by the implemention of the support 
+            // The function more() is used by the implementation of the support 
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it 
             // matches a rule, the corresponding token should be appended onto 
@@ -149,7 +151,7 @@
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
                 std::size_t state;
-                return next_token_(state, first_, end, last_, unique_id);
+                return next_token_(state, bol_, end, last_, unique_id);
             }
 
             // nothing to invoke, so this is empty
@@ -180,6 +182,8 @@
             next_token_functor next_token_;
             get_state_name_type get_state_name_;
 
+            bool bol_;
+
         private:
             // silence MSVC warning C4512: assignment operator could not be generated
             static_data& operator= (static_data const&);
@@ -242,7 +246,7 @@
             // underlying input sequence. 
             std::size_t next(Iterator& end, std::size_t& unique_id)
             {
-                return this->next_token_(state_, this->first_, end, this->last_
+                return this->next_token_(state_, this->bol_, end, this->last_
                   , unique_id);
             }
 
@@ -312,7 +316,7 @@
                 return it;
             }
 
-            // The function more() is used by the implemention of the support 
+            // The function more() is used by the implementation of the support 
             // function lex::more(). Its functionality is equivalent to flex'
             // function yymore(): it tells the lexer that the next time it 
             // matches a rule, the corresponding token should be appended onto 
Modified: trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp
==============================================================================
--- trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp	(original)
+++ trunk/boost/spirit/home/lex/lexer/lexertl/static_lexer.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -139,15 +139,17 @@
 
             iterator_data_type(next_token_functor next
               , semantic_actions_type const& actions
-              , get_state_name_type get_state_name, std::size_t num_states)
+              , get_state_name_type get_state_name, std::size_t num_states
+              , bool bol)
               : next_(next), actions_(actions), get_state_name_(get_state_name)
-              , num_states_(num_states)
+              , num_states_(num_states), bol_(bol)
             {}
 
             next_token_functor next_;
             semantic_actions_type const& actions_;
             get_state_name_type get_state_name_;
             std::size_t num_states_;
+            bool bol_;
 
         private:
             // silence MSVC warning C4512: assignment operator could not be generated
@@ -173,8 +175,9 @@
           , char_type const* initial_state = 0) const
         { 
             iterator_data_type iterator_data( 
-                    &tables_type::template next<Iterator_>, actions_, 
-                    &tables_type::state_name, tables_type::state_count()
+                    &tables_type::template next<Iterator_>, actions_
+                  , &tables_type::state_name, tables_type::state_count()
+                  , tables_type::supports_bol
                 );
             return iterator_type(iterator_data, first, last, initial_state);
         }
Modified: trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/generate_cpp.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -79,7 +79,7 @@
         upper_name_.begin (), ::toupper);
     os_ << "#ifndef " << upper_name_ + '\n';
     os_ << "#define " << upper_name_ + '\n';
-    os_ << "// Copyright (c) 2008 Ben Hanson\n";
+    os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
     os_ << "//\n";
     os_ << "// Distributed under the Boost Software License, "
         "Version 1.0. (See accompanying\n";
@@ -94,25 +94,6 @@
         os_ << "std::size_t &start_state_, ";
     }
 
-    if (sm_._seen_BOL_assertion || !optimise_parameters_)
-    {
-        if (use_pointers_)
-        {
-            os_ << iterator_ << " const ";
-        }
-        else
-        {
-            os_ << "const " << iterator_;
-        }
-
-        os_ << "start_, ";
-    }
-
-    if (dfas_ > 1 || sm_._seen_BOL_assertion || !optimise_parameters_)
-    {
-        os_ << "\n    ";
-    }
-
     if (use_pointers_)
     {
         os_ << iterator_ << " &";
@@ -133,11 +114,18 @@
         os_ << "const " << iterator_;
     }
 
-    os_ << "end_)\n";
+    os_ << "end_, \n";
+    os_ << "    std::size_t &unique_id_";
+
+    if (sm_._seen_BOL_assertion || !optimise_parameters_)
+    {
+        os_ << ", bool &beg_of_line_";
+    }
+
+    os_ << ")\n";
     os_ << "{\n";
-    os_ << "    enum {end_state_index, id_index, state_index, bol_index, "
-        "eol_index,\n";
-    os_ << "        dead_state_index, dfa_offset};\n";
+    os_ << "    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
+    os_ << "        eol_index, dead_state_index, dfa_offset};\n";
     os_ << "    static const std::size_t npos = static_cast"
         "<std::size_t>(~0);\n";
 
@@ -330,7 +318,11 @@
         os_ << "};\n";
     }
 
-    os_ << "\n    if (start_token_ == end_) return 0;\n\n";
+    os_ << "\n    if (start_token_ == end_)\n";
+    os_ << "    {\n";
+    os_ << "        unique_id_ = npos;\n";
+    os_ << "        return 0;\n";
+    os_ << "    }\n\n";
 
     if (dfas_ > 1)
     {
@@ -346,6 +338,19 @@
     os_ << "    Iterator curr_ = start_token_;\n";
     os_ << "    bool end_state_ = *ptr_ != 0;\n";
     os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
+    os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
+
+    if (dfas_ > 1)
+    {
+        os_ << "    std::size_t end_start_state_ = start_state_;\n";
+    }
+
+    if (sm_._seen_BOL_assertion)
+    {
+        os_ << "    bool bol_ = beg_of_line_;\n";
+        os_ << "    bool end_bol_ = bol_;\n";
+    }
+
     os_ << "    Iterator end_token_ = start_token_;\n";
     os_ << '\n';
     os_ << "    while (curr_ != end_)\n";
@@ -366,108 +371,89 @@
         os_ << '\n';
     }
 
-    if (sm_._seen_BOL_assertion && sm_._seen_EOL_assertion)
+    if (sm_._seen_BOL_assertion)
     {
-        os_ << "        if (BOL_state_ && (start_token_ == start_ ||\n";
-        os_ << "            *(start_token_ - 1) == '\\n'))\n";
+        os_ << "        if (BOL_state_ && bol_)\n";
         os_ << "        {\n";
         os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
         os_ << "        }\n";
-        os_ << "        else if (EOL_state_ && *curr_ == '\\n')\n";
-        os_ << "        {\n";
-        os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
-        os_ << "        }\n";
-        os_ << "        else\n";
-        os_ << "        {\n";
-        os_ << "            const std::size_t state_ =\n";
+    }
 
-        if (lookups_ == 256)
-        {
-            os_ << "                ptr_[lookup_[static_cast<unsigned char>\n";
-            os_ << "                (*curr_++)]];\n";
-        }
-        else
+    if (sm_._seen_EOL_assertion)
+    {
+        os_ << "        ";
+
+        if (sm_._seen_BOL_assertion)
         {
-            os_ << "                ptr_[lookup_[*curr_++]];\n";
+            os_ << "else ";
         }
 
-        os_ << '\n';
-        os_ << "            if (state_ == 0) break;\n";
-        os_ << '\n';
-        os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+        os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
+        os_ << "        {\n";
+        os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
         os_ << "        }\n";
     }
-    else if (sm_._seen_BOL_assertion)
+
+    std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? "    " : "");
+
+    if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
     {
-        os_ << "        if (BOL_state_ && (start_token_ == start_ ||\n";
-        os_ << "            *(start_token_ - 1) == '\\n'))\n";
-        os_ << "        {\n";
-        os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
-        os_ << "        }\n";
         os_ << "        else\n";
         os_ << "        {\n";
-        os_ << "            const std::size_t state_ =\n";
+    }
+
+    if (sm_._seen_BOL_assertion)
+    {
+        os_ << "            ";
 
         if (lookups_ == 256)
         {
-            os_ << "                ptr_[lookup_[static_cast<unsigned char>\n";
-            os_ << "                (*curr_++)]];\n";
+            os_ << "char";
         }
         else
         {
-            os_ << "                ptr_[lookup_[*curr_++]];\n";
+            os_ << "wchar_t";
         }
 
-        os_ << '\n';
-        os_ << "            if (state_ == 0) break;\n";
-        os_ << '\n';
-        os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
-        os_ << "        }\n";
+        os_ << " prev_char_ = *curr_++;\n\n";
+        os_ << "            bol_ = prev_char_ == '\\n';\n\n";
     }
-    else if (sm_._seen_EOL_assertion)
-    {
-        os_ << "        if (EOL_state_ && *curr_ == '\\n')\n";
-        os_ << "        {\n";
-        os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
-        os_ << "        }\n";
-        os_ << "        else\n";
-        os_ << "        {\n";
-        os_ << "            const std::size_t state_ =\n";
 
-        if (lookups_ == 256)
-        {
-            os_ << "                ptr_[lookup_[static_cast<unsigned char>\n";
-            os_ << "                (*curr_++)]];\n";
-        }
-        else
-        {
-            os_ << "                ptr_[lookup_[*curr_++]];\n";
-        }
+    os_ << tab_;
+    os_ << "        const std::size_t state_ =\n";
+    os_ << tab_;
+    os_ << "            ptr_[lookup_[";
 
-        os_ << '\n';
-        os_ << "            if (state_ == 0) break;\n";
-        os_ << '\n';
-        os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
-        os_ << "        }\n";
+    if (lookups_ == 256)
+    {
+        os_ << "static_cast<unsigned char>(";
+    }
+
+    if (sm_._seen_BOL_assertion)
+    {
+        os_ << "prev_char_"; // must match the generated local declared as "prev_char_" (it already consumed *curr_++)
     }
     else
     {
-        os_ << "        const std::size_t state_ =\n";
+        os_ << "*curr_++"; // no BOL tracking: read and advance directly inside the lookup expression
+    }
 
-        if (lookups_ == 256)
-        {
-            os_ << "            ptr_[lookup_[static_cast<unsigned char>\n";
-            os_ << "            (*curr_++)]];\n";
-        }
-        else
-        {
-            os_ << "            ptr_[lookup_[*curr_++]];\n";
-        }
 
-        os_ << '\n';
-        os_ << "        if (state_ == 0) break;\n";
-        os_ << '\n';
-        os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+    if (lookups_ == 256)
+    {
+        os_ << ')';
+    }
+
+    os_ << "]];\n\n";
+
+    os_ << tab_;
+    os_ << "        if (state_ == 0) break;\n\n";
+    os_ << tab_;
+    os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
+
+    if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
+    {
+        os_ << "        }\n";
     }
 
     os_ << '\n';
@@ -475,10 +461,16 @@
     os_ << "        {\n";
     os_ << "            end_state_ = true;\n";
     os_ << "            id_ = *(ptr_ + id_index);\n";
+    os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
 
     if (dfas_ > 1)
     {
-        os_ << "            start_state_ = *(ptr_ + state_index);\n";
+        os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
+    }
+
+    if (sm_._seen_BOL_assertion)
+    {
+        os_ << "            end_bol_ = bol_;\n";
     }
 
     os_ << "            end_token_ = curr_;\n";
@@ -498,10 +490,16 @@
         os_ << "        {\n";
         os_ << "            end_state_ = true;\n";
         os_ << "            id_ = *(ptr_ + id_index);\n";
+        os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
 
         if (dfas_ > 1)
         {
-            os_ << "            start_state_ = *(ptr_ + state_index);\n";
+            os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
+        }
+
+        if (sm_._seen_BOL_assertion)
+        {
+            os_ << "            end_bol_ = bol_;\n";
         }
 
         os_ << "            end_token_ = curr_;\n";
@@ -513,12 +511,40 @@
     os_ << "    if (end_state_)\n";
     os_ << "    {\n";
     os_ << "        // return longest match\n";
+
+    if (dfas_ > 1)
+    {
+        os_ << "        start_state_ = end_start_state_;\n";
+    }
+
+    if (sm_._seen_BOL_assertion && dfas_ < 2)
+    {
+        os_ << "        beg_of_line_ = end_bol_;\n";
+    }
+
     os_ << "        start_token_ = end_token_;\n";
 
     if (dfas_ > 1)
     {
         os_ << '\n';
-        os_ << "        if (id_ == 0) goto again;\n";
+        os_ << "        if (id_ == 0)\n";
+        os_ << "        {\n";
+
+        if (sm_._seen_BOL_assertion)
+        {
+            os_ << "            bol_ = end_bol_;\n";
+        }
+
+        os_ << "            goto again;\n";
+        os_ << "        }\n";
+
+        if (sm_._seen_BOL_assertion)
+        {
+            os_ << "        else\n";
+            os_ << "        {\n";
+            os_ << "            beg_of_line_ = end_bol_;\n";
+            os_ << "        }\n";
+        }
     }
 
     os_ << "    }\n";
@@ -528,12 +554,20 @@
     if (skip_unknown_)
     {
         os_ << "        // No match causes char to be skipped\n";
+
+        if (sm_._seen_BOL_assertion)
+        {
+            os_ << "        beg_of_line_ = *start_token_ == '\\n';\n";
+        }
+        
         os_ << "        ++start_token_;\n";
     }
 
     os_ << "        id_ = npos;\n";
+    os_ << "        uid_ = npos;\n";
     os_ << "    }\n";
     os_ << '\n';
+    os_ << "    unique_id_ = uid_;\n";
     os_ << "    return id_;\n";
     os_ << "}\n";
     os_ << "\n#endif\n";
Modified: trunk/boost/spirit/home/support/detail/lexer/generator.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/generator.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/generator.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -285,7 +285,8 @@
                 locale_, node_ptr_vector_, macromap_, token_map_,
                 seen_BOL_assertion_, seen_EOL_assertion_);
             macro_iter_pair map_iter_ = macromap_.
-                insert (macro_pair (name_, (detail::node const*)0));
+                insert (macro_pair (name_, static_cast<const detail::node *>
+                (0)));
 
             map_iter_.first->second = node_;
         }
@@ -511,7 +512,8 @@
                     }
                     else
                     {
-                        iter_ = lhs_->insert (++iter_, (charset*)0);
+                        iter_ = lhs_->insert (++iter_,
+                            static_cast<charset *>(0));
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
@@ -653,7 +655,8 @@
                     }
                     else
                     {
-                        iter_ = lhs_->insert (++iter_, (equivset*)0);
+                        iter_ = lhs_->insert (++iter_,
+                            static_cast<equivset *>(0));
                         *iter_ = overlap_.release ();
 
                         // VC++ 6 Hack:
Modified: trunk/boost/spirit/home/support/detail/lexer/input.hpp
==============================================================================
--- trunk/boost/spirit/home/support/detail/lexer/input.hpp	(original)
+++ trunk/boost/spirit/home/support/detail/lexer/input.hpp	2009-11-09 21:00:27 EST (Mon, 09 Nov 2009)
@@ -172,6 +172,7 @@
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
             std::size_t uid_ = *(ptr_ + unique_id_index);
+            std::size_t end_start_state_ = start_state_;
             bool end_bol_ = bol_;
             FwdIter end_token_ = start_token_;
 
@@ -211,7 +212,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
                     uid_ = *(ptr_ + unique_id_index);
-                    start_state_ = *(ptr_ + state_index);
+                    end_start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -228,7 +229,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
                     uid_ = *(ptr_ + unique_id_index);
-                    start_state_ = *(ptr_ + state_index);
+                    end_start_state_ = *(ptr_ + state_index);
                     end_bol_ = bol_;
                     end_token_ = curr_;
                 }
@@ -237,14 +238,18 @@
             if (end_state_)
             {
                 // return longest match
-                _data.bol = end_bol_;
+                start_state_ = end_start_state_;
                 start_token_ = end_token_;
 
                 if (id_ == 0)
                 {
-                    bol_ = _data.bol;
+                    bol_ = end_bol_;
                     goto again;
                 }
+                else
+                {
+                    _data.bol = end_bol_;
+                }
             }
             else
             {
@@ -279,6 +284,7 @@
             bool end_state_ = *ptr_ != 0;
             std::size_t id_ = *(ptr_ + id_index);
             std::size_t uid_ = *(ptr_ + unique_id_index);
+            std::size_t end_start_state_ = start_state_;
             FwdIter end_token_ = start_token_;
 
             while (curr_ != end_)
@@ -298,7 +304,7 @@
                     end_state_ = true;
                     id_ = *(ptr_ + id_index);
                     uid_ = *(ptr_ + unique_id_index);
-                    start_state_ = *(ptr_ + state_index);
+                    end_start_state_ = *(ptr_ + state_index);
                     end_token_ = curr_;
                 }
             }
@@ -306,6 +312,7 @@
             if (end_state_)
             {
                 // return longest match
+                start_state_ = end_start_state_;
                 start_token_ = end_token_;
 
                 if (id_ == 0) goto again;
@@ -401,8 +408,8 @@
             if (end_state_)
             {
                 // return longest match
-                start_token_ = end_token_;
                 _data.bol = end_bol_;
+                start_token_ = end_token_;
             }
             else
             {