<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r56256 - in trunk/boost/spirit/home: qi/char support/char_encoding
From: hartmut.kaiser_at_[hidden]
Date: 2009-09-16 21:49:19
Author: hkaiser
Date: 2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
New Revision: 56256
URL: http://svn.boost.org/trac/boost/changeset/56256
Log:
Spirit: fixed a character size mismatch revealed while parsing based on tokens
Text files modified: 
   trunk/boost/spirit/home/qi/char/char.hpp                        |    34 +++++++++++++++++++++++-----------      
   trunk/boost/spirit/home/qi/char/char_class.hpp                  |     3 ++-                                     
   trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp     |     4 +++-                                    
   trunk/boost/spirit/home/support/char_encoding/standard.hpp      |     4 +++-                                    
   trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp |    19 +++++++++++++++++--                     
   5 files changed, 48 insertions(+), 16 deletions(-)
Modified: trunk/boost/spirit/home/qi/char/char.hpp
==============================================================================
--- trunk/boost/spirit/home/qi/char/char.hpp	(original)
+++ trunk/boost/spirit/home/qi/char/char.hpp	2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -119,9 +119,9 @@
         };
 
         template <typename CharParam, typename Context>
-        bool test(CharParam ch, Context&) const
+        bool test(CharParam ch_, Context&) const
         {
-            return this->ch == char_type(ch);
+            return char_encoding::ischar(int(ch_)) && ch == char_type(ch_);
         }
 
         template <typename Context>
@@ -156,9 +156,13 @@
         };
 
         template <typename CharParam, typename Context>
-        bool test(CharParam ch, Context&) const
+        bool test(CharParam ch_, Context&) const
         {
-            return this->lo == char_type(ch) || this->hi == char_type(ch);
+            if (!char_encoding::ischar(int(ch_)))
+                return false;
+
+            char_type ch = char_type(ch_);  // optimize for token based parsing
+            return this->lo == ch || this->hi == ch;
         }
 
         template <typename Context>
@@ -184,9 +188,13 @@
           : from(from), to(to) {}
 
         template <typename CharParam, typename Context>
-        bool test(CharParam ch, Context&) const
+        bool test(CharParam ch_, Context&) const
         {
-            return !(char_type(ch) < from) && !(to < char_type(ch));
+            if (!char_encoding::ischar(int(ch_)))
+                return false;
+
+            char_type ch = char_type(ch_);  // optimize for token based parsing
+            return !(ch < from) && !(to < ch);
         }
 
         template <typename Context>
@@ -216,10 +224,14 @@
         {}
 
         template <typename CharParam, typename Context>
-        bool test(CharParam ch, Context&) const
+        bool test(CharParam ch_, Context&) const
         {
-            return (!(char_type(ch) < from_lo) && !(to_lo < char_type(ch)))
-                || (!(char_type(ch) < from_hi) && !(to_hi < char_type(ch)))
+            if (!char_encoding::ischar(int(ch_)))
+                return false;
+
+            char_type ch = char_type(ch_);  // optimize for token based parsing
+            return (!(ch < from_lo) && !(to_lo < ch))
+                || (!(ch < from_hi) && !(to_hi < ch))
             ;
         }
 
@@ -282,7 +294,7 @@
         template <typename CharParam, typename Context>
         bool test(CharParam ch, Context&) const
         {
-            return chset.test(char_type(ch));
+            return char_encoding::ischar(int(ch)) && chset.test(char_type(ch));
         }
 
         template <typename Context>
@@ -341,7 +353,7 @@
         template <typename CharParam, typename Context>
         bool test(CharParam ch, Context&) const
         {
-            return chset.test(char_type(char_type(ch)));
+            return char_encoding::ischar(int(ch)) && chset.test(char_type(ch));
         }
 
         template <typename Context>
Modified: trunk/boost/spirit/home/qi/char/char_class.hpp
==============================================================================
--- trunk/boost/spirit/home/qi/char/char_class.hpp	(original)
+++ trunk/boost/spirit/home/qi/char/char_class.hpp	2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -57,7 +57,8 @@
         bool test(CharParam ch, Context&) const
         {
             using spirit::char_class::classify;
-            return classify<char_encoding>::is(classification(), ch);
+            return char_encoding::ischar(ch) && 
+                   classify<char_encoding>::is(classification(), ch);
         }
 
         template <typename Context>
Modified: trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp	(original)
+++ trunk/boost/spirit/home/support/char_encoding/iso8859_1.hpp	2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -579,7 +579,9 @@
         static bool
         ischar(int ch)
         {
-            return true; // iso8859.1 uses all 8 bits
+            // iso8859.1 uses all 8 bits
+            // we have to watch out for sign extensions
+            return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) ? true : false;
         }
 
         static int
Modified: trunk/boost/spirit/home/support/char_encoding/standard.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/standard.hpp	(original)
+++ trunk/boost/spirit/home/support/char_encoding/standard.hpp	2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -33,7 +33,9 @@
         static bool
         ischar(int ch)
         {
-            return true; // use all the bits
+            // uses all 8 bits
+            // we have to watch out for sign extensions
+            return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) ? true : false;
         }
 
         static int
Modified: trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp	(original)
+++ trunk/boost/spirit/home/support/char_encoding/standard_wide.hpp	2009-09-16 21:49:18 EDT (Wed, 16 Sep 2009)
@@ -16,6 +16,7 @@
 #include <string>
 
 #include <boost/cstdint.hpp>
+#include <boost/spirit/home/support/assert_msg.hpp>
 
 namespace boost { namespace spirit { namespace char_encoding
 {
@@ -40,10 +41,24 @@
             return std::char_traits<Char>::to_char_type(ch);
         }
 
+        template <std::size_t N>
+        struct wchar_t_size
+        {
+            BOOST_SPIRIT_ASSERT_MSG(N == 1 || N == 2 || N == 4,
+                not_supported_size_of_wchar_t, ());
+        };
+
+        template <> struct wchar_t_size<1> { enum { mask = 0xff }; };
+        template <> struct wchar_t_size<2> { enum { mask = 0xffff }; };
+        template <> struct wchar_t_size<4> { enum { mask = 0xffffffff }; };
+
         static bool
-        ischar(wchar_t ch)
+        ischar(int ch)
         {
-            return true; // any wchar_t
+            // we have to watch out for sign extensions
+            return ( 0 == (ch & ~wchar_t_size<sizeof(wchar_t)>::mask) || 
+                    ~0 == (ch | wchar_t_size<sizeof(wchar_t)>::mask)) ? 
+                true : false;     // any wchar_t, but no other bits set
         }
 
         static bool