$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r60345 - trunk/boost/spirit/home/support/char_encoding/unicode
From: joel_at_[hidden]
Date: 2010-03-08 07:43:43
Author: djowel
Date: 2010-03-08 07:43:42 EST (Mon, 08 Mar 2010)
New Revision: 60345
URL: http://svn.boost.org/trac/boost/changeset/60345
Log:
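Tweaks. Caught a Unicode case-table lookup bug: the lowercase/uppercase tables return 0 for code points that map to themselves, so to_lowercase/to_uppercase now return the input in that case.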
Text files modified: 
   trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp |   110 ++++++++++++++++++++------------------- 
   1 file changed, 57 insertions(+), 53 deletions(-)
Modified: trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp	(original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp	2010-03-08 07:43:42 EST (Mon, 08 Mar 2010)
@@ -18,17 +18,17 @@
 # include "uppercase_table.hpp"
 
 namespace boost { namespace spirit { namespace ucd
-{    
+{
     // This header provides Basic (Level 1) Unicode Support
     // See http://unicode.org/reports/tr18/ for details
-    
+
     struct properties
     {
         // bit pattern: xxMMMCCC
         // MMM: major_category
         // CCC: category
 
-        enum major_category 
+        enum major_category
         {
             letter,
             mark,
@@ -38,47 +38,47 @@
             punctuation,
             symbol
         };
-        
+
         enum category
         {
             uppercase_letter = 0,   // [Lu] an uppercase letter
-            lowercase_letter,       // [Ll] a lowercase letter 
-            titlecase_letter,       // [Lt] a digraphic character, with first part uppercase 
-            modifier_letter,        // [Lm] a modifier letter 
+            lowercase_letter,       // [Ll] a lowercase letter
+            titlecase_letter,       // [Lt] a digraphic character, with first part uppercase
+            modifier_letter,        // [Lm] a modifier letter
             other_letter,           // [Lo] other letters, including syllables and ideographs
-                                    
+
             nonspacing_mark = 8,    // [Mn] a nonspacing combining mark (zero advance width)
             enclosing_mark,         // [Me] an enclosing combining mark
-            spacing_mark,           // [Mc] a spacing combining mark (positive advance width)   
-                                    
-            decimal_number = 16,    // [Nd] a decimal digit 
+            spacing_mark,           // [Mc] a spacing combining mark (positive advance width)
+
+            decimal_number = 16,    // [Nd] a decimal digit
             letter_number,          // [Nl] a letterlike numeric character
             other_number,           // [No] a numeric character of other type
-                                    
+
             space_separator = 24,   // [Zs] a space character (of various non-zero widths)
-            line_separator,         // [Zl] U+2028 LINE SEPARATOR only 
+            line_separator,         // [Zl] U+2028 LINE SEPARATOR only
             paragraph_separator,    // [Zp] U+2029 PARAGRAPH SEPARATOR only
-                                    
+
             control = 32,           // [Cc] a C0 or C1 control code
             format,                 // [Cf] a format control character
             private_use,            // [Co] a private-use character
             surrogate,              // [Cs] a surrogate code point
             unassigned,             // [Cn] a reserved unassigned code point or a noncharacter
-                                    
+
             dash_punctuation = 40,  // [Pd] a dash or hyphen punctuation mark
-            open_punctuation,       // [Ps] an opening punctuation mark (of a pair)  
-            close_punctuation,      // [Pe] a closing punctuation mark (of a pair)  
-            connector_punctuation,  // [Pc] a connecting punctuation mark, like a tie 
-            other_punctuation,      // [Po] a punctuation mark of other type 
-            initial_punctuation,    // [Pi] an initial quotation mark 
+            open_punctuation,       // [Ps] an opening punctuation mark (of a pair)
+            close_punctuation,      // [Pe] a closing punctuation mark (of a pair)
+            connector_punctuation,  // [Pc] a connecting punctuation mark, like a tie
+            other_punctuation,      // [Po] a punctuation mark of other type
+            initial_punctuation,    // [Pi] an initial quotation mark
             final_punctuation,      // [Pf] a final quotation mark
-                                    
+
             math_symbol = 48,       // [Sm] a symbol of primarily mathematical use
-            currency_symbol,        // [Sc] a currency sign 
+            currency_symbol,        // [Sc] a currency sign
             modifier_symbol,        // [Sk] a non-letterlike modifier symbol
-            other_symbol            // [So] a symbol of other type 
+            other_symbol            // [So] a symbol of other type
         };
-        
+
         enum derived_properties
         {
             alphabetic = 64,
@@ -89,7 +89,7 @@
             noncharacter_code_point = 2048,
             default_ignorable_code_point = 4096
         };
-        
+
         enum script
         {
             arabic = 0,
@@ -187,114 +187,118 @@
             common = 92,
             unknown = 93
         };
-    }; 
-        
+    };
+
     inline properties::category get_category(::boost::uint32_t ch)
     {
         return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
     }
-    
+
     inline properties::major_category get_major_category(::boost::uint32_t ch)
     {
         return static_cast<properties::major_category>(get_category(ch) >> 3);
     }
-    
+
     inline bool is_punctuation(::boost::uint32_t ch)
     {
         return get_major_category(ch) == properties::punctuation;
-    } 
-    
+    }
+
     inline bool is_decimal_number(::boost::uint32_t ch)
     {
         return get_category(ch) == properties::decimal_number;
     }
-    
+
     inline bool is_hex_digit(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::hex_digit) != 0;
-    }  
-    
+    }
+
     inline bool is_control(::boost::uint32_t ch)
     {
         return get_category(ch) == properties::control;
     }
-    
+
     inline bool is_alphabetic(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::alphabetic) != 0;
     }
-        
+
     inline bool is_alphanumeric(::boost::uint32_t ch)
     {
         return is_decimal_number(ch) || is_alphabetic(ch);
     }
-    
+
     inline bool is_uppercase(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::uppercase) != 0;
     }
-    
+
     inline bool is_lowercase(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::lowercase) != 0;
     }
-    
+
     inline bool is_white_space(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::white_space) != 0;
     }
-    
+
     inline bool is_blank(::boost::uint32_t ch)
     {
         switch (ch)
         {
             case '\n': case '\v': case '\f': case '\r':
                 return false;
-            default: 
-                return is_white_space(ch) 
-                && !(   get_category(ch) == properties::line_separator 
+            default:
+                return is_white_space(ch)
+                && !(   get_category(ch) == properties::line_separator
                     ||  get_category(ch) == properties::paragraph_separator
                     );
         }
     }
-    
+
     inline bool is_graph(::boost::uint32_t ch)
     {
         return !(   is_white_space(ch)
-                ||  get_category(ch) == properties::control 
+                ||  get_category(ch) == properties::control
                 ||  get_category(ch) == properties::surrogate
                 ||  get_category(ch) == properties::unassigned
                 );
     }
-    
+
     inline bool is_print(::boost::uint32_t ch)
     {
         return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
-    } 
+    }
 
     inline bool is_noncharacter_code_point(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
     }
-    
+
     inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
     }
-    
+
     inline properties::script get_script(::boost::uint32_t ch)
     {
         return static_cast<properties::script>(detail::script_lookup(ch) & 0x3F);
     }
-    
+
     inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
     {
-        return detail::lowercase_lookup(ch);
+        // The table returns 0 to signal that this code maps to itself
+        ::boost::uint32_t r = detail::lowercase_lookup(ch);
+        return (r == 0)? ch : r;
     }
-    
+
     inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
     {
-        return detail::uppercase_lookup(ch);
+        // The table returns 0 to signal that this code maps to itself
+        ::boost::uint32_t r = detail::uppercase_lookup(ch);
+        return (r == 0)? ch : r;
     }
 }}}