<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r59440 - in trunk/boost/spirit/home/support: . char_encoding char_encoding/unicode
From: joel_at_[hidden]
Date: 2010-02-03 07:35:11
Author: djowel
Date: 2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
New Revision: 59440
URL: http://svn.boost.org/trac/boost/changeset/59440
Log:
More Unicode Support
Text files modified: 
   trunk/boost/spirit/home/support/char_class.hpp                  |   734 ++++++++++++++++++++++++++++++--------- 
   trunk/boost/spirit/home/support/char_encoding/unicode.hpp       |   203 +++++++++++                             
   trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp |    14                                         
   trunk/boost/spirit/home/support/common_terminals.hpp            |   193 ++++++++++                              
   4 files changed, 958 insertions(+), 186 deletions(-)
Modified: trunk/boost/spirit/home/support/char_class.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_class.hpp	(original)
+++ trunk/boost/spirit/home/support/char_class.hpp	2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -51,6 +51,168 @@
     struct uppernum {};
     struct ucs4 {};
 
+#if defined(BOOST_SPIRIT_UNICODE)
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Major Categories
+///////////////////////////////////////////////////////////////////////////
+    struct letter {};
+    struct mark {};
+    struct number {};
+    struct separator {};
+    struct other {};
+    struct punctuation {};
+    struct symbol {};
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode General Categories
+///////////////////////////////////////////////////////////////////////////
+    struct uppercase_letter {};
+    struct lowercase_letter {};
+    struct titlecase_letter {};
+    struct modifier_letter {};
+    struct other_letter {};
+
+    struct nonspacing_mark {};
+    struct enclosing_mark {};
+    struct spacing_mark {};
+
+    struct decimal_number {};
+    struct letter_number {};
+    struct other_number {};
+
+    struct space_separator {};
+    struct line_separator {};
+    struct paragraph_separator {};
+
+    struct control {};
+    struct format {};
+    struct private_use {};
+    struct surrogate {};
+    struct unassigned {};
+
+    struct dash_punctuation {};
+    struct open_punctuation {};
+    struct close_punctuation {};
+    struct connector_punctuation {};
+    struct other_punctuation {};
+    struct initial_punctuation {};
+    struct final_punctuation {};
+
+    struct math_symbol {};
+    struct currency_symbol {};
+    struct modifier_symbol {};
+    struct other_symbol {};
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Derived Categories
+///////////////////////////////////////////////////////////////////////////
+    struct alphabetic {};
+    struct uppercase {};
+    struct lowercase {};
+    struct white_space {};
+    struct hex_digit {};
+    struct noncharacter_code_point {};
+    struct default_ignorable_code_point {};
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Scripts
+///////////////////////////////////////////////////////////////////////////
+    struct arabic {};
+    struct imperial_aramaic {};
+    struct armenian {};
+    struct avestan {};
+    struct balinese {};
+    struct bamum {};
+    struct bengali {};
+    struct bopomofo {};
+    struct braille {};
+    struct buginese {};
+    struct buhid {};
+    struct canadian_aboriginal {};
+    struct carian {};
+    struct cham {};
+    struct cherokee {};
+    struct coptic {};
+    struct cypriot {};
+    struct cyrillic {};
+    struct devanagari {};
+    struct deseret {};
+    struct egyptian_hieroglyphs {};
+    struct ethiopic {};
+    struct georgian {};
+    struct glagolitic {};
+    struct gothic {};
+    struct greek {};
+    struct gujarati {};
+    struct gurmukhi {};
+    struct hangul {};
+    struct han {};
+    struct hanunoo {};
+    struct hebrew {};
+    struct hiragana {};
+    struct katakana_or_hiragana {};
+    struct old_italic {};
+    struct javanese {};
+    struct kayah_li {};
+    struct katakana {};
+    struct kharoshthi {};
+    struct khmer {};
+    struct kannada {};
+    struct kaithi {};
+    struct tai_tham {};
+    struct lao {};
+    struct latin {};
+    struct lepcha {};
+    struct limbu {};
+    struct linear_b {};
+    struct lisu {};
+    struct lycian {};
+    struct lydian {};
+    struct malayalam {};
+    struct mongolian {};
+    struct meetei_mayek {};
+    struct myanmar {};
+    struct nko {};
+    struct ogham {};
+    struct ol_chiki {};
+    struct old_turkic {};
+    struct oriya {};
+    struct osmanya {};
+    struct phags_pa {};
+    struct inscriptional_pahlavi {};
+    struct phoenician {};
+    struct inscriptional_parthian {};
+    struct rejang {};
+    struct runic {};
+    struct samaritan {};
+    struct old_south_arabian {};
+    struct saurashtra {};
+    struct shavian {};
+    struct sinhala {};
+    struct sundanese {};
+    struct syloti_nagri {};
+    struct syriac {};
+    struct tagbanwa {};
+    struct tai_le {};
+    struct new_tai_lue {};
+    struct tamil {};
+    struct tai_viet {};
+    struct telugu {};
+    struct tifinagh {};
+    struct tagalog {};
+    struct thaana {};
+    struct thai {};
+    struct tibetan {};
+    struct ugaritic {};
+    struct vai {};
+    struct old_persian {};
+    struct cuneiform {};
+    struct yi {};
+    struct inherited {};
+    struct common {};
+    struct unknown {};
+#endif
+
     ///////////////////////////////////////////////////////////////////////////
     // This composite tag type encodes both the character
     // set and the specific char tag (used for classification
@@ -77,113 +239,222 @@
     {
         typedef typename CharEncoding::char_type char_type;
 
-        template <typename Char>
-        static bool
-        is(tag::char_, Char ch)
-        {
-            return CharEncoding::ischar(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::alnum, Char ch)
-        {
-            return CharEncoding::isalnum(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::alpha, Char ch)
-        {
-            return CharEncoding::isalpha(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::digit, Char ch)
-        {
-            return CharEncoding::isdigit(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::xdigit, Char ch)
-        {
-            return CharEncoding::isxdigit(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::cntrl, Char ch)
-        {
-            return CharEncoding::iscntrl(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::graph, Char ch)
-        {
-            return CharEncoding::isgraph(char_type(ch));
-        }
+#define BOOST_SPIRIT_CLASSIFY(name, isname)                                     \
+        template <typename Char>                                                \
+        static bool                                                             \
+        is(tag::name, Char ch)                                                  \
+        {                                                                       \
+            return CharEncoding::isname                                         \
+                BOOST_PREVENT_MACRO_SUBSTITUTION (char_type(ch));               \
+        }                                                                       \
+        /***/
+
+        BOOST_SPIRIT_CLASSIFY(char_, ischar)
+        BOOST_SPIRIT_CLASSIFY(alnum, isalnum)
+        BOOST_SPIRIT_CLASSIFY(alpha, isalpha)
+        BOOST_SPIRIT_CLASSIFY(digit, isdigit)
+        BOOST_SPIRIT_CLASSIFY(xdigit, isxdigit)
+        BOOST_SPIRIT_CLASSIFY(cntrl, iscntrl)
+        BOOST_SPIRIT_CLASSIFY(graph, isgraph)
+        BOOST_SPIRIT_CLASSIFY(lower, islower)
+        BOOST_SPIRIT_CLASSIFY(print, isprint)
+        BOOST_SPIRIT_CLASSIFY(punct, ispunct)
+        BOOST_SPIRIT_CLASSIFY(space, isspace)
+        BOOST_SPIRIT_CLASSIFY(blank, isblank)
+        BOOST_SPIRIT_CLASSIFY(upper, isupper)
 
-        template <typename Char>
-        static bool
-        is(tag::lower, Char ch)
-        {
-            return CharEncoding::islower(char_type(ch));
-        }
+#undef BOOST_SPIRIT_CLASSIFY
 
         template <typename Char>
         static bool
         is(tag::lowernum, Char ch)
         {
-            return CharEncoding::islower(char_type(ch)) || 
+            return CharEncoding::islower(char_type(ch)) ||
                    CharEncoding::isdigit(char_type(ch));
         }
 
         template <typename Char>
         static bool
-        is(tag::print, Char ch)
-        {
-            return CharEncoding::isprint(char_type(ch));
-        }
-
-        template <typename Char>
-        static bool
-        is(tag::punct, Char ch)
+        is(tag::uppernum, Char ch)
         {
-            return CharEncoding::ispunct(char_type(ch));
+            return CharEncoding::isupper(char_type(ch)) ||
+                   CharEncoding::isdigit(char_type(ch));
         }
 
-        template <typename Char>
-        static bool
-        is(tag::space, Char ch)
-        {
-            return CharEncoding::isspace(char_type(ch));
-        }
+#if defined(BOOST_SPIRIT_UNICODE)
 
-        template <typename Char>
-        static bool
-        is(tag::blank, Char ch)
-        {
-            return CharEncoding::isblank
-                BOOST_PREVENT_MACRO_SUBSTITUTION (char_type(ch));
-        }
+#define BOOST_SPIRIT_UNICODE_CLASSIFY(name)                                     \
+        template <typename Char>                                                \
+        static bool                                                             \
+        is(tag::name, Char ch)                                                  \
+        {                                                                       \
+            return CharEncoding::is_##name(char_type(ch));                      \
+        }                                                                       \
+        /***/
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Major Categories
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY(letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(mark)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(number)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(separator)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(other)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(symbol)
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode General Categories
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY(uppercase_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lowercase_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(titlecase_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(modifier_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(other_letter)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY(nonspacing_mark)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(enclosing_mark)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(spacing_mark)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY(decimal_number)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(letter_number)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(other_number)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY(space_separator)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(line_separator)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(paragraph_separator)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY(control)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(format)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(private_use)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(surrogate)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(unassigned)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY(dash_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(open_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(close_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(connector_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(other_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(initial_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(final_punctuation)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY(math_symbol)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(currency_symbol)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(modifier_symbol)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(other_symbol)
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Derived Categories
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY(alphabetic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(uppercase)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lowercase)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(white_space)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(hex_digit)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(noncharacter_code_point)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(default_ignorable_code_point)
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Scripts
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY(arabic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(imperial_aramaic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(armenian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(avestan)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(balinese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(bamum)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(bengali)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(bopomofo)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(braille)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(buginese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(buhid)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(canadian_aboriginal)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(carian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(cham)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(cherokee)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(coptic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(cypriot)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(cyrillic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(devanagari)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(deseret)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(egyptian_hieroglyphs)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(ethiopic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(georgian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(glagolitic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(gothic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(greek)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(gujarati)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(gurmukhi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(hangul)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(han)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(hanunoo)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(hebrew)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(hiragana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(katakana_or_hiragana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(old_italic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(javanese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(kayah_li)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(katakana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(kharoshthi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(khmer)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(kannada)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(kaithi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tai_tham)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lao)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(latin)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lepcha)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(limbu)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(linear_b)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lisu)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lycian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(lydian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(malayalam)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(mongolian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(meetei_mayek)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(myanmar)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(nko)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(ogham)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(ol_chiki)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(old_turkic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(oriya)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(osmanya)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(phags_pa)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(inscriptional_pahlavi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(phoenician)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(inscriptional_parthian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(rejang)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(runic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(samaritan)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(old_south_arabian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(saurashtra)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(shavian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(sinhala)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(sundanese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(syloti_nagri)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(syriac)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tagbanwa)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tai_le)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(new_tai_lue)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tamil)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tai_viet)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(telugu)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tifinagh)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tagalog)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(thaana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(thai)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(tibetan)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(ugaritic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(vai)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(old_persian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(cuneiform)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(yi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(inherited)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(common)
+    BOOST_SPIRIT_UNICODE_CLASSIFY(unknown)
 
-        template <typename Char>
-        static bool
-        is(tag::upper, Char ch)
-        {
-            return CharEncoding::isupper(char_type(ch));
-        }
+#undef BOOST_SPIRIT_UNICODE_CLASSIFY
+#endif
 
-        template <typename Char>
-        static bool
-        is(tag::uppernum, Char ch)
-        {
-            return CharEncoding::isupper(char_type(ch)) || 
-                   CharEncoding::isdigit(char_type(ch));
-        }
     };
 
     ///////////////////////////////////////////////////////////////////////////
@@ -227,100 +498,219 @@
     template <typename CharEncoding>
     struct what
     {
-        static char const* is(tag::char_)
-        {
-            return "char";
-        }
-
-        static char const* is(tag::alnum)
-        {
-            return "alnum";
-        }
-
-        static char const* is(tag::alpha)
-        {
-            return "alpha";
-        }
-
-        static char const* is(tag::digit)
-        {
-            return "digit";
-        }
-
-        static char const* is(tag::xdigit)
-        {
-            return "xdigit";
-        }
-
-        static char const* is(tag::cntrl)
-        {
-            return "cntrl";
-        }
-
-        static char const* is(tag::graph)
-        {
-            return "graph";
-        }
-
-        static char const* is(tag::lower)
-        {
-            return "lower";
-        }
-
-        static char const* is(tag::lowernum)
-        {
-            return "lowernum";
-        }
-
-        static char const* is(tag::print)
-        {
-            return "print";
-        }
-
-        static char const* is(tag::punct)
-        {
-            return "punct";
-        }
+#define BOOST_SPIRIT_CLASSIFY_WHAT(name, isname)                                \
+        static char const* is(tag::name)                                        \
+        {                                                                       \
+            return isname;                                                      \
+        }                                                                       \
+        /***/
+
+        BOOST_SPIRIT_CLASSIFY_WHAT(char_, "char")
+        BOOST_SPIRIT_CLASSIFY_WHAT(alnum, "alnum")
+        BOOST_SPIRIT_CLASSIFY_WHAT(alpha, "alpha")
+        BOOST_SPIRIT_CLASSIFY_WHAT(digit, "digit")
+        BOOST_SPIRIT_CLASSIFY_WHAT(xdigit, "xdigit")
+        BOOST_SPIRIT_CLASSIFY_WHAT(cntrl, "cntrl")
+        BOOST_SPIRIT_CLASSIFY_WHAT(graph, "graph")
+        BOOST_SPIRIT_CLASSIFY_WHAT(lower, "lower")
+        BOOST_SPIRIT_CLASSIFY_WHAT(lowernum, "lowernum")
+        BOOST_SPIRIT_CLASSIFY_WHAT(print, "print")
+        BOOST_SPIRIT_CLASSIFY_WHAT(punct, "punct")
+        BOOST_SPIRIT_CLASSIFY_WHAT(space, "space")
+        BOOST_SPIRIT_CLASSIFY_WHAT(blank, "blank")
+        BOOST_SPIRIT_CLASSIFY_WHAT(upper, "upper")
+        BOOST_SPIRIT_CLASSIFY_WHAT(uppernum, "uppernum")
+        BOOST_SPIRIT_CLASSIFY_WHAT(ucs4, "ucs4")
+
+#undef BOOST_SPIRIT_CLASSIFY_WHAT
+
+#if defined(BOOST_SPIRIT_UNICODE)
+
+#define BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(name)                                \
+        static char const* is(tag::name)                                        \
+        {                                                                       \
+            return BOOST_PP_STRINGIZE(name);                                    \
+        }                                                                       \
+        /***/
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Major Categories
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(mark)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(number)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(separator)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(symbol)
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode General Categories
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(uppercase_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lowercase_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(titlecase_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(modifier_letter)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_letter)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(nonspacing_mark)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(enclosing_mark)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(spacing_mark)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(decimal_number)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(letter_number)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_number)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(space_separator)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(line_separator)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(paragraph_separator)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(control)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(format)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(private_use)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(surrogate)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(unassigned)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(dash_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(open_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(close_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(connector_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(initial_punctuation)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(final_punctuation)
+
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(math_symbol)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(currency_symbol)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(modifier_symbol)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(other_symbol)
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Derived Categories
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(alphabetic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(uppercase)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lowercase)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(white_space)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hex_digit)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(noncharacter_code_point)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(default_ignorable_code_point)
+
+///////////////////////////////////////////////////////////////////////////
+//  Unicode Scripts
+///////////////////////////////////////////////////////////////////////////
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(arabic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(imperial_aramaic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(armenian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(avestan)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(balinese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(bamum)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(bengali)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(bopomofo)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(braille)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(buginese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(buhid)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(canadian_aboriginal)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(carian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cham)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cherokee)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(coptic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cypriot)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cyrillic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(devanagari)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(deseret)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(egyptian_hieroglyphs)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ethiopic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(georgian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(glagolitic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(gothic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(greek)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(gujarati)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(gurmukhi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hangul)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(han)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hanunoo)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hebrew)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(hiragana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(katakana_or_hiragana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_italic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(javanese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kayah_li)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(katakana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kharoshthi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(khmer)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kannada)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(kaithi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tai_tham)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lao)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(latin)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lepcha)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(limbu)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(linear_b)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lisu)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lycian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(lydian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(malayalam)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(mongolian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(meetei_mayek)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(myanmar)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(nko)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ogham)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ol_chiki)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_turkic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(oriya)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(osmanya)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(phags_pa)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(inscriptional_pahlavi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(phoenician)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(inscriptional_parthian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(rejang)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(runic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(samaritan)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_south_arabian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(saurashtra)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(shavian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(sinhala)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(sundanese)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(syloti_nagri)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(syriac)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tagbanwa)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tai_le)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(new_tai_lue)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tamil)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tai_viet)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(telugu)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tifinagh)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tagalog)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(thaana)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(thai)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(tibetan)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(ugaritic)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(vai)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(old_persian)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(cuneiform)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(yi)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(inherited)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(common)
+    BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT(unknown)
 
-        static char const* is(tag::space)
-        {
-            return "space";
-        }
-
-        static char const* is(tag::blank)
-        {
-            return "blank";
-        }
-
-        static char const* is(tag::upper)
-        {
-            return "upper";
-        }
-
-        static char const* is(tag::uppernum)
-        {
-            return "uppernum";
-        }
+#undef BOOST_SPIRIT_UNICODE_CLASSIFY_WHAT
+#endif
 
-        static char const* is(tag::ucs4)
-        {
-            return "ucs4";
-        }
     };
 }}}
 
-namespace boost { namespace spirit { namespace traits 
+namespace boost { namespace spirit { namespace traits
 {
     ///////////////////////////////////////////////////////////////////////////
-    // This meta-function evaluates to mpl::true_ if the function 
+    // This meta-function evaluates to mpl::true_ if the function
     // char_encoding::ischar() needs to be called to ensure correct matching.
     // This happens mainly if the character type returned from the underlying
-    // iterator is larger than the character type of the used character 
-    // encoding. Additionally, this meta-function provides a customization 
+    // iterator is larger than the character type of the used character
+    // encoding. Additionally, this meta-function provides a customization
     // point for the lexer library to enforce this behavior while parsing
     // a token stream.
     template <typename Char, typename BaseChar>
-    struct mustcheck_ischar 
+    struct mustcheck_ischar
       : mpl::bool_<(sizeof(Char) > sizeof(BaseChar)) ? true : false> {};
 
     ///////////////////////////////////////////////////////////////////////////
Modified: trunk/boost/spirit/home/support/char_encoding/unicode.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode.hpp	(original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode.hpp	2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -24,6 +24,9 @@
     {
         typedef ::boost::uint32_t char_type;
 
+    ///////////////////////////////////////////////////////////////////////////
+    //  Posix stuff
+    ///////////////////////////////////////////////////////////////////////////
         static bool
         isascii_(char_type ch)
         {
@@ -58,7 +61,7 @@
         static bool
         isxdigit(char_type ch)
         {
-            return ucd::is_hexadecimal_number(ch);
+            return ucd::is_hex_digit(ch);
         }
 
         static bool
@@ -130,6 +133,204 @@
         {
             return ch;
         }
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  Major Categories
+    ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_MAJOR_CATEGORY(name)                                       \
+        static bool                                                             \
+        is_##name(char_type ch)                                                 \
+        {                                                                       \
+            return ucd::get_major_category(ch) == ucd::properties::name;        \
+        }                                                                       \
+        /* emits is_<name>(ch): get_major_category(ch) == properties::<name> */
+
+        BOOST_SPIRIT_MAJOR_CATEGORY(letter)
+        BOOST_SPIRIT_MAJOR_CATEGORY(mark)
+        BOOST_SPIRIT_MAJOR_CATEGORY(number)
+        BOOST_SPIRIT_MAJOR_CATEGORY(separator)
+        BOOST_SPIRIT_MAJOR_CATEGORY(other)
+        BOOST_SPIRIT_MAJOR_CATEGORY(punctuation)
+        BOOST_SPIRIT_MAJOR_CATEGORY(symbol)
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  General Categories
+    ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_CATEGORY(name)                                             \
+        static bool                                                             \
+        is_##name(char_type ch)                                                 \
+        {                                                                       \
+            return ucd::get_category(ch) == ucd::properties::name;              \
+        }                                                                       \
+        /* emits is_<name>(ch): get_category(ch) == properties::<name> */
+
+        BOOST_SPIRIT_CATEGORY(uppercase_letter)
+        BOOST_SPIRIT_CATEGORY(lowercase_letter)
+        BOOST_SPIRIT_CATEGORY(titlecase_letter)
+        BOOST_SPIRIT_CATEGORY(modifier_letter)
+        BOOST_SPIRIT_CATEGORY(other_letter)
+
+        BOOST_SPIRIT_CATEGORY(nonspacing_mark)
+        BOOST_SPIRIT_CATEGORY(enclosing_mark)
+        BOOST_SPIRIT_CATEGORY(spacing_mark)
+
+        BOOST_SPIRIT_CATEGORY(decimal_number)
+        BOOST_SPIRIT_CATEGORY(letter_number)
+        BOOST_SPIRIT_CATEGORY(other_number)
+
+        BOOST_SPIRIT_CATEGORY(space_separator)
+        BOOST_SPIRIT_CATEGORY(line_separator)
+        BOOST_SPIRIT_CATEGORY(paragraph_separator)
+
+        BOOST_SPIRIT_CATEGORY(control)
+        BOOST_SPIRIT_CATEGORY(format)
+        BOOST_SPIRIT_CATEGORY(private_use)
+        BOOST_SPIRIT_CATEGORY(surrogate)
+        BOOST_SPIRIT_CATEGORY(unassigned)
+
+        BOOST_SPIRIT_CATEGORY(dash_punctuation)
+        BOOST_SPIRIT_CATEGORY(open_punctuation)
+        BOOST_SPIRIT_CATEGORY(close_punctuation)
+        BOOST_SPIRIT_CATEGORY(connector_punctuation)
+        BOOST_SPIRIT_CATEGORY(other_punctuation)
+        BOOST_SPIRIT_CATEGORY(initial_punctuation)
+        BOOST_SPIRIT_CATEGORY(final_punctuation)
+
+        BOOST_SPIRIT_CATEGORY(math_symbol)
+        BOOST_SPIRIT_CATEGORY(currency_symbol)
+        BOOST_SPIRIT_CATEGORY(modifier_symbol)
+        BOOST_SPIRIT_CATEGORY(other_symbol)
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  Derived Categories
+    ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_DERIVED_CATEGORY(name)                                     \
+        static bool                                                             \
+        is_##name(char_type ch)                                                 \
+        {                                                                       \
+            return ucd::is_##name(ch);                                          \
+        }                                                                       \
+        /* emits is_<name>(ch): forwards to ucd::is_<name>(ch) */
+
+        BOOST_SPIRIT_DERIVED_CATEGORY(alphabetic)
+        BOOST_SPIRIT_DERIVED_CATEGORY(uppercase)
+        BOOST_SPIRIT_DERIVED_CATEGORY(lowercase)
+        BOOST_SPIRIT_DERIVED_CATEGORY(white_space)
+        BOOST_SPIRIT_DERIVED_CATEGORY(hex_digit)
+        BOOST_SPIRIT_DERIVED_CATEGORY(noncharacter_code_point)
+        BOOST_SPIRIT_DERIVED_CATEGORY(default_ignorable_code_point)
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  Scripts
+    ///////////////////////////////////////////////////////////////////////////
+#define BOOST_SPIRIT_SCRIPT(name)                                               \
+        static bool                                                             \
+        is_##name(char_type ch)                                                 \
+        {                                                                       \
+            return ucd::get_script(ch) == ucd::properties::name;                \
+        }                                                                       \
+        /* emits is_<name>(ch): get_script(ch) == properties::<name> */
+
+        BOOST_SPIRIT_SCRIPT(arabic)
+        BOOST_SPIRIT_SCRIPT(imperial_aramaic)
+        BOOST_SPIRIT_SCRIPT(armenian)
+        BOOST_SPIRIT_SCRIPT(avestan)
+        BOOST_SPIRIT_SCRIPT(balinese)
+        BOOST_SPIRIT_SCRIPT(bamum)
+        BOOST_SPIRIT_SCRIPT(bengali)
+        BOOST_SPIRIT_SCRIPT(bopomofo)
+        BOOST_SPIRIT_SCRIPT(braille)
+        BOOST_SPIRIT_SCRIPT(buginese)
+        BOOST_SPIRIT_SCRIPT(buhid)
+        BOOST_SPIRIT_SCRIPT(canadian_aboriginal)
+        BOOST_SPIRIT_SCRIPT(carian)
+        BOOST_SPIRIT_SCRIPT(cham)
+        BOOST_SPIRIT_SCRIPT(cherokee)
+        BOOST_SPIRIT_SCRIPT(coptic)
+        BOOST_SPIRIT_SCRIPT(cypriot)
+        BOOST_SPIRIT_SCRIPT(cyrillic)
+        BOOST_SPIRIT_SCRIPT(devanagari)
+        BOOST_SPIRIT_SCRIPT(deseret)
+        BOOST_SPIRIT_SCRIPT(egyptian_hieroglyphs)
+        BOOST_SPIRIT_SCRIPT(ethiopic)
+        BOOST_SPIRIT_SCRIPT(georgian)
+        BOOST_SPIRIT_SCRIPT(glagolitic)
+        BOOST_SPIRIT_SCRIPT(gothic)
+        BOOST_SPIRIT_SCRIPT(greek)
+        BOOST_SPIRIT_SCRIPT(gujarati)
+        BOOST_SPIRIT_SCRIPT(gurmukhi)
+        BOOST_SPIRIT_SCRIPT(hangul)
+        BOOST_SPIRIT_SCRIPT(han)
+        BOOST_SPIRIT_SCRIPT(hanunoo)
+        BOOST_SPIRIT_SCRIPT(hebrew)
+        BOOST_SPIRIT_SCRIPT(hiragana)
+        BOOST_SPIRIT_SCRIPT(katakana_or_hiragana)
+        BOOST_SPIRIT_SCRIPT(old_italic)
+        BOOST_SPIRIT_SCRIPT(javanese)
+        BOOST_SPIRIT_SCRIPT(kayah_li)
+        BOOST_SPIRIT_SCRIPT(katakana)
+        BOOST_SPIRIT_SCRIPT(kharoshthi)
+        BOOST_SPIRIT_SCRIPT(khmer)
+        BOOST_SPIRIT_SCRIPT(kannada)
+        BOOST_SPIRIT_SCRIPT(kaithi)
+        BOOST_SPIRIT_SCRIPT(tai_tham)
+        BOOST_SPIRIT_SCRIPT(lao)
+        BOOST_SPIRIT_SCRIPT(latin)
+        BOOST_SPIRIT_SCRIPT(lepcha)
+        BOOST_SPIRIT_SCRIPT(limbu)
+        BOOST_SPIRIT_SCRIPT(linear_b)
+        BOOST_SPIRIT_SCRIPT(lisu)
+        BOOST_SPIRIT_SCRIPT(lycian)
+        BOOST_SPIRIT_SCRIPT(lydian)
+        BOOST_SPIRIT_SCRIPT(malayalam)
+        BOOST_SPIRIT_SCRIPT(mongolian)
+        BOOST_SPIRIT_SCRIPT(meetei_mayek)
+        BOOST_SPIRIT_SCRIPT(myanmar)
+        BOOST_SPIRIT_SCRIPT(nko)
+        BOOST_SPIRIT_SCRIPT(ogham)
+        BOOST_SPIRIT_SCRIPT(ol_chiki)
+        BOOST_SPIRIT_SCRIPT(old_turkic)
+        BOOST_SPIRIT_SCRIPT(oriya)
+        BOOST_SPIRIT_SCRIPT(osmanya)
+        BOOST_SPIRIT_SCRIPT(phags_pa)
+        BOOST_SPIRIT_SCRIPT(inscriptional_pahlavi)
+        BOOST_SPIRIT_SCRIPT(phoenician)
+        BOOST_SPIRIT_SCRIPT(inscriptional_parthian)
+        BOOST_SPIRIT_SCRIPT(rejang)
+        BOOST_SPIRIT_SCRIPT(runic)
+        BOOST_SPIRIT_SCRIPT(samaritan)
+        BOOST_SPIRIT_SCRIPT(old_south_arabian)
+        BOOST_SPIRIT_SCRIPT(saurashtra)
+        BOOST_SPIRIT_SCRIPT(shavian)
+        BOOST_SPIRIT_SCRIPT(sinhala)
+        BOOST_SPIRIT_SCRIPT(sundanese)
+        BOOST_SPIRIT_SCRIPT(syloti_nagri)
+        BOOST_SPIRIT_SCRIPT(syriac)
+        BOOST_SPIRIT_SCRIPT(tagbanwa)
+        BOOST_SPIRIT_SCRIPT(tai_le)
+        BOOST_SPIRIT_SCRIPT(new_tai_lue)
+        BOOST_SPIRIT_SCRIPT(tamil)
+        BOOST_SPIRIT_SCRIPT(tai_viet)
+        BOOST_SPIRIT_SCRIPT(telugu)
+        BOOST_SPIRIT_SCRIPT(tifinagh)
+        BOOST_SPIRIT_SCRIPT(tagalog)
+        BOOST_SPIRIT_SCRIPT(thaana)
+        BOOST_SPIRIT_SCRIPT(thai)
+        BOOST_SPIRIT_SCRIPT(tibetan)
+        BOOST_SPIRIT_SCRIPT(ugaritic)
+        BOOST_SPIRIT_SCRIPT(vai)
+        BOOST_SPIRIT_SCRIPT(old_persian)
+        BOOST_SPIRIT_SCRIPT(cuneiform)
+        BOOST_SPIRIT_SCRIPT(yi)
+        BOOST_SPIRIT_SCRIPT(inherited)
+        BOOST_SPIRIT_SCRIPT(common)
+        BOOST_SPIRIT_SCRIPT(unknown)
+
+#undef BOOST_SPIRIT_MAJOR_CATEGORY
+#undef BOOST_SPIRIT_CATEGORY
+#undef BOOST_SPIRIT_DERIVED_CATEGORY
+#undef BOOST_SPIRIT_SCRIPT
+
     };
 
 }}}
Modified: trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp
==============================================================================
--- trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp	(original)
+++ trunk/boost/spirit/home/support/char_encoding/unicode/query.hpp	2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -10,10 +10,12 @@
 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
 
-#include "category_table.hpp"
-#include "script_table.hpp"
-#include "lowercase_table.hpp"
-#include "uppercase_table.hpp"
+#include <boost/cstdint.hpp>
+
+# include "category_table.hpp"
+# include "script_table.hpp"
+# include "lowercase_table.hpp"
+# include "uppercase_table.hpp"
 
 namespace boost { namespace spirit { namespace ucd
 {    
@@ -71,7 +73,7 @@
             initial_punctuation,    // [Pi] an initial quotation mark 
             final_punctuation,      // [Pf] a final quotation mark
                                     
-            math_symboll = 48,      // [Sm] a symbol of primarily mathematical use
+            math_symbol = 48,       // [Sm] a symbol of primarily mathematical use
             currency_symbol,        // [Sc] a currency sign 
             modifier_symbol,        // [Sk] a non-letterlike modifier symbol
             other_symbol            // [So] a symbol of other type 
@@ -207,7 +209,7 @@
         return get_category(ch) == properties::decimal_number;
     }
     
-    inline bool is_hexadecimal_number(::boost::uint32_t ch)
+    inline bool is_hex_digit(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::hex_digit) != 0;
     }  
Modified: trunk/boost/spirit/home/support/common_terminals.hpp
==============================================================================
--- trunk/boost/spirit/home/support/common_terminals.hpp	(original)
+++ trunk/boost/spirit/home/support/common_terminals.hpp	2010-02-03 07:35:10 EST (Wed, 03 Feb 2010)
@@ -17,9 +17,12 @@
 #include <boost/spirit/home/support/char_encoding/standard_wide.hpp>
 #include <boost/spirit/home/support/char_encoding/ascii.hpp>
 #include <boost/spirit/home/support/char_encoding/iso8859_1.hpp>
-#include <boost/spirit/home/support/char_encoding/unicode.hpp>
 #include <boost/spirit/home/support/char_class.hpp>
 
+#if defined(BOOST_SPIRIT_UNICODE)
+# include <boost/spirit/home/support/char_encoding/unicode.hpp>
+#endif
+
 namespace boost { namespace spirit
 {
     // Our basic terminals
@@ -87,18 +90,18 @@
     )
 
     // special tags (used mainly for stateful tag types)
-    namespace tag 
-    { 
-        struct attr_cast {}; 
+    namespace tag
+    {
+        struct attr_cast {};
     }
 
 }}
 
 ///////////////////////////////////////////////////////////////////////////////
 // Here we place the character-set sensitive placeholders. We have one set
-// each for ascii, iso8859_1, standard and standard_wide. These placeholders
-// are placed in its char-set namespace. For example, there exist a placeholder
-// spirit::ascii::alnum for ascii versions of alnum.
+// each for ascii, iso8859_1, standard, standard_wide and unicode. These
+// placeholders are placed in their char-set namespaces. For example, there
+// exists a placeholder spirit::ascii::alnum for the ascii version of alnum.
 
 #define BOOST_SPIRIT_TAG_CHAR_SPEC(charset)                                     \
     typedef tag::char_code<tag::char_, charset> char_;                          \
@@ -157,6 +160,182 @@
 BOOST_SPIRIT_DEFINE_CHAR_CODES(iso8859_1)
 BOOST_SPIRIT_DEFINE_CHAR_CODES(standard)
 BOOST_SPIRIT_DEFINE_CHAR_CODES(standard_wide)
+
+#if defined(BOOST_SPIRIT_UNICODE)
 BOOST_SPIRIT_DEFINE_CHAR_CODES(unicode)
 
+    namespace boost { namespace spirit { namespace tag { namespace unicode
+    {
+        BOOST_SPIRIT_TAG_CHAR_SPEC(spirit::char_encoding::unicode)
+    }}}}
+
+    namespace boost { namespace spirit { namespace unicode
+    {
+#define BOOST_SPIRIT_UNICODE_CHAR_CODE(name)                                    \
+    BOOST_SPIRIT_CHAR_CODE(name, spirit::char_encoding::unicode)                \
+    /* shorthand: BOOST_SPIRIT_CHAR_CODE bound to the unicode encoding */
+    ///////////////////////////////////////////////////////////////////////////
+    //  Unicode Major Categories
+    ///////////////////////////////////////////////////////////////////////////
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(letter)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(mark)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(number)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(separator)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(other)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(symbol)
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  Unicode General Categories
+    ///////////////////////////////////////////////////////////////////////////
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(uppercase_letter)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lowercase_letter)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(titlecase_letter)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(modifier_letter)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(other_letter)
+
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(nonspacing_mark)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(enclosing_mark)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(spacing_mark)
+
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(decimal_number)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(letter_number)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(other_number)
+
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(space_separator)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(line_separator)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(paragraph_separator)
+
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(control)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(format)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(private_use)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(surrogate)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(unassigned)
+
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(dash_punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(open_punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(close_punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(connector_punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(other_punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(initial_punctuation)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(final_punctuation)
+
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(math_symbol)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(currency_symbol)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(modifier_symbol)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(other_symbol)
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  Unicode Derived Categories
+    ///////////////////////////////////////////////////////////////////////////
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(alphabetic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(uppercase)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lowercase)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(white_space)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(hex_digit)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(noncharacter_code_point)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(default_ignorable_code_point)
+
+    ///////////////////////////////////////////////////////////////////////////
+    //  Unicode Scripts
+    ///////////////////////////////////////////////////////////////////////////
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(arabic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(imperial_aramaic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(armenian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(avestan)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(balinese)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(bamum)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(bengali)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(bopomofo)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(braille)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(buginese)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(buhid)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(canadian_aboriginal)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(carian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(cham)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(cherokee)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(coptic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(cypriot)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(cyrillic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(devanagari)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(deseret)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(egyptian_hieroglyphs)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(ethiopic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(georgian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(glagolitic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(gothic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(greek)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(gujarati)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(gurmukhi)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(hangul)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(han)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(hanunoo)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(hebrew)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(hiragana)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(katakana_or_hiragana)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(old_italic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(javanese)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(kayah_li)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(katakana)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(kharoshthi)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(khmer)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(kannada)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(kaithi)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tai_tham)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lao)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(latin)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lepcha)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(limbu)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(linear_b)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lisu)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lycian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(lydian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(malayalam)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(mongolian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(meetei_mayek)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(myanmar)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(nko)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(ogham)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(ol_chiki)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(old_turkic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(oriya)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(osmanya)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(phags_pa)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(inscriptional_pahlavi)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(phoenician)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(inscriptional_parthian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(rejang)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(runic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(samaritan)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(old_south_arabian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(saurashtra)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(shavian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(sinhala)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(sundanese)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(syloti_nagri)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(syriac)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tagbanwa)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tai_le)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(new_tai_lue)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tamil)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tai_viet)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(telugu)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tifinagh)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tagalog)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(thaana)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(thai)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(tibetan)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(ugaritic)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(vai)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(old_persian)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(cuneiform)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(yi)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(inherited)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(common)
+        BOOST_SPIRIT_UNICODE_CHAR_CODE(unknown)
+
+#undef BOOST_SPIRIT_UNICODE_CHAR_CODE
+    }}}
+#endif
+
 #endif