$include_dir="/home/hyper-archives/boost-users/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-users] [boost::property_tree] rapidxml get_index bug for UTF8 ?
From: 乔志强 (qiaozhiqiang_at_[hidden])
Date: 2011-03-04 01:04:02
When parsing UTF-8, every non-ASCII byte has a value above 127, but plain char is signed here,
so such a byte is negative and get_index() returns a huge value once it is converted to size_t.
char c = -120;
get_index(c)  
The VC2010 debugger reports:
boost::property_tree::detail::rapidxml::internal::get_index<char> returned 4294967168	unsigned int
As a result, the table lookup
internal::lookup_tables<0>::lookup_whitespace[internal::get_index(ch)]
indexes far outside the lookup table, which is an error.
My patch:
        inline size_t get_index(const Ch c)
        {
            // ***  char c (ASCII / UTF8) and wchar_t c: 0 ~ 127 is ASCII char 
            size_t r = c; //******** convert to unsigned
            // If not ASCII char, its sematic is same as plain 'z'
            // if (c > 255) //********* ASSCII is 0 to 127 
            if (r > 127)  //******** check r, or check if(c < 0 || c > 127) 
            {
                return 'z';
            }
            return r; //******** return r
        }
  
This is boost code:
boost_1_46_0\boost\property_tree\detail\rapidxml.hpp
         template<class Ch>
        inline size_t get_index(const Ch c)
        {
            // If not ASCII char, its sematic is same as plain 'z'
            if (c > 255)
            {
                return 'z';
            }
            return c;
        }
        // Detect whitespace character
        struct whitespace_pred
        {
            static unsigned char test(Ch ch)
            {
                return internal::lookup_tables<0>::lookup_whitespace[internal::get_index(ch)];
            }
        };
        // Detect node name character
        struct node_name_pred
        {
            static unsigned char test(Ch ch)
            {
                return internal::lookup_tables<0>::lookup_node_name[internal::get_index(ch)];
            }
        };