<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r53823 - in sandbox/SOC/2009/unicode: boost/unicode libs/unicode/data_parser
From: loufoque_at_[hidden]
Date: 2009-06-12 10:10:05
Author: mgaunard
Date: 2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
New Revision: 53823
URL: http://svn.boost.org/trac/boost/changeset/53823
Log:
decompose_for_sort fix by Graham Barnett
Text files modified: 
   sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp                         |    36 +++++++++---------                      
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp             |     2                                         
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp |    36 +++++++++---------                      
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp            |    73 ++++++++++++++++++++++++++++++--------- 
   sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp            |     7 +++                                     
   5 files changed, 100 insertions(+), 54 deletions(-)
Modified: sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp	(original)
+++ sandbox/SOC/2009/unicode/boost/unicode/unicode_properties_types.hpp	2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -178,24 +178,24 @@
                 {
                         enum type					// maximum 32 values (5 bits)
                         {
-				font = 0,
-				no_break,
-				initial,
-				medial,
-				final,
-				isolated,
-				circle,
-				super,
-				sub,
-				vertical,
-				wide,
-				narrow,
-				small,
-				square,
-				fraction,
-				compat,
-				canonical,
-				none,					// default value for unknown characters
+				dct_font = 0,
+				dct_no_break,
+				dct_initial,
+				dct_medial,
+				dct_final,
+				dct_isolated,
+				dct_circle,
+				dct_super,
+				dct_sub,
+				dct_vertical,
+				dct_wide,
+				dct_narrow,
+				dct_small,
+				dct_square,
+				dct_fraction,
+				dct_compat,
+				dct_canonical,
+				dct_none,					// default value for unknown characters
 
                                 _count
                         };
Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp	(original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties.cpp	2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -48,7 +48,7 @@
         char_not_present.general_category = category::unknown;
         char_not_present.combining = 0;
         char_not_present.bidi = bidi_class::strong_left_to_right;
-	char_not_present.decomposition_kind = decomposition_type::none;
+	char_not_present.decomposition_kind = decomposition_type::dct_none;
         char_not_present.line_break = break_class::unknown;
         char_not_present.joining = join_type::none;
         char_not_present.unknown_char = true;
Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp	(original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/read_character_properties_unicodedata.cpp	2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -221,60 +221,60 @@
                 // If none is indicated, it's a canonical decomposition_type.
                 (((str_p ("<font>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::font)] |
+				decomposition_type::dct_font)] |
                 str_p ("<noBreak>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::no_break)] |
+				decomposition_type::dct_no_break)] |
                 str_p ("<initial>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::initial)] |
+				decomposition_type::dct_initial)] |
                 str_p ("<medial>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::medial)] |
+				decomposition_type::dct_medial)] |
                 str_p ("<final>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::final)] |
+				decomposition_type::dct_final)] |
                 str_p ("<isolated>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::isolated)] |
+				decomposition_type::dct_isolated)] |
                 str_p ("<circle>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::circle)] |
+				decomposition_type::dct_circle)] |
                 str_p ("<super>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::super)] |
+				decomposition_type::dct_super)] |
                 str_p ("<sub>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::sub)] |
+				decomposition_type::dct_sub)] |
                 str_p ("<vertical>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::vertical)] |
+				decomposition_type::dct_vertical)] |
                 str_p ("<wide>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::wide)] |
+				decomposition_type::dct_wide)] |
                 str_p ("<narrow>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::narrow)] |
+				decomposition_type::dct_narrow)] |
                 str_p ("<small>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::small)] |
+				decomposition_type::dct_small)] |
                 str_p ("<square>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::square)] |
+				decomposition_type::dct_square)] |
                 str_p ("<fraction>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::fraction)] |
+				decomposition_type::dct_fraction)] |
                 str_p ("<compat>")
                         [assign_a (prop.decomposition_kind,
-				decomposition_type::compat)] |
+				decomposition_type::dct_compat)] |
                 eps_p [assign_a (prop.decomposition_kind,
-			decomposition_type::canonical)]
+			decomposition_type::dct_canonical)]
                 ) >>
                 // composition
                 +(hex_p [push_back_a (prop.decomposition)])
                 ) |
                 // or no composition at all
-		eps_p [assign_a (prop.decomposition_kind, decomposition_type::none)]
+		eps_p [assign_a (prop.decomposition_kind, decomposition_type::dct_none)]
                 ) >> ';' >>
 
                 // numeric type is skipped
Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp	(original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.cpp	2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -619,6 +619,7 @@
 }
 
 bool decompose_for_sort(const character_properties & props_char, 
+                        char32 cp, 
                         const std::map <char32, character_properties> & props,
                                             std::vector<collation_data> & tbl_coll)
 {
@@ -640,24 +641,62 @@
         }
         else
         {
-            // no optimisation should have happened at this point so all entries
-            // should be set to indexed
-            //assert(iter_char->second.sort_type == sort_type::is_index);
-
-            if (iter_char->second.sort_data.size() > 1)
-            {
-                // For simplicity we so not handle complex sort data
-                return false;
-            }
-            else if (iter_char->second.sort_data.size() == 0)
+            switch (iter_char->second.sort_type)
             {
-                return decompose_for_sort(iter_char->second, props, tbl_coll);
-            }
+            case sort_type::zero_data1_data2_cp:
+                {
+                    collation_data sort_entry;
+                    sort_entry.variable = iter_char->second.sort_variable;
+                    sort_entry.weight1 = 0;
+                    sort_entry.weight2 = iter_char->second.sort_index_or_data1;
+                    sort_entry.weight3 = iter_char->second.sort_data2;
+                    sort_entry.weight4 = cp;
+                    tbl_coll.push_back(sort_entry);
+                }
+                break;
+            case sort_type::data1_0x0020_data2_cp:
+                {
+                    collation_data sort_entry;
+                    sort_entry.variable = iter_char->second.sort_variable;
+                    sort_entry.weight1 = iter_char->second.sort_index_or_data1;
+                    sort_entry.weight2 = 0x0020;
+                    sort_entry.weight3 = iter_char->second.sort_data2;
+                    sort_entry.weight4 = cp;
+                    tbl_coll.push_back(sort_entry);
+                }
+                break;
+            case sort_type::default_:
+                {
+                    collation_entry sort_entry;
+                    get_default_sort_characteristics(cp, sort_entry);
+                    // this optimisation requires default to be of size 1
+                    assert(sort_entry.data.size() == 1); 
+                    tbl_coll.push_back(sort_entry.data[0]);
+                }
+                break;
+            case sort_type::is_index:
+                {
+                    if (iter_char->second.sort_data.size() > 1)
+                    {
+                        // For simplicity we do not handle complex sort data
+                        return false;
+                    }
+                    else if (iter_char->second.sort_data.size() == 0)
+                    {
+                        return decompose_for_sort(iter_char->second, iter_char->first, props, tbl_coll);
+                    }
 
-            tbl_coll.reserve(tbl_coll.size() + iter_char->second.sort_data[0].data.size());
-            copy(iter_char->second.sort_data[0].data.begin(), 
-                                                iter_char->second.sort_data[0].data.end(), 
-                                                            back_inserter(tbl_coll));
+                    tbl_coll.reserve(tbl_coll.size() + iter_char->second.sort_data[0].data.size());
+                    copy(iter_char->second.sort_data[0].data.begin(), 
+                                                        iter_char->second.sort_data[0].data.end(), 
+                                                                    back_inserter(tbl_coll));
+                }
+                break;
+            default:
+                // invalid enum
+                assert(iter_char->second.sort_type == (size_t)-1);
+                break;
+            }
         }
     }
 
@@ -692,7 +731,7 @@
     std::vector<collation_data> tbl_coll;
     
     // check for decomp and return false if it is too complex to optimise
-    if (!decompose_for_sort(props_char_var, props, tbl_coll))
+    if (!decompose_for_sort(props_char_var, cp, props, tbl_coll))
         return true;
 
     std::vector<collation_data>::const_iterator iter_src = 
Modified: sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp	(original)
+++ sandbox/SOC/2009/unicode/libs/unicode/data_parser/write_character_properties.hpp	2009-06-12 10:10:04 EDT (Fri, 12 Jun 2009)
@@ -17,7 +17,14 @@
 #include <vector>
 #include <map>
 #include <boost/spirit/include/classic.hpp>
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable : 4800 ) // 'int' : forcing value to bool 'true' or 'false' 
+#endif
 #include <boost/crc.hpp>
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
 #include <boost/tuple/tuple.hpp>
 
 #include <boost/unicode/unicode_properties.hpp>