$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r64117 - in sandbox/SOC/2009/unicode: boost/iterator boost/unicode libs/unicode/test libs/unicode/test/iterator libs/unicode/test/unicode
From: loufoque_at_[hidden]
Date: 2010-07-17 22:11:16
Author: mgaunard
Date: 2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
New Revision: 64117
URL: http://svn.boost.org/trac/boost/changeset/64117
Log:
experimental codecvt support for unicode converters
Added:
   sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp   (contents, props changed)
   sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp   (contents, props changed)
Text files modified: 
   sandbox/SOC/2009/unicode/boost/iterator/convert_iterator.hpp    |     2 +-                                      
   sandbox/SOC/2009/unicode/boost/unicode/compose_fwd.hpp          |     6 +++---                                  
   sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp           |    14 +++++++++++++-                          
   sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2           |     1 +                                       
   sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_utf.cpp |     2 +-                                      
   5 files changed, 19 insertions(+), 6 deletions(-)
Modified: sandbox/SOC/2009/unicode/boost/iterator/convert_iterator.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/iterator/convert_iterator.hpp	(original)
+++ sandbox/SOC/2009/unicode/boost/iterator/convert_iterator.hpp	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -408,7 +408,7 @@
       adapter that wraps the range \c range and converts it
       step-by-step as the range is advanced. */                        \
     template<typename Range, typename... T>                            \
-    boost::segmented_range<                                            \
+    boost::converted_range<                                            \
         Range,                                                         \
         converter_name                                                 \
     >                                                                  \
Added: sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -0,0 +1,222 @@
+#ifndef BOOST_ITERATOR_CONVERTER_CODECVT_FACET_HPP
+#define BOOST_ITERATOR_CONVERTER_CODECVT_FACET_HPP
+
+#include <locale>
+#include <cstddef>
+
+#include <boost/iterator/convert_iterator.hpp>
+#include <boost/iterator/dummy_output_iterator.hpp>
+
+#include <algorithm>
+
+#include <map>
+#include <boost/range/algorithm.hpp>
+#include <boost/range/join.hpp>
+
+namespace boost
+{
+/** std::codecvt facet that delegates the actual conversion to two converter objects: P1 for intern -> extern (do_out, do_unshift) and P2 for extern -> intern (do_in, do_length). */
+template<typename InternT, typename P1, typename P2>
+struct converter_codecvt_facet : std::codecvt<InternT, typename P1::output_type, std::mbstate_t>  
+{
+    typedef InternT intern_type;
+    typedef typename P1::output_type extern_type; // the external element type is whatever P1 outputs
+    typedef std::mbstate_t state_type;
+    // Compile-time requirements: both policies must model ConverterConcept.
+    BOOST_CONCEPT_ASSERT((ConverterConcept<P1>));
+    BOOST_CONCEPT_ASSERT((ConverterConcept<P2>));
+    // InternT must convert to P1's input type, and P2's output type must convert to InternT.
+    BOOST_CONCEPT_ASSERT((Convertible<InternT, typename P1::input_type>));
+    BOOST_CONCEPT_ASSERT((Convertible<typename P2::output_type, InternT>));
+    /** Stores copies of both converters; \c refs is forwarded to the std::codecvt base constructor. */
+    explicit converter_codecvt_facet(const P1& p1_ = P1(), const P2& p2_ = P2(), std::size_t refs = 0)
+        : std::codecvt<intern_type, extern_type, state_type>(refs), p1(p1_), p2(p2_)
+    {
+    }
+    
+private:
+    struct state_t // scratch data associated with one state_type object
+    {
+        intern_type pending_data[P2::max_output::value]; // buffered intern_type elements
+        size_t pending_size; // number of valid elements at the front of pending_data
+    };
+    mutable std::map<state_type*, state_t> states; // keyed by the address of the caller's state object; no entry is ever erased in this class
+    // 'mutable' above: the const virtuals below insert into and update this map.
+    mutable P1 p1; // mutable so ltr() can be called from const members (presumably ltr() is non-const - TODO confirm)
+    mutable P2 p2;
+    
+protected:
+    /** Extern -> intern. Writes at most one intern_type element to \c to per call: either a previously buffered element, or the first element produced by a fresh p2.ltr() call (the remainder stays buffered for \c state). Returns partial if p2.ltr() throws, ok otherwise. */
+    virtual std::codecvt_base::result do_in(
+        state_type& state, 
+        const extern_type* from,
+        const extern_type* from_end, 
+        const extern_type*& from_next,
+        intern_type* to, 
+        intern_type* to_end, 
+        intern_type*& to_next
+    ) const
+    {
+        state_t& st = states[&state];
+        // std::map::operator[] creates the entry on first use; state_t has no constructor, so pending_size is presumably value-initialized to 0 - confirm.
+        from_next = from;
+        to_next = to;
+        // NOTE(review): to_end is never consulted in this function, so to == to_end is not guarded against.
+        if(st.pending_size)
+        {
+            *to_next++ = st.pending_data[0]; // hand out the oldest buffered element without reading any input
+            std::copy(st.pending_data + 1, st.pending_data + st.pending_size, st.pending_data); // shift the rest to the front
+            st.pending_size--;
+            return std::codecvt_base::ok;
+        }
+        // Nothing buffered: run one p2.ltr() call with the scratch buffer as its output.
+        try
+        {
+            std::pair<const extern_type*, intern_type*> p = p2.ltr(from_next, from_end, st.pending_data);
+            from_next = p.first;
+            *to_next++ = st.pending_data[0]; // NOTE(review): if ltr() produced no output this reads stale data and the decrement below wraps pending_size - confirm ltr() always emits at least one element
+            st.pending_size = p.second - st.pending_data;
+            std::copy(st.pending_data + 1, st.pending_data + st.pending_size, st.pending_data);
+            st.pending_size--;
+        }
+        catch(...)
+        {
+            return std::codecvt_base::partial; // every exception from ltr() is mapped to partial; the exception itself is swallowed
+        }
+        return std::codecvt_base::ok;
+    }
+    /** Intern -> extern. Joins the elements buffered for \c state with [from, from_end) and applies p1.ltr() repeatedly, writing to \c to. If p1.ltr() throws, input not consumed so far is stored in the buffer, from_next is set to from_end and ok is returned. */
+    virtual std::codecvt_base::result do_out(
+        state_type& state,
+        const intern_type* from,
+        const intern_type* from_end,
+        const intern_type*& from_next,
+        extern_type* to,
+        extern_type* to_end,
+        extern_type*& to_next
+    ) const
+    {
+        typedef const boost::iterator_range<const intern_type*> range_base;
+        typedef boost::range_detail::join_iterator<const intern_type*, const intern_type*> iterator;
+        
+        state_t& st = states[&state];
+        // NOTE(review): to_end is never consulted below; output capacity is not checked.
+        from_next = from;
+        to_next = to;
+        // View the buffered elements followed by the new input as one sequence.
+        boost::joined_range<range_base, range_base> input = boost::join(
+            range_base(st.pending_data,  st.pending_data + st.pending_size),
+            range_base(from, from_end)
+        );
+        
+        iterator from2 = input.begin();
+        iterator from_next2 = from2;
+        iterator from_end2 = input.end();
+        
+        while(from_next2 != from_end2)
+        {
+            try
+            {
+                std::pair<iterator, extern_type*> p = p1.ltr(from_next2, from_end2, to_next);
+                from_next2 = p.first;
+                to_next = p.second;
+            }
+            catch(...)
+            {
+                size_t written = from_next2 - from2; // number of joined-input elements consumed so far (despite the name)
+                if(written >= st.pending_size) // every buffered element has been consumed
+                {
+                    from_next += (from_next2 - from2) - st.pending_size; // skip the part of the caller's input already converted
+                    st.pending_size = 0;
+                }
+                // Buffer the rest of the caller's input. NOTE(review): no check against the capacity of pending_data (P2::max_output::value elements).
+                boost::copy(range_base(from_next, from_end), st.pending_data + st.pending_size);
+                st.pending_size += (from_end - from_next);
+                from_next = from_end;
+                return std::codecvt_base::ok; // the failure is not reported to the caller; the remaining input now lives in the state entry
+            }
+        }
+        // The loop only ends here once the whole joined range has been consumed.
+        size_t written = from_next2 - from2;
+        if(written >= st.pending_size)
+        {
+            from_next += (from_next2 - from2) - st.pending_size;
+            st.pending_size = 0;
+        }
+        return std::codecvt_base::ok;
+    }
+    /** Always false: this facet never reports itself as a no-op conversion. */
+    virtual bool do_always_noconv() const throw()
+    {
+        return false;
+    }
+    /** Converts whatever is buffered for \c state through p1.ltr() into \c to, then clears the buffer. Returns error if p1.ltr() throws (the buffer is left as it was), ok otherwise. */
+    virtual std::codecvt_base::result do_unshift(
+        state_type& state,
+        extern_type* to,
+        extern_type* to_end,
+        extern_type*& to_next
+    ) const 
+    {
+        state_t& st = states[&state];
+        // NOTE(review): to_end is never consulted; output capacity is not checked.
+        to_next = to;
+        const intern_type* from = st.pending_data;
+        const intern_type* from_next = from;
+        const intern_type* from_end = st.pending_data + st.pending_size;
+        
+        while(from_next != from_end)
+        {
+            try
+            {
+                std::pair<const intern_type*, extern_type*> p = p1.ltr(from_next, from_end, to_next);
+                from_next = p.first;
+                to_next = p.second;
+            }
+            catch(...)
+            {
+                return std::codecvt_base::error;
+            }
+        }
+        
+        st.pending_size = 0;
+        return std::codecvt_base::ok;
+    }
+    /** Returns 0, the value std::codecvt::encoding() uses for a variable-width external encoding. */
+    virtual int do_encoding() const throw()
+    {
+        return 0;
+    }
+    /** Returns how many extern_type elements of [from, from_end) are consumed by at most \c max_limit p2.ltr() calls, stopping early if p2.ltr() throws. The state argument is ignored. */
+    virtual int do_length(
+        state_type&,
+        const extern_type* from,
+        const extern_type* from_end, 
+        std::size_t max_limit
+        ) const
+    {
+        const extern_type* from_next = from;
+        while(from_next != from_end && max_limit--) // NOTE(review): max_limit is decremented per ltr() call, not per intern_type element produced
+        {
+            try
+            {
+                std::pair<const extern_type*, dummy_output_iterator> p = p2.ltr(from_next, from_end, dummy_output_iterator()); // output is discarded; only the consumed input matters
+                from_next = p.first;
+            }
+            catch(...)
+            {
+                break;
+            }
+        }
+        return from_next - from; // pointer difference implicitly converted to int
+    }
+    /** Returns P1::max_output::value as this facet's do_max_length() result. */
+    virtual int do_max_length() const throw ()
+    {
+        return P1::max_output::value;
+    }
+};
+    
+} // namespace boost
+
+#endif
Modified: sandbox/SOC/2009/unicode/boost/unicode/compose_fwd.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/compose_fwd.hpp	(original)
+++ sandbox/SOC/2009/unicode/boost/unicode/compose_fwd.hpp	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -145,7 +145,7 @@
     >::type
     decompose_impl(In begin, In end, Out out)
     {
-        char32* out_pos = out;
+        Out out_pos = out;
         
         bool to_sort = false;
         
@@ -224,12 +224,12 @@
         
         bool operator()(const ucd::unichar_compose_data_entry& lft, In rgt) const
         {   
-            return lft.decomp[0] > offset && lft.decomp[1+offset] < *rgt;
+            return lft.decomp[0] > offset && lft.decomp[1+offset] < boost::char32(*rgt);
         }
         
         bool operator()(In lft, const ucd::unichar_compose_data_entry& rgt) const
         {
-            return rgt.decomp[0] > offset && *lft < rgt.decomp[1+offset];
+            return rgt.decomp[0] > offset && boost::char32(*lft) < rgt.decomp[1+offset];
         }
         
     private:
Modified: sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp	(original)
+++ sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -139,7 +139,7 @@
                 
                 if(unicode::is_high_surrogate(value))
                 {
-            // precondition; next value must have be a low-surrogate:
+            // precondition; next value must be a low-surrogate:
                         if(++it == end)
                                 detail::invalid_utf_sequence(begin, end);
                         
@@ -466,6 +466,7 @@
 template<typename ValueType>
 struct utf_encoder : detail::select_encoder<ValueType>::type
 {
+    typedef ValueType output_type; // publishes ValueType as output_type; NOTE(review): presumably for generic code that reads P::output_type, e.g. converter_codecvt_facet - confirm
 };
 
 /** Model of \c \xmlonly<conceptname>Converter</conceptname>\endxmlonly,
@@ -587,6 +588,17 @@
 #endif
 };
 
+/** Model of \c \xmlonly<conceptname>Converter</conceptname>\endxmlonly
+ * that converts from UTF-X to UTF-Y: X is detected from the value type of the
+ * input range, Y is selected by \c ValueType (utf_decoder combined with utf_encoder<ValueType>). */
+template<typename ValueType>
+struct utf_transcoder : boost::converted_converter<
+    boost::unicode::utf_decoder,
+    boost::unicode::utf_encoder<ValueType>
+>
+{
+};
+
 /** Model of \c \xmlonly<conceptname>OneManyConverter</conceptname>\endxmlonly
  * that converts from UTF-32 to ISO-8859-1 alias latin-1. */
 typedef boost::detail::unspecified< cast_converter<char> >::type latin1_encoder;
Modified: sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2	(original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -22,6 +22,7 @@
 test-suite iterator :
     [ run iterator/test_convert.cpp ]
     [ run iterator/test_segment.cpp ]
+    [ run iterator/test_codecvt.cpp ]
 ;
 
 test-suite unicode :
Added: sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -0,0 +1,64 @@
+#define BOOST_TEST_MODULE Codecvt
+#include <boost/test/included/unit_test.hpp>
+
+#include <boost/iterator/converter_codecvt_facet.hpp>
+#include <boost/unicode/utf.hpp>
+#include <boost/unicode/compose.hpp>
+
+#include <fstream>
+#include <boost/range/algorithm.hpp>
+#include <boost/range/as_literal.hpp>
+
+typedef boost::converter_codecvt_facet< // facet variant whose extern -> intern converter also runs boost::unicode::normalizer
+    wchar_t, // InternT
+    boost::unicode::utf_transcoder<char>, // P1: intern -> extern
+    boost::multi_converter< // P2: extern -> intern
+        boost::converted_converter<boost::unicode::utf_decoder, boost::unicode::normalizer>,
+        boost::unicode::utf_encoder<wchar_t>
+    >
+> utf_u8_normalize_codecvt;
+// Plain transcoding facet with no normalizer in either direction; this is the one the test case imbues.
+typedef boost::converter_codecvt_facet<
+    wchar_t,
+    boost::unicode::utf_transcoder<char>,
+    boost::unicode::utf_transcoder<wchar_t>
+> utf_u8_codecvt;
+
+
+BOOST_AUTO_TEST_CASE( codecvt )
+{
+    // e\u0301 is \u00E9
+    // \U0002FA1D is \U0002A600
+    const wchar_t data_[] = L"hello e\u0301 \U0002FA1D world";
+    boost::iterator_range<const wchar_t*> data = boost::as_literal(data_);
+    
+    const wchar_t data_normalized_[] = L"hello \u00E9 \U0002A600 world";
+    boost::iterator_range<const wchar_t*> data_normalized = data;//boost::as_literal(data_normalized_);
+    // NOTE(review): data_normalized currently aliases data and data_normalized_ is unused, so the round trip is compared against the un-normalized input.
+    std::locale old_locale;
+    std::locale utf8_locale(old_locale, new utf_u8_codecvt()); // the non-normalizing facet; utf_u8_normalize_codecvt is not used by this test
+
+    // Installing the locale globally is disabled; each stream is imbued explicitly instead.
+    //std::locale::global(utf8_locale);
+
+    // Write the UTF-X data to data.ucd; the imbued facet converts it to UTF-8 on the way out
+    {
+        std::wofstream ofs("data.ucd");
+        ofs.imbue(utf8_locale);
+        boost::copy(data, std::ostream_iterator<wchar_t, wchar_t>(ofs));
+    }
+
+    // Read the UTF-8 data back in, converting to UTF-X (no normalization happens: utf8_locale holds utf_u8_codecvt)
+    {
+        std::wifstream ifs("data.ucd");
+        ifs.imbue(utf8_locale);
+        wchar_t item = 0;
+        size_t i = 0;
+        while (ifs >> std::noskipws >> item) // noskipws: the spaces in the data must be read back as characters too
+        {
+            BOOST_CHECK_EQUAL(data_normalized[i], item);
+            i++;
+        }
+        BOOST_CHECK_EQUAL(i, (size_t)boost::size(data_normalized)); // every element must have been read back
+    }
+}
Modified: sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_utf.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_utf.cpp	(original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_utf.cpp	2010-07-17 22:11:15 EDT (Sat, 17 Jul 2010)
@@ -24,7 +24,7 @@
     
     CHECK_UTF(8,
         input,
-        list_of<char>('$')(0xC2)(0xA2)(0xE2)(0x82)(0xAC)(0xF0)(0xA4)(0xAD)(0xA2)
+        list_of<char>('$')((char)0xC2)((char)0xA2)((char)0xE2)((char)0x82)((char)0xAC)((char)0xF0)((char)0xA4)((char)0xAD)((char)0xA2)
     );
     
     CHECK_UTF(16,