$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
From: technews_at_[hidden]
Date: 2008-05-31 18:53:58
Author: turkanis
Date: 2008-05-31 18:53:58 EDT (Sat, 31 May 2008)
New Revision: 46001
URL: http://svn.boost.org/trac/boost/changeset/46001
Log:
added support for archives with multiple members; added tests for metadata and for multiple members (fixes #1896)
Text files modified: 
   trunk/boost/iostreams/filter/gzip.hpp   |   516 +++++++++++++++++++++++++++++---------- 
   trunk/libs/iostreams/test/gzip_test.cpp |    65 ++++                                    
   2 files changed, 432 insertions(+), 149 deletions(-)
Modified: trunk/boost/iostreams/filter/gzip.hpp
==============================================================================
--- trunk/boost/iostreams/filter/gzip.hpp	(original)
+++ trunk/boost/iostreams/filter/gzip.hpp	2008-05-31 18:53:58 EDT (Sat, 31 May 2008)
@@ -19,6 +19,7 @@
 #include <boost/config.hpp> // STATIC_CONSTANT, STDC_NAMESPACE, 
                             // DINKUMWARE_STDLIB, __STL_CONFIG_H.
 #include <algorithm>                      // min.
+#include <cassert>
 #include <cstdio>                         // EOF.
 #include <cstddef>                        // size_t.
 #include <ctime>                          // std::time_t.
@@ -34,7 +35,8 @@
 #include <boost/iostreams/operations.hpp>
 #include <boost/iostreams/device/back_inserter.hpp>
 #include <boost/iostreams/filter/zlib.hpp>
-#include <boost/iostreams/pipeline.hpp>         
+#include <boost/iostreams/pipeline.hpp>     
+#include <boost/iostreams/putback.hpp>         
 
 // Must come last.
 #if defined(BOOST_MSVC)
@@ -47,6 +49,8 @@
 #endif
 
 namespace boost { namespace iostreams {
+                    
+//------------------Definitions of constants----------------------------------//
 
 namespace gzip {
 
@@ -59,6 +63,7 @@
 const int bad_length        = 3; // Recorded length doesn't match data.
 const int bad_header        = 4; // Malformed header.
 const int bad_footer        = 5; // Malformed footer.
+const int bad_method        = 6; // Unsupported compression method.
 
 namespace magic {
 
@@ -118,6 +123,8 @@
 
 } // End namespace gzip.
 
+//------------------Definition of gzip_params---------------------------------//
+
 //
 // Class name: gzip_params.
 // Description: Subclass of zlib_params with an additional field
@@ -142,6 +149,8 @@
     std::time_t  mtime;
 };
 
+//------------------Definition of gzip_error----------------------------------//
+
 //
 // Class name: gzip_error.
 // Description: Subclass of std::ios_base::failure thrown to indicate
@@ -163,6 +172,8 @@
     int zlib_error_code_;
 };
 
+//------------------Definition of gzip_compressor-----------------------------//
+
 //
 // Template name: gzip_compressor
 // Description: Model of OutputFilter implementing compression in the
@@ -283,7 +294,7 @@
         flags_ = 0;
     }
 
-    enum flag_type {
+    enum state_type {
         f_header_done = 1,
         f_body_done = f_header_done << 1,
         f_footer_done = f_body_done << 1
@@ -297,6 +308,81 @@
 
 typedef basic_gzip_compressor<> gzip_compressor;
 
+//------------------Definition of helper templates for decompression----------//
+
+namespace detail {
+
+// Processes gzip headers
+class gzip_header {  // Byte-at-a-time parser for a gzip member header; keeps the parsed metadata.
+public:
+    gzip_header() { reset(); }
+
+    // Members for processing header data
+    void process(char c);  // Feeds the next header byte to the parser.
+    bool done() const { return state_ == s_done; }  // True once the parser has reached s_done.
+    void reset();  // Clears the stored fields and restarts parsing at the first magic byte.
+
+    // Members for accessing header data
+    std::string file_name() const { return file_name_; }
+    std::string comment() const { return comment_; }
+    bool text() const { return (flags_ & gzip::flags::text) != 0; }
+    int os() const { return os_; }
+    std::time_t mtime() const { return mtime_; }
+private:
+    enum state_type {  // Parser states, one per header field handled by process().
+        s_id1       = 1,
+        s_id2       = s_id1 + 1,
+        s_cm        = s_id2 + 1,
+        s_flg       = s_cm + 1,
+        s_mtime     = s_flg + 1,
+        s_xfl       = s_mtime + 1,
+        s_os        = s_xfl + 1,
+        s_xlen      = s_os + 1,
+        s_extra     = s_xlen + 1,
+        s_name      = s_extra + 1,
+        s_comment   = s_name + 1,
+        s_hcrc      = s_comment + 1,
+        s_done      = s_hcrc + 1
+    };
+    std::string  file_name_;
+    std::string  comment_;
+    int          os_;
+    std::time_t  mtime_;
+    int          flags_;   // Raw flag byte stored by process() in state s_flg.
+    int          state_;   // Current state_type value.
+    int          offset_;  // Offset within fixed-length region.
+    int          xlen_;    // Bytes remaining in extra field.
+};
+
+// Processes gzip footers
+class gzip_footer {  // Byte-at-a-time parser for the two 4-byte fields of a gzip member footer.
+public:
+    gzip_footer() { reset(); }
+    
+    // Members for processing footer data
+    void process(char c);  // Feeds the next footer byte to the parser.
+    bool done() const { return state_ == s_done; }  // True once the parser has reached s_done.
+    void reset();  // Restarts parsing at the first CRC byte.
+    
+    // Members for accessing footer data
+    zlib::ulong crc() const { return crc_; }
+    zlib::ulong uncompressed_size() const { return isize_; }
+private:
+    enum state_type {  // Parser states: CRC field, then size field, then finished.
+        s_crc     = 1,
+        s_isize   = s_crc + 1,
+        s_done    = s_isize + 1
+    };
+    zlib::ulong  crc_;     // Accumulated low byte first by process().
+    zlib::ulong  isize_;   // Accumulated low byte first by process().
+    int          state_;   // Current state_type value.
+    int          offset_;  // Byte index (0-3) within the field being read.
+};
+
+} // End namespace boost::iostreams::detail.
+
+//------------------Definition of basic_gzip_decompressor---------------------//
+
 //
 // Template name: basic_gzip_decompressor
 // Description: Model of InputFilter implementing compression in the
@@ -304,6 +390,9 @@
 //
 template<typename Alloc = std::allocator<char> >
 class basic_gzip_decompressor : basic_zlib_decompressor<Alloc> {
+private:
+    typedef basic_zlib_decompressor<Alloc>   base_type;
+    typedef typename base_type::string_type  string_type;
 public:
     typedef char char_type;
     struct category
@@ -316,35 +405,70 @@
     template<typename Source>
     std::streamsize read(Source& src, char_type* s, std::streamsize n)
     {
-        if ((flags_ & f_header_read) == 0) {
-            non_blocking_adapter<Source> nb(src);
-            read_header(nb);
-            flags_ |= f_header_read;
-        }
-
-        if ((flags_ & f_footer_read) != 0)
-            return -1;
-        
-        try {
-            std::streamsize result = 0;
-            std::streamsize amt;
-            if ((amt = base_type::read(src, s, n)) != -1) {
-                result += amt;
-                if (amt < n) { // Double check for EOF.
-                    amt = base_type::read(src, s + result, n - result);
-                    if (amt != -1)
+        typedef char_traits<char>  traits_type;
+        std::streamsize            result = 0;
+        peekable_source<Source>    peek(src, putback_);
+        while (result < n && state_ != s_done) {
+            if (state_ == s_start) {
+                state_ = s_header;
+                header_.reset();
+                footer_.reset();
+            }
+            if (state_ == s_header) {
+                int c = boost::iostreams::get(peek);
+                if (traits_type::is_eof(c)) {
+                    throw gzip_error(gzip::bad_header);
+                } else if (traits_type::would_block(c)) {
+                    break;
+                }
+                header_.process(c);
+                if (header_.done())
+                    state_ = s_body;
+            } else if (state_ == s_body) {
+                try {
+                    std::streamsize amt = 
+                        base_type::read(peek, s + result, n - result);
+                    if (amt != -1) {
                         result += amt;
+                        if (amt < n - result)
+                            break;
+                    } else {
+                        peek.putback(this->unconsumed_input());
+                        state_ = s_footer;
+                    }
+                } catch (const zlib_error& e) {
+                    throw gzip_error(e);
+                }
+            } else { // state_ == s_footer
+                int c = boost::iostreams::get(peek);
+                if (traits_type::is_eof(c)) {
+                    throw gzip_error(gzip::bad_footer);
+                } else if (traits_type::would_block(c)) {
+                    break;
+                }
+                footer_.process(c);
+                if (footer_.done()) {
+                    int c = boost::iostreams::get(peek);
+                    if (traits_type::is_eof(c)) {
+                        state_ = s_done;
+                    } else {
+                        peek.putback(c);
+                        base_type::close(peek, BOOST_IOS::in);
+                        state_ = s_start;
+                        header_.reset();
+                        footer_.reset();
+                    }
                 }
             }
-            if (amt == -1) {
-                non_blocking_adapter<Source> nb(src);
-                read_footer(nb);
-                flags_ |= f_footer_read;
-            }
-            return result;
-        } catch (const zlib_error& e) {
-            throw gzip_error(e);
         }
+        if (peek.has_unconsumed_input()) {
+            putback_ = peek.unconsumed_input();
+        } else {
+            putback_.clear();
+        }
+        return result != 0 || state_ != s_done ?
+            result :
+            -1;
     }
 
     template<typename Source>
@@ -353,136 +477,95 @@
         try {
             base_type::close(src, BOOST_IOS::in);
         } catch (const zlib_error& e) {
-            flags_ = 0;
+            state_ = s_start;
+            header_.reset();
+            footer_.reset();
             throw gzip_error(e);
         }
-        flags_ = 0;
+        state_ = s_start;
     }
 
-    std::string file_name() const { return file_name_; }
-    std::string comment() const { return comment_; }
-    bool text() const { return (flags_ & gzip::flags::text) != 0; }
-    int os() const { return os_; }
-    std::time_t mtime() const { return mtime_; }
+    std::string file_name() const { return header_.file_name(); }
+    std::string comment() const { return header_.comment(); }
+    bool text() const { return header_.text(); }
+    int os() const { return header_.os(); }
+    std::time_t mtime() const { return header_.mtime(); }
 private:
-    typedef basic_zlib_decompressor<Alloc>     base_type;
-    typedef BOOST_IOSTREAMS_CHAR_TRAITS(char)  traits_type;
-    static bool is_eof(int c) { return traits_type::eq_int_type(c, EOF); }
     static gzip_params make_params(int window_bits);
 
+    // Source adapter allowing an arbitrary character sequence to be put back.
     template<typename Source>
-    static uint8_t read_uint8(Source& src, int error)
-     {
-        int c;
-        if ((c = boost::iostreams::get(src)) == EOF || c == WOULD_BLOCK)
-            throw gzip_error(error);
-        return static_cast<uint8_t>(traits_type::to_char_type(c));
-    }
+    struct peekable_source {
+        typedef char char_type;
+        struct category : source_tag, peekable_tag { };
+        explicit peekable_source(Source& src, const string_type& putback = "") 
+            : src_(src), putback_(putback), offset_(0)
+            { }
+        std::streamsize read(char* s, std::streamsize n)
+        {
+            std::streamsize result = 0;
 
-    template<typename Source>
-    static uint32_t read_uint32(Source& src, int error)
-    {
-        uint8_t b1 = read_uint8(src, error);
-        uint8_t b2 = read_uint8(src, error);
-        uint8_t b3 = read_uint8(src, error);
-        uint8_t b4 = read_uint8(src, error);
-        return b1 + (b2 << 8) + (b3 << 16) + (b4 << 24);
-    }
+            // Copy characters from putback buffer
+            std::streamsize pbsize = 
+                static_cast<std::streamsize>(putback_.size());
+            if (offset_ < pbsize) {
+                result = (std::min)(n, pbsize - offset_);
+                BOOST_IOSTREAMS_CHAR_TRAITS(char)::copy(
+                    s, putback_.data() + offset_, result);
+                offset_ += result;
+                if (result == n)
+                    return result;
+            }
 
-    template<typename Source>
-    std::string read_string(Source& src)
-    {
-        std::string result;
-        while (true) {
-            int c;
-            if (is_eof(c = boost::iostreams::get(src)))
-                throw gzip_error(gzip::bad_header);
-            else if (c == 0)
-                return result;
-            else
-                result += static_cast<char>(c);
+            // Read characters from src_
+            std::streamsize amt = 
+                boost::iostreams::read(src_, s + result, n - result);
+            return amt != -1 ? 
+                result + amt : 
+                result ? result : -1;
+        }
+        bool putback(char c)
+        {
+            if (offset_) {
+                putback_[--offset_] = c;
+                return true;
+            } else {
+                return boost::iostreams::putback(src_, c);
+            }
+        }
+        void putback(const string_type& s)
+        {
+            putback_.replace(0, offset_, s);
+            offset_ = 0;
         }
-    }
-
-    template<typename Source>
-    void read_header(Source& src) // Source is non-blocking.
-    {
-        // Reset saved values.
-        #if BOOST_WORKAROUND(__GNUC__, == 2) && defined(__STL_CONFIG_H) || \
-            BOOST_WORKAROUND(BOOST_DINKUMWARE_STDLIB, == 1) \
-            /**/
-            file_name_.erase(0, std::string::npos);
-            comment_.erase(0, std::string::npos);
-        #else
-            file_name_.clear();
-            comment_.clear();
-        #endif
-        os_ = gzip::os_unknown;
-        mtime_ = 0;
-
-        int flags;
 
-        // Read header, without checking header crc.
-        if ( boost::iostreams::get(src) != gzip::magic::id1 ||   // ID1.
-             boost::iostreams::get(src) != gzip::magic::id2 ||   // ID2.
-             is_eof(boost::iostreams::get(src)) ||               // CM.
-             is_eof(flags = boost::iostreams::get(src)) )        // FLG.
+        // Returns true if some characters have been putback but not re-read.
+        bool has_unconsumed_input() const 
         {
-            throw gzip_error(gzip::bad_header);
+            return offset_ < static_cast<std::streamsize>(putback_.size());
         }
-        mtime_ = read_uint32(src, gzip::bad_header);        // MTIME.
-        read_uint8(src, gzip::bad_header);                 // XFL.
-        os_ = read_uint8(src, gzip::bad_header);          // OS.
-        if (flags & boost::iostreams::gzip::flags::text)
-            flags_ |= f_text;
-
-        // Skip extra field. (From J. Halleaux; see note at top.)
-        if (flags & gzip::flags::extra) {
-            int length = 
-                static_cast<int>(
-                    read_uint8(src, gzip::bad_header) +
-                    (read_uint8(src, gzip::bad_header) << 8)
-                );
-            // length is garbage if EOF but the loop below will quit anyway.
-            do { }
-            while (length-- != 0 && !is_eof(boost::iostreams::get(src)));
-        }
-
-        if (flags & gzip::flags::name)          // Read file name.
-            file_name_ = read_string(src);
-        if (flags & gzip::flags::comment)       // Read comment.
-            comment_ = read_string(src);
-        if (flags & gzip::flags::header_crc) {  // Skip header crc.
-            read_uint8(src, gzip::bad_header);
-            read_uint8(src, gzip::bad_header);
+
+        // Returns the sequence of characters that have been put back but not re-read.
+        string_type unconsumed_input() const
+        {
+            return string_type(putback_, offset_, putback_.size() - offset_);
         }
-    }
+        Source&          src_;
+        string_type      putback_;
+        std::streamsize  offset_;
+    };
 
-    template<typename Source>
-    void read_footer(Source& src)
-    {
-        typename base_type::string_type footer = 
-            this->unconsumed_input();
-        int c;
-        while (!is_eof(c = boost::iostreams::get(src)))
-            footer += c;
-        detail::range_adapter<input, std::string> 
-            rng(footer.begin(), footer.end());
-        if (read_uint32(rng, gzip::bad_footer) != this->crc())
-            throw gzip_error(gzip::bad_crc);
-        if (static_cast<int>(read_uint32(rng, gzip::bad_footer)) != this->total_out())
-            throw gzip_error(gzip::bad_length);
-    }
-    enum flag_type {
-        f_header_read  = 1,
-        f_footer_read  = f_header_read << 1,
-        f_text         = f_footer_read << 1
+    enum state_type {
+        s_start   = 1,
+        s_header  = s_start + 1,
+        s_body    = s_header + 1,
+        s_footer  = s_body + 1,
+        s_done    = s_footer + 1
     };
-    std::string  file_name_;
-    std::string  comment_;
-    int          os_;
-    std::time_t  mtime_;
-    int          flags_;
+    detail::gzip_header  header_;
+    detail::gzip_footer  footer_;
+    string_type          putback_;
+    int                  state_;
 };
 BOOST_IOSTREAMS_PIPABLE(basic_gzip_decompressor, 1)
 
@@ -574,13 +657,164 @@
     return amt;
 }
 
+//------------------Implementation of gzip_header-----------------------------//
+
+namespace detail {
+
+inline void gzip_header::process(char c)  // inline: defined in a header, so must not be emitted once per TU.
+{   // Advances the header state machine by one byte; throws gzip_error on bad magic or method.
+    uint8_t value = static_cast<uint8_t>(c);
+    switch (state_) {
+    case s_id1:
+        if (value != gzip::magic::id1)
+            throw gzip_error(gzip::bad_header);
+        state_ = s_id2;
+        break;
+    case s_id2:
+        if (value != gzip::magic::id2)
+            throw gzip_error(gzip::bad_header);
+        state_ = s_cm;
+        break;
+    case s_cm:
+        if (value != gzip::method::deflate)
+            throw gzip_error(gzip::bad_method);
+        state_ = s_flg;
+        break;
+    case s_flg:
+        flags_ = value;
+        state_ = s_mtime;
+        break;
+    case s_mtime:
+        mtime_ += static_cast<unsigned long>(value) << (offset_ * 8);  // Unsigned: avoids int overflow at << 24.
+        if (offset_ == 3) {
+            state_ = s_xfl;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+        break;
+    case s_xfl:
+        state_ = s_os;
+        break;
+    case s_os:
+        os_ = value;
+        if (flags_ & gzip::flags::extra) {
+            state_ = s_xlen;  // The two-byte length must be read before the extra field itself.
+        } else if (flags_ & gzip::flags::name) {
+            state_ = s_name;
+        } else if (flags_ & gzip::flags::comment) {
+            state_ = s_comment;
+        } else if (flags_ & gzip::flags::header_crc) {
+            state_ = s_hcrc;
+        } else {
+            state_ = s_done;
+        }
+        break;
+    case s_xlen:
+        xlen_ += value << (offset_ * 8);  // Low byte first; at most 0xFFFF, so int is wide enough.
+        if (offset_ == 0) {
+            ++offset_;
+            break;
+        }
+        offset_ = 0;
+        state_ = s_extra;
+        if (xlen_ != 0) break;  // Otherwise fall through: an empty extra field consumes no bytes.
+    case s_extra:
+        if (xlen_ == 0 || --xlen_ == 0) {  // xlen_ == 0 only when falling through from s_xlen.
+            if (flags_ & gzip::flags::name) {
+                state_ = s_name;
+            } else if (flags_ & gzip::flags::comment) {
+                state_ = s_comment;
+            } else if (flags_ & gzip::flags::header_crc) {
+                state_ = s_hcrc;
+            } else {
+                state_ = s_done;
+            }
+        }
+        break;
+    case s_name:
+        if (c != 0) {
+            file_name_ += c;
+        } else if (flags_ & gzip::flags::comment) {
+            state_ = s_comment;
+        } else if (flags_ & gzip::flags::header_crc) {
+            state_ = s_hcrc;
+        } else {
+            state_ = s_done;
+        }
+        break;
+    case s_comment:
+        if (c != 0) {
+            comment_ += c;
+        } else if (flags_ & gzip::flags::header_crc) {
+            state_ = s_hcrc;
+        } else {
+            state_ = s_done;
+        }
+        break;
+    case s_hcrc:
+        if (offset_ == 1) {  // Second of the two header-CRC bytes; the value itself is not checked.
+            state_ = s_done;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+        break;
+    default:
+        assert(0);  // process() must not be called once done() is true.
+    }
+}
+
+inline void gzip_header::reset()  // inline: defined in a header, so must not be emitted once per TU.
+{   // Discards all parsed metadata and restarts the state machine at the first magic byte.
+    file_name_.clear();
+    comment_.clear();
+    os_ = flags_ = offset_ = xlen_ = 0;
+    mtime_ = 0;
+    state_ = s_id1;
+}
+
+//------------------Implementation of gzip_footer-----------------------------//
+
+inline void gzip_footer::process(char c)  // inline: defined in a header, so must not be emitted once per TU.
+{   // Consumes one footer byte: four bytes of CRC, then four bytes of size, each low byte first.
+    uint8_t value = static_cast<uint8_t>(c);
+    if (state_ == s_crc) {
+        crc_ += static_cast<zlib::ulong>(value) << (offset_ * 8);  // Widen first: int would overflow at << 24.
+        if (offset_ == 3) {
+            state_ = s_isize;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+    } else if (state_ == s_isize) {  // Comparison, not assignment, so the assert below stays reachable.
+        isize_ += static_cast<zlib::ulong>(value) << (offset_ * 8);
+        if (offset_ == 3) {
+            state_ = s_done;
+            offset_ = 0;
+        } else {
+            ++offset_;
+        }
+    } else {
+        assert(0);  // process() must not be called once done() is true.
+    }
+}
+
+inline void gzip_footer::reset()  // inline: defined in a header, so must not be emitted once per TU.
+{   // Restarts footer parsing at the first CRC byte.
+    state_ = s_crc;
+    crc_ = isize_ = offset_ = 0;  // process() accumulates with +=, so both fields must start from zero.
+}
+
+} // End namespace boost::iostreams::detail.
+
 //------------------Implementation of gzip_decompressor-----------------------//
 
 template<typename Alloc>
 basic_gzip_decompressor<Alloc>::basic_gzip_decompressor
     (int window_bits, int buffer_size)
     : base_type(make_params(window_bits), buffer_size),
-      os_(gzip::os_unknown), mtime_(0), flags_(0)
+      state_(s_start)
     { }
 
 template<typename Alloc>
Modified: trunk/libs/iostreams/test/gzip_test.cpp
==============================================================================
--- trunk/libs/iostreams/test/gzip_test.cpp	(original)
+++ trunk/libs/iostreams/test/gzip_test.cpp	2008-05-31 18:53:58 EDT (Sat, 31 May 2008)
@@ -6,8 +6,14 @@
 // See http://www.boost.org/libs/iostreams for documentation.
 
 #include <string>
+#include <boost/iostreams/copy.hpp>
+#include <boost/iostreams/device/array.hpp>
+#include <boost/iostreams/device/back_inserter.hpp>
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/iostreams/filter/test.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/ref.hpp>
+#include <boost/range/iterator_range.hpp>
 #include <boost/test/test_tools.hpp>
 #include <boost/test/unit_test.hpp>
 #include "detail/sequence.hpp"
@@ -16,18 +22,36 @@
 using namespace boost;
 using namespace boost::iostreams;
 using namespace boost::iostreams::test;
+namespace io = boost::iostreams;
 using boost::unit_test::test_suite;     
 
 struct gzip_alloc : std::allocator<char> { };
 
-void gzip_test()
+void compression_test()
 {
-    text_sequence data;
-    BOOST_CHECK(
-        test_filter_pair( gzip_compressor(), 
-                          gzip_decompressor(), 
-                          std::string(data.begin(), data.end()) )
-    );
+    text_sequence      data;
+
+    // Test compression and decompression with metadata
+    for (int i = 0; i < 4; ++i) {
+        gzip_params params;
+        if (i & 1) {
+            params.file_name = "original file name";
+        }
+        if (i & 2) {
+            params.comment = "detailed file description";
+        }
+        gzip_compressor    out(params);
+        gzip_decompressor  in;
+        BOOST_CHECK(
+            test_filter_pair( boost::ref(out), 
+                              boost::ref(in), 
+                              std::string(data.begin(), data.end()) )
+        );
+        BOOST_CHECK(in.file_name() == params.file_name);
+        BOOST_CHECK(in.comment() == params.comment);
+    }
+
+    // Test compression and decompression with custom allocator
     BOOST_CHECK(
         test_filter_pair( basic_gzip_compressor<gzip_alloc>(), 
                           basic_gzip_decompressor<gzip_alloc>(), 
@@ -35,9 +59,34 @@
     );
 }
 
+void multiple_member_test()
+{   // Compresses the same data twice into one buffer, then decompresses the buffer in a single pass.
+    text_sequence      data;
+    std::vector<char>  temp, dest;
+
+    // Write compressed data to temp, twice in succession
+    filtering_ostream out;
+    out.push(gzip_compressor());
+    out.push(io::back_inserter(temp));
+    io::copy(make_iterator_range(data), out);
+    out.push(io::back_inserter(temp));
+    io::copy(make_iterator_range(data), out);
+
+    // Read compressed data from temp into dest
+    filtering_istream in;
+    in.push(gzip_decompressor());
+    in.push(array_source(&temp[0], temp.size()));
+    io::copy(in, io::back_inserter(dest));
+
+    // Check that dest consists of two copies of data. NOTE(review): dest.size() is not verified first, so a short dest would make std::equal read past its end.
+    BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin()));
+    BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin() + dest.size() / 2));
+}
+
 test_suite* init_unit_test_suite(int, char* []) 
 {
     test_suite* test = BOOST_TEST_SUITE("gzip test");
-    test->add(BOOST_TEST_CASE(&gzip_test));
+    test->add(BOOST_TEST_CASE(&compression_test));
+    test->add(BOOST_TEST_CASE(&multiple_member_test));
     return test;
 }