$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r85368 - trunk/tools/quickbook/src
From: dnljms_at_[hidden]
Date: 2013-08-16 13:31:26
Author: danieljames
Date: 2013-08-16 13:31:26 EDT (Fri, 16 Aug 2013)
New Revision: 85368
URL: http://svn.boost.org/trac/boost/changeset/85368
Log:
Simpler, less efficient, id generation.
I noticed a bug for an edge case in the id generator. Wouldn't be too
hard to fix, but the implementation was too complicated with some really
pointless optimizations, so I rewrote it using a simpler brute force
method. Will be fine for now unless something has a lot of duplicate ids.
Text files modified: 
   trunk/tools/quickbook/src/id_generation.cpp   |   338 ++++++++++++++++----------------------- 
   trunk/tools/quickbook/src/id_manager_impl.hpp |     3                                         
   2 files changed, 142 insertions(+), 199 deletions(-)
Modified: trunk/tools/quickbook/src/id_generation.cpp
==============================================================================
--- trunk/tools/quickbook/src/id_generation.cpp	Fri Aug 16 13:31:08 2013	(r85367)
+++ trunk/tools/quickbook/src/id_generation.cpp	2013-08-16 13:31:26 EDT (Fri, 16 Aug 2013)	(r85368)
@@ -25,90 +25,12 @@
 
     static const std::size_t max_size = 32;
 
-    //
-    // Data used for generating placeholders that have duplicates.
-    //
-
-    struct id_generation_data
-    {
-        id_generation_data(boost::string_ref src_id)
-          : child_start(src_id.rfind('.') + 1),
-            id(normalize_id(src_id, child_start, max_size - 1)),
-                // 'max_size - 1' leaves a character to append
-                // a number.
-            count(0)
-        {
-            if (std::isdigit(id[id.length() - 1]))
-            {
-                if (child_length() < max_size - 1)
-                    id += '_';
-                else
-                    reduce_id();
-            }
-        }
-
-        void reduce_id()
-        {
-            assert(id.length() > child_start);
-            std::size_t length = id.length() - 1;
-            while(length > child_start && std::isdigit(id[length - 1])) --length;
-            id.erase(length);
-            count = 0;
-        }
-
-        std::size_t child_length() const
-        {
-            return id.length() - child_start;
-        }
-
-        std::size_t child_start;
-        std::string id;
-        int count;
-    };
-
-    // Created for all desired ids, either when resolving an id or due to
-    // generating a new id to avoid duplicates.
-    struct id_data
-    {
-        id_data()
-          : category(id_category::numbered),
-            used(false),
-            generation_data()
-        {}
-
-        void update_category(id_category c)
-        {
-            if (c.c > category.c) category = c;
-        }
-
-        id_category category;   // The highest priority category of the
-                                // placeholders that want to use this id.
-        bool used;              // Whether this id has been used.
-        boost::shared_ptr<id_generation_data> generation_data;
-                                // If a duplicates are found, this is
-                                // created to generate new ids.
-                                //
-                                // Many to one relationship, because truncation
-                                // can lead to different ids contending for the
-                                // same id prefix.
-    };
-
-    struct placeholder_generation_data
-    {
-        placeholder_generation_data() : resolved_id(), data(0) {}
-
-        std::string resolved_id;
-        id_data* data;
-    };
-
-    typedef boost::unordered_map<std::string, id_data> allocated_ids;
-    typedef std::vector<placeholder_generation_data> placeholder_data;
     typedef std::vector<id_placeholder const*> placeholder_index;
+    placeholder_index index_placeholders(id_state const&, boost::string_ref);
 
-    placeholder_index index_placeholders(id_state const&, boost::string_ref xml);
-    void resolve_id(id_placeholder const&, std::vector<std::string> const&,
-        allocated_ids&, placeholder_data& data);
-    std::string generate_id(id_placeholder const&, allocated_ids&, placeholder_data& data);
+    void generate_id_block(
+            placeholder_index::iterator, placeholder_index::iterator,
+            std::vector<std::string>& generated_ids);
 
     std::vector<std::string> generate_ids(id_state const& state, boost::string_ref xml)
     {
@@ -119,9 +41,7 @@
         placeholder_index placeholders = index_placeholders(state, xml);
 
         typedef std::vector<id_placeholder const*>::iterator iterator;
-
-        iterator it = placeholders.begin(),
-            end = placeholders.end();
+        iterator it = placeholders.begin(), end = placeholders.end();
 
         while (it != end) {
             // We process all the ids that have the same number of dots
@@ -134,21 +54,8 @@
             while (group_end != end && (*group_end)->num_dots == (*it)->num_dots)
                 ++group_end;
 
-            // Used to find duplicate ids, and store required data about them.
-            allocated_ids ids;
-            placeholder_data data(state.placeholders.size());
-
-            // First resolve ids by adding them to their parent's ids
-            // (which have been fully processed in a previous iteration).
-            for (it = group_begin; it != group_end; ++it) {
-                resolve_id(**it, generated_ids, ids, data);
-            }
-
-            // Generate the final ids, taking into account any duplicates
-            // found when resolving.
-            for (it = group_begin; it != group_end; ++it) {
-                generated_ids[(*it)->index] = generate_id(**it, ids, data);
-            }
+            generate_id_block(group_begin, group_end, generated_ids);
+            it = group_end;
         }
 
         return generated_ids;
@@ -228,101 +135,146 @@
         return sorted_placeholders;
     }
 
-    //
-    // resolve_id
-    //
-    // Convert child ids to full ids, and add to the
-    // allocated ids (although not yet set in stone because
-    // there might be duplicates).
-    //
-    // Note that the parent ids has to be generated before resolving
-    // the child id.
-    //
+    // Resolve and generate ids.
 
-    void resolve_id(id_placeholder const& p, std::vector<std::string> const& generated_ids,
-            allocated_ids& ids, placeholder_data& data)
+    struct generate_id_block_type
     {
-        assert(!data[p.index].data);
+        // The ids which won't require duplicate handling.
+        typedef boost::unordered_map<std::string, id_placeholder const*>
+            chosen_id_map;
+        chosen_id_map chosen_ids;
+        std::vector<std::string>& generated_ids;
 
-        std::string id = p.parent ?
-            generated_ids[p.parent->index] + "." + p.id : p.id;
+        generate_id_block_type(std::vector<std::string>& generated_ids) :
+            generated_ids(generated_ids) {}
 
-        id_data& data_ = ids.emplace(id, id_data()).first->second;
-        data_.update_category(p.category);
+        void generate(placeholder_index::iterator begin,
+            placeholder_index::iterator end);
 
-        data[p.index].data = &data_;
-        data[p.index].resolved_id = id;
+        std::string resolve_id(id_placeholder const*);
+        std::string generate_id(id_placeholder const*, std::string const&);
+    };
+
+    void generate_id_block(placeholder_index::iterator begin,
+            placeholder_index::iterator end,
+            std::vector<std::string>& generated_ids)
+    {
+        generate_id_block_type impl(generated_ids);
+        impl.generate(begin, end);
     }
 
-    //
-    // generate_id
-    //
-    // Finally generate the final id.
-    //
+    void generate_id_block_type::generate(placeholder_index::iterator begin,
+            placeholder_index::iterator end)
+    {
+        std::vector<std::string> resolved_ids;
+
+        for (placeholder_index::iterator i = begin; i != end; ++i)
+            resolved_ids.push_back(resolve_id(*i));
 
-    void register_generation_data(id_placeholder const&, allocated_ids&,
-            placeholder_data& data);
+        unsigned index = 0;
+        for (placeholder_index::iterator i = begin; i != end; ++i, ++index)
+        {
+            generated_ids[(**i).index] =
+                generate_id(*i, resolved_ids[index]);
+        }
+    }
 
-    std::string generate_id(id_placeholder const& p, allocated_ids& ids,
-            placeholder_data& data)
+    std::string generate_id_block_type::resolve_id(id_placeholder const* p)
     {
-        assert(data[p.index].data);
+        std::string id = p->parent ?
+            generated_ids[p->parent->index] + "." + p->id :
+            p->id;
+
+        if (p->category.c > id_category::numbered) {
+            // Reserve the id if it isn't already reserved.
+            chosen_id_map::iterator pos = chosen_ids.emplace(id, p).first;
+
+            // If it was reserved by a placeholder with a lower category,
+            // then overwrite it.
+            if (p->category.c > pos->second->category.c)
+                pos->second = p;
+        }
+
+        return id;
+    }
 
-        // If the placeholder id is available, then update data
-        // and return.
-        if (p.category == data[p.index].data->category &&
-                !data[p.index].data->used &&
-                p.category.c != id_category::numbered)
+    std::string generate_id_block_type::generate_id(id_placeholder const* p,
+            std::string const& resolved_id)
+    {
+        if (p->category.c > id_category::numbered &&
+                chosen_ids.at(resolved_id) == p)
         {
-            data[p.index].data->used = true;
-            return data[p.index].resolved_id;
+            return resolved_id;
         }
 
-        if (!data[p.index].data->generation_data)
-        {
-            data[p.index].data->generation_data =
-                boost::make_shared<id_generation_data>(data[p.index].resolved_id);
-            register_generation_data(p, ids, data);
+        // Split the id into its parent part and child part.
+        //
+        // Note: can't just use the placeholder's parent, as the
+        // placeholder id might contain dots.
+        unsigned child_start = resolved_id.rfind('.');
+        std::string parent_id, base_id;
+
+        if (child_start == std::string::npos) {
+            base_id = normalize_id(resolved_id, max_size - 1);
+        }
+        else {
+            parent_id = resolved_id.substr(0, child_start + 1);
+            base_id = normalize_id(resolved_id.substr(child_start + 1),
+                    max_size - 1);
         }
 
-        // Loop until an available id is found.
-        for(;;)
-        {
-            id_generation_data& generation_data = *data[p.index].data->generation_data;
+        // Since we're adding digits, don't want an id that ends in
+        // a digit.
+
+        unsigned int length = base_id.size();
 
+        if (length > 0 && std::isdigit(base_id[length - 1])) {
+            if (length < max_size - 1) {
+                base_id += '_';
+                ++length;
+            }
+            else {
+                while (length > 0 && std::isdigit(base_id[length -1]))
+                    --length;
+                base_id.erase(length);
+            }
+        }
+
+        unsigned count = 0;
+
+        while (true)
+        {
             std::string postfix =
-                boost::lexical_cast<std::string>(generation_data.count++);
+                boost::lexical_cast<std::string>(count++);
 
-            if (generation_data.child_length() + postfix.length() > max_size) {
-                // The resulting id is too long, so move to a shorter id.
-                generation_data.reduce_id();
-                register_generation_data(p, ids, data);
+            if ((base_id.size() + postfix.size()) > max_size) {
+                // The id is now too long, so reduce the length and
+                // start again.
+
+                // Would need a lot of ids to get this far....
+                if (length == 0) throw std::runtime_error("Too many ids");
+
+                // Trim a character.
+                --length;
+
+                // Trim any trailing digits.
+                while (length > 0 && std::isdigit(base_id[length -1]))
+                    --length;
+
+                base_id.erase(length);
+                count = 0;
             }
             else {
-                std::string id = generation_data.id + postfix;
+                // Try to reserve this id.
+                std::string generated_id = parent_id + base_id + postfix;
 
-                if (ids.find(id) == ids.end()) return id;
+                if (chosen_ids.emplace(generated_id, p).second) {
+                    return generated_id;
+                }
             }
         }
     }
 
-    // Every time the generation id is changed, this is called to
-    // check if that id is already in use.
-    void register_generation_data(id_placeholder const& p, allocated_ids& ids,
-            placeholder_data& data)
-    {
-        std::string const& id = data[p.index].data->generation_data->id;
-
-        id_data& new_data = ids.emplace(id, id_data()).first->second;
-
-        // If there is already generation_data for the new id then use that.
-        // Otherwise use the placeholder's existing generation_data.
-        if (new_data.generation_data)
-            data[p.index].data->generation_data = new_data.generation_data;
-        else
-            new_data.generation_data = data[p.index].data->generation_data;
-    }
-
     //
     // replace_ids
     //
@@ -387,47 +339,39 @@
 
     std::string normalize_id(boost::string_ref src_id)
     {
-        return normalize_id(src_id, 0, max_size);
+        return normalize_id(src_id, max_size);
     }
 
-    std::string normalize_id(
-            boost::string_ref src_id,
-            std::size_t prefix = 0,
-            std::size_t size = max_size)
+    std::string normalize_id(boost::string_ref src_id, std::size_t size)
     {
         std::string id(src_id.begin(), src_id.end());
 
-        std::size_t src = prefix;
-        std::size_t dst = prefix;
-        size += prefix;
-
-        if (src >= id.length()) {
-            return id;
-        }
+        std::size_t src = 0;
+        std::size_t dst = 0;
 
         while (src < id.length() && id[src] == '_') {
             ++src;
         }
 
-        if (src >= id.length()) {
-            id += '_';
-            return id;
+        if (src == id.length()) {
+            id = "_";
         }
-
-        while (src < id.length() && dst < size) {
-            if (id[src] == '_') {
-                do {
-                    ++src;
-                } while(src < id.length() && id[src] == '_');
-
-                if (src < id.length()) id[dst++] = '_';
+        else {
+            while (src < id.length() && dst < size) {
+                if (id[src] == '_') {
+                    do {
+                        ++src;
+                    } while(src < id.length() && id[src] == '_');
+
+                    if (src < id.length()) id[dst++] = '_';
+                }
+                else {
+                    id[dst++] = id[src++];
+                }
             }
-            else {
-                id[dst++] = id[src++];
-            }
-        }
 
-        id.erase(dst);
+            id.erase(dst);
+        }
 
         return id;
     }
Modified: trunk/tools/quickbook/src/id_manager_impl.hpp
==============================================================================
--- trunk/tools/quickbook/src/id_manager_impl.hpp	Fri Aug 16 13:31:08 2013	(r85367)
+++ trunk/tools/quickbook/src/id_manager_impl.hpp	2013-08-16 13:31:26 EDT (Fri, 16 Aug 2013)	(r85368)
@@ -113,8 +113,7 @@
     std::vector<std::string> generate_ids(id_state const&, boost::string_ref);
 
     std::string normalize_id(boost::string_ref src_id);
-    std::string normalize_id(boost::string_ref src_id,
-            std::size_t, std::size_t);
+    std::string normalize_id(boost::string_ref src_id, std::size_t);
 
     //
     // Xml subset parser used for finding id values.