From: Steven Watanabe (steven_at_[hidden])
Date: 2007-08-09 16:59:06


AMDG

Eric Niebler <eric <at> boost-consulting.com> writes:

> How would you suggest ordered_append be implemented?

Mostly copied from dense_ordered_inserter:

template<typename Value>
struct dense_ordered_append_iterator
{
    typedef Value value_type;

    explicit dense_ordered_append_iterator(dense_array<Value> &that)
      : new_(&that)
    {}

    template<typename R, typename V>
    void set_at(R &run, V &value)
    {
        dense_array<Value> &that = *new_;
        typename Run<R>::offset_type off = rrs::offset(run);
        typename Run<R>::offset_type endoff = rrs::end_offset(run);
        if(off == -inf)
        {
            that.pre_.set(run, value);
        }
        else if(endoff == inf)
        {
            that.post_.set(run, value);
        }
        else
        {
            that.offset_ = std::min<std::ptrdiff_t>(that.offset_, off);
            that.reserve(endoff - that.offset_);
            that.resize(off - that.offset_, implicit_cast<Value const
&>(rrs::zero(that)));
            that.resize(endoff - that.offset_, value);
        }
    }

private:
    dense_array<Value>* new_;
};
 
> Imagine the series storage is std::map<offset, value>. Inserting one
> value with insert(value) is O(log N). Inserting N values this way is O(N
> log N). But if you use insert(begin, end), where begin and end are
> iterators to sorted data, the insert is faster. O(N).

It is in fact possible to fill a binary tree by pushing
sorted data in linear time.

struct node {
    node* right;
    node* left;
    node* parent;
    int value;
};

struct map {
    node* root;
};

struct node_inserter {
    void insert_to_right(int i) {
        node* result = new node;
        result->left = result->right = 0;
        result->parent = current;
        result->value = i;
        current->right = result;
        current = result;
    }
    void insert_after_complete_subtree(int i) {
        node* result = new node;
        result->parent = current->parent;
        current->parent->right = result;
        result->right = 0;
        result->left = current;
        result->value = i;
        current->parent = result;
        current = result;
    }
    void insert_at_root(int i) {
        node* result = new node;
        result->left = m->root;
        result->right = result->parent = 0;
        result->value = i;
        current = m->root = result;
    }
    void insert(int i) {
        unsigned long count = size;
        if(size == 0) {
            insert_at_root(i);
        } else if(count % 2 == 0) {
            insert_to_right(i);
        } else {
            count /= 2;
            while(count % 2 == 1) {
                count /= 2;
                current = current->parent;
            }
            if(count == 0) {
                insert_at_root(i);
            } else {
                insert_after_complete_subtree(i);
            }
        }
        ++size;
    }
    node* current;
    map* m;
    unsigned long size;
};

> The point is, you should be able to efficiently be able to
> write ordered data to a series without any concern for how that data is
> being stored, or even if it's being stored.

I don't think you should have *only* a range insert.
I think an output iterator that behaves more like
std::back_inserter is necessary too.

> But the lower-level stuff is still needed so that series types can be
> efficiently built in generic code (like the time series algorithms, for
> instance), which can't make any assumptions.

I agree that we need the lower level stuff. I just disagree
about *what* lower level stuff.
 
> >
> > std::vector<tuple<...> > elements;
> > for(;;)
> > elements.push_back(
> > make_tuple(poll_some_value(), last_time, current_time));
> > series.assign(elements);
>
> Every element is copied twice.
> >
> > vs.
> >
> > ordered_inserter inserter(series);
> > for(;;)
> > *insert++ = make_tuple(poll_some_value(), last_time, current_time);
> > inserter.commit();
>
> Every element is copied once.

Unless you're using a different intermediate representation.

> Have I missed your point, or did you just
> make mine for me?
>

Actually what I would like most is

ordered_inserter inserter(series);
for(;;)
    *insert++ = make_tuple(poll_some_value(), last_time, current_time);

With the additional guarantee that you can
process the partial results at any time.

In Christ,
Steven Watanabe