<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r56586 - in sandbox/statistics/kernel/libs/statistics/kernel: example src
From: erwann.rogard_at_[hidden]
Date: 2009-10-04 18:55:00
Author: e_r
Date: 2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
New Revision: 56586
URL: http://svn.boost.org/trac/boost/changeset/56586
Log:
m
Text files modified: 
   sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp |    15 +--                                     
   sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp   |    53 +++++++------                           
   sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp        |   158 +++++++++++++++++++++++---------------- 
   sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp        |    32 ++++----                                
   sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp                 |     9 +-                                      
   5 files changed, 150 insertions(+), 117 deletions(-)
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp	(original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/benchmark_scalar.cpp	2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -5,6 +5,8 @@
 //  Software License, Version 1.0. (See accompanying file                    //
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)         //
 /////////////////////////////////////////////////////////////////////////////// 
+/*
+
 #include <cmath>
 #include <vector>
 #include <algorithm>
@@ -19,14 +21,9 @@
 #include <boost/math/tools/precision.hpp>
 
 // Order of the files matters!
-#include <boost/standard_distribution/distributions/normal.hpp>
-#include <boost/scalar_dist/fun_wrap/pdf.hpp>
-#include <boost/dist_random/distributions/normal.hpp>
-#include <boost/dist_random/random/generate_n.hpp>
-
-//#include <boost/scalar_dist/fun_wrap/pdf.hpp>
-//#include <boost/scalar_dist/meta/delegate.hpp>
-//#include <boost/scalar_dist/algorithm/transform.hpp>
+#include <boost/statistics/detail/distribution_toolkit/distributions/normal.hpp>
+#include <boost/statistics/detail/distribution_toolkit/fwd_math/cdf.hpp> // ?!
+
 
 #include <boost/binary_op/data/tuple_range.hpp>
 
@@ -268,7 +265,9 @@
     // DO the same for rp
     
     out << "<-" << std::endl;
+
 }
+*/
 
 
 
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp	(original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/kernel_mono_rp.cpp	2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -21,32 +21,40 @@
 #include <boost/statistics/kernel/functional/rp_visitor.hpp>
 #include <boost/statistics/kernel/functional/estimator.hpp>
 #include <libs/statistics/kernel/example/scalar_rp.h>
-
 void example_kernel_mono_rp(std::ostream& out){
     out << "-> example_kernel_mono_rp : ";
+
+
+    // This example shows how to compute a Rosenblatt-Parzen estimate of the 
+    // density, p(x). The type used for each data-unit, x, is a vector of 
+    // doubles, and the kernel uses the same bandwidth throughout all 
+    // coordinates
+
     using namespace boost;
+    namespace kernel = boost::statistics::kernel;
     
     // Types
     typedef double                                          val_;
     typedef std::vector<val_>                               vec_;
-    typedef std::vector<vec_>                               mat_;
+    typedef vec_                                            x_;
+    typedef std::vector<x_>                                 dataset_;
     typedef mt19937                                         urng_;
     typedef normal_distribution<val_>                       norm_;
     typedef variate_generator<urng_&,norm_>                 gen_;
-    typedef statistics::kernel::scalar::gaussian<val_>                  gauss_k_;
+    typedef kernel::scalar::gaussian<val_>                  gauss_k_;
 
     const unsigned dim = 2;
-    typedef statistics::kernel::joint::kernel_mono<gauss_k_,dim>               kernel_mono_k_;
-    // NB const vec_&, not vec_
-    typedef statistics::kernel::rp_visitor<kernel_mono_k_,const vec_&>  rp_visitor_;
+    typedef kernel::joint::kernel_mono<gauss_k_,dim> kernel_mono_k_;
+    // Use of a const reference is not necessary but probably improves speed
+    typedef kernel::rp_visitor<kernel_mono_k_,const x_&>  rp_visitor_;
     
     // Constants
     const val_ bandwidth = 0.5;
     const val_ eps = math::tools::epsilon<val_>();
     const unsigned n = 10;
     
-    // Generate sample
-    mat_ vec_x; vec_x.reserve(n);
+    // Generate n samples, each drawn from prod{N(0,1):i=1,...,dim}
+    dataset_ dataset; dataset.reserve(n);
     vec_ vec_rp; vec_rp.reserve(n);
     urng_ urng;
     norm_ norm;
@@ -54,41 +62,38 @@
     for(unsigned i = 0; i<n; i++){
         vec_ tmp(dim);
         std::generate_n(
-            begin(tmp),
+            boost::begin(tmp),
             dim,
             gen
         );
-        vec_x.push_back( tmp );
+        dataset.push_back( tmp );
     }
 
-    kernel_mono_k_ kernel_mono_k(bandwidth);
-    
-    kernel_mono_k(vec_x[0],vec_x[1]);
-    // Density estimate for each x in vec_x using vec_x as the sample
-    BOOST_FOREACH(const vec_& x,vec_x){
+    // Density estimate for each x in dataset
+    BOOST_FOREACH(const x_& x,dataset){
         val_ rp = std::for_each(
-            begin(vec_x),
-            end(vec_x),
+            boost::begin(dataset),
+            boost::end(dataset),
             rp_visitor_(bandwidth,x)
         ).estimate();
         vec_rp.push_back(rp);
     } 
-    typedef sub_range<mat_> sub_;
-    typedef statistics::kernel::estimator<
+    typedef sub_range<dataset_> sub_;
+    typedef kernel::estimator<
         sub_,
-        statistics::kernel::rp_visitor,
+        kernel::rp_visitor,
         kernel_mono_k_
     > estimator_;
     estimator_ estimator(bandwidth); 
-    statistics::train(estimator,sub_(vec_x));
+    estimator.train(sub_(dataset));
     vec_ vec_rp2; vec_rp2.reserve(n);
 
     // Same as previous but calls estimator instead of for_each
     for(unsigned i = 0; i<n; i++){
-        vec_ x = vec_x[i];
+        x_ x = dataset[i];
         val_ rp = vec_rp[i];
-        val_ rp2 = estimator(x).estimate();
+        val_ rp2 = estimator.predict(x);
         BOOST_ASSERT(fabs(rp-rp2)<eps);
     } 
     out << "<-" << std::endl;
-}
\ No newline at end of file
+}
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp	(original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_nw.cpp	2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -17,28 +17,59 @@
 #include <boost/math/tools/precision.hpp>
 #include <boost/typeof/typeof.hpp>
 
-#include <boost/binary_op/data/tuple_range.hpp>
-//#include <boost/statistics/estimator_concept/trainable_estimator/concept.hpp>
+#include <boost/fusion/sequence/intrinsic/at_key.hpp>
+#include <boost/fusion/include/at_key.hpp>
+#include <boost/fusion/container/map.hpp>
+#include <boost/fusion/include/map.hpp>
+#include <boost/fusion/include/map_fwd.hpp>
+
+
+#include <boost/statistics/detail/fusion/functor/at_key.hpp>
+
 #include <boost/statistics/kernel/scalar/gaussian.hpp>
-#include <boost/statistics/kernel/functional/nw_visitor_tuple.hpp>
+#include <boost/statistics/kernel/functional/meta_nw_visitor_unary.hpp>
 #include <boost/statistics/kernel/functional/estimator.hpp>
 #include <libs/statistics/kernel/example/scalar_nw.h>
 
 void example_scalar_nw(std::ostream& out){
+
     out << "-> example_scalar_nw : ";
     using namespace boost;
 
+    namespace kernel = boost::statistics::kernel;
+
+    // This example shows how to compute a Nadaraya-Watson estimate of E[y|x]. 
+    // The type used for each data-unit, here, is a fusion map whose x and y
+    // components are accessed using keys
+    
     // Types
     typedef double                                          val_;
-    typedef std::vector<val_>                               vec_;
+    typedef std::vector<val_>                               vals_;
+    typedef mpl::int_<0>                                    key_x_;
+    typedef mpl::int_<1>                                    key_y_;
+    typedef fusion::pair<key_x_,val_>                       x_;
+    typedef fusion::pair<key_y_,val_>                       y_;
+    typedef statistics::detail::fusion::functor::at_key<key_x_> at_key_x_;
+    typedef statistics::detail::fusion::functor::at_key<key_y_> at_key_y_;
+    typedef fusion::map<x_,y_>                              data_unit_;
+    typedef std::vector<data_unit_>                          dataset_;
+    // The rationale for data_range_ is it's cheap to copy
+    typedef sub_range<dataset_>                             data_range_;
+        
     typedef mt19937                                         urng_;
     typedef normal_distribution<val_>                       norm_;
     typedef variate_generator<urng_&,norm_>                 gen_;
-    typedef statistics::kernel::scalar::gaussian<val_>      gauss_k_;
-    typedef statistics::kernel::nw_visitor_tuple<gauss_k_,val_> 
-                                                            nw_visitor_tuple_;
-    typedef nw_visitor_tuple_::nw_visitor_type              nw_visitor_;
-    typedef nw_visitor_tuple_::rp_visitor_type              rp_visitor_;
+    typedef kernel::scalar::gaussian<val_>                  gauss_k_;
+    typedef kernel::meta_nw_visitor_unary<
+        at_key_x_,
+        at_key_y_
+    > meta_nw_visitor_u_;
+    typedef meta_nw_visitor_u_::apply<
+        gauss_k_,
+        val_
+    >::type  nw_visitor_u_;
+    typedef nw_visitor_u_::nw_visitor_type              nw_visitor_;
+    typedef nw_visitor_u_::rp_visitor_type              rp_visitor_;
     
     // Constants
     const val_ bandwidth = 0.5;
@@ -46,73 +77,70 @@
     const unsigned n = 10;
     
     // Initialization
-    vec_ vec_x(n);
-    vec_ vec_y(n,static_cast<val_>(1)); 
-    vec_ vec_rp; vec_rp.reserve(n);
-    vec_ vec_nw; vec_nw.reserve(n);
-    urng_ urng;
-    norm_ norm;
-    gen_ gen(urng,norm);
-    std::generate_n(
-        begin(vec_x),
-        n,
-        gen
-    );
-
-    // Computes a conditional mean estimate (nw) for each x in vec_x using 
-    // a sequence of (x,y) tuples constructed from (vec_x,vec_y) as training
-    // sample. The density (rp) is computed as a by-product. 
-    // Here, y = 1, so we should have rp = nw (un-normalized).
-    BOOST_FOREACH(val_& x,vec_x){
-        typedef binary_op::tuple_range<const vec_&,const vec_&> factory_;
-        typedef factory_::type range_tuple_;
-        range_tuple_ range_tuple = factory_::make(vec_x,vec_y);
+    vals_ vec_rp; vec_rp.reserve(n);
+    vals_ vec_nw; vec_nw.reserve(n);
+    dataset_ dataset;
+    dataset.reserve(n);
+    {
+        urng_ urng;
+        norm_ norm;
+        gen_ gen(urng,norm);
+        val_ one = static_cast<val_>(1);
+        for(unsigned i = 0; i<n; i++){
+            dataset.push_back(
+                data_unit_(
+                    fusion::make_pair<key_x_>(gen()),
+                    fusion::make_pair<key_y_>(one)
+                )
+            );
+        }
+    }
+
+    // Computes nw = E[y|x] for each x in the dataset. The density (rp) is 
+    // obtained as a by-product. Here, y = 1, so we should have 
+    // rp = nw (un-normalized).
+    BOOST_FOREACH(data_unit_& u,dataset){
         nw_visitor_ nw_visitor = std::for_each(
-            begin(range_tuple),
-            end(range_tuple),
-            nw_visitor_tuple_(bandwidth,x)
-        ).nw_visitor();
+            boost::begin(dataset),
+            boost::end(dataset),
+            nw_visitor_u_(
+                bandwidth,
+                fusion::at_key<key_x_>(u)
+            )
+        );
         val_ u_nw = nw_visitor.unnormalized_estimate();
         vec_nw.push_back(u_nw);
         rp_visitor_ rp_visitor = nw_visitor.rp_visitor();
         val_ rp = rp_visitor.estimate();
         BOOST_ASSERT(fabs(rp-u_nw)<eps);
-    } 
-
-    typedef binary_op::tuple_range<const vec_&,const vec_&> factory_;
-    typedef factory_::type range_xy_;
-    range_xy_ range_xy = factory_::make(vec_x,vec_y);
-    // A pair of iterators is cheap to copy so no need to pass it by reference
-    typedef statistics::kernel::estimator<
-        range_xy_,
-        statistics::kernel::nw_visitor_tuple,
+    }
+    
+    // Same as above using estimator
+    
+    typedef kernel::estimator<
+        data_range_,
+        meta_nw_visitor_u_::apply,
         gauss_k_
     > estimator_;
     estimator_ estimator(bandwidth);
-    statistics::train(estimator,range_xy);
-        
-    // Same as previous but calls estimator instead of for_each
-    BOOST_FOREACH(val_& x,vec_x){
-        // A local definition of nw_visitor_ is needed because x is passed
-        // by ref, not by value as in that outside the scope
-        typedef estimator_::result<val_>::type result_type;
-        typedef result_type::nw_visitor_type nw_visitor_;
-        typedef result_type::rp_visitor_type rp_visitor_;
-        nw_visitor_ nw_visitor = estimator(x).nw_visitor();
-        val_ u_nw = nw_visitor.unnormalized_estimate();
-        rp_visitor_ rp_visitor = nw_visitor.rp_visitor();
-        val_ rp = rp_visitor.estimate();
-        BOOST_ASSERT(fabs(rp-u_nw)<eps);
-    } 
+    estimator.train(
+        data_range_(dataset)
+    ); // * step 1 *
+
     
-    // Shorter version of the above
-    BOOST_FOREACH(val_& x,vec_x){
-        BOOST_AUTO( nw_visitor , estimator(x).nw_visitor() );
-        val_ u_nw = nw_visitor.unnormalized_estimate();
-        BOOST_AUTO( rp_visitor , nw_visitor.rp_visitor() );
-        val_ rp = rp_visitor.estimate();
+    BOOST_FOREACH(data_unit_& u,dataset){
+        // -> these steps are independent of step2, they're just a test
+        val_ x = fusion::at_key<key_x_>(u);
+        BOOST_AUTO( nw_v , estimator.visit(x) );
+        val_ u_nw = nw_v.unnormalized_estimate();
+        BOOST_AUTO( rp_v , nw_v.rp_visitor() );
+        val_ rp = rp_v.estimate();
         BOOST_ASSERT(fabs(rp-u_nw)<eps);
+        // <-
+        
+        estimator.predict(x); // * step 2 *
+    
     } 
     
     out << "<-" << std::endl;
-}
\ No newline at end of file
+}
Modified: sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp	(original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/example/scalar_rp.cpp	2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -15,6 +15,7 @@
 #include <boost/random/variate_generator.hpp>
 #include <boost/math/special_functions/fpclassify.hpp> //needed?
 #include <boost/math/tools/precision.hpp>
+#include <boost/mpl/int.hpp>
 
 #include <boost/statistics/kernel/scalar/gaussian.hpp>
 #include <boost/statistics/kernel/functional/rp_visitor.hpp>
@@ -25,6 +26,9 @@
     out << "-> example_scalar_rp : ";
     using namespace boost;
 
+    // This example shows how to compute a Rosenblatt-Parzen estimate of the 
+    // density, p(x). The type used for each data-unit, here, is double
+
     //Types
     typedef double                                          val_;
     typedef std::vector<val_>                               vec_;
@@ -40,33 +44,28 @@
     const unsigned n = 10;
 
     // Initialization
-    vec_ vec_x(n);
+    vec_ dataset(n);
     vec_ vec_rp; vec_rp.reserve(n);
     urng_ urng;
     norm_ norm;
     gen_ gen(urng,norm);
     std::generate_n(
-        begin(vec_x),
+        begin(dataset),
         n,
         gen
     );
 
-    // Computes a density estimate for each x in vec_x using vec_x as sample
-    BOOST_FOREACH(val_& x,vec_x){
+    // Computes a density estimate for each x in dataset
+    BOOST_FOREACH(val_& x,dataset){
         val_ rp = for_each(
-            begin(vec_x),
-            end(vec_x),
+            boost::begin(dataset),
+            boost::end(dataset),
             rp_visitor_(bandwidth,x)
         ).estimate();
         vec_rp.push_back(rp);
     } 
 
-    std::copy(
-        begin(vec_rp),
-        end(vec_rp),
-        std::ostream_iterator<val_>(out," ")
-    );
-    
+    // Same as previous but calls estimator instead of for_each
     typedef sub_range<vec_> sub_x_;
     typedef 
         statistics::kernel::estimator<
@@ -75,16 +74,17 @@
             gauss_k_
         > estimator_;
     estimator_ estimator(bandwidth); 
-    statistics::train(estimator,sub_x_(vec_x));
+    sub_x_ sub_x(dataset);
+    estimator.train(sub_x);
     vec_ vec_rp2; vec_rp2.reserve(n);
     
-    // Same as previous but calls estimator instead of for_each
     for(unsigned i = 0; i<n; i++){
-        val_ x = vec_x[i];
+        val_ x = dataset[i];
         val_ rp = vec_rp[i];
-        val_ rp2 = estimator(x).estimate();
+        val_ rp2 = estimator.predict(x);
         BOOST_ASSERT(fabs(rp-rp2)<eps);
     } 
             
     out << "<-" << std::endl;
+
 }
\ No newline at end of file
Modified: sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp
==============================================================================
--- sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp	(original)
+++ sandbox/statistics/kernel/libs/statistics/kernel/src/main.cpp	2009-10-04 18:54:59 EDT (Sun, 04 Oct 2009)
@@ -11,12 +11,13 @@
 #include <libs/statistics/kernel/example/kernel_mono_rp.h>
 #include <libs/statistics/kernel/example/benchmark_scalar.h>
 
+
 int main(){
 
-    // example_scalar_rp(std::cout);
-    // example_scalar_nw(std::cout);
-    // example_kernel_mono_rp(std::cout);
-    example_benchmark_scalar(std::cout);
+    example_scalar_rp(std::cout);
+    example_scalar_nw(std::cout);
+    example_kernel_mono_rp(std::cout);
+    //example_benchmark_scalar(std::cout);
     
     return 0;
 }
\ No newline at end of file