$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r57642 - in sandbox/odeint/boost/numeric/odeint: . detail
From: mario.mulansky_at_[hidden]
Date: 2009-11-13 16:29:33
Author: mariomulansky
Date: 2009-11-13 16:29:32 EST (Fri, 13 Nov 2009)
New Revision: 57642
URL: http://svn.boost.org/trac/boost/changeset/57642
Log:
now using iterator algebra in rk4 cash karp - 10% speed increase
Text files modified: 
   sandbox/odeint/boost/numeric/odeint/detail/iterator_algebra.hpp |   269 +++++++++++++++++++++++++++++---------- 
   sandbox/odeint/boost/numeric/odeint/stepper_rk4.hpp             |   111 +++++----------                         
   2 files changed, 237 insertions(+), 143 deletions(-)
Modified: sandbox/odeint/boost/numeric/odeint/detail/iterator_algebra.hpp
==============================================================================
--- sandbox/odeint/boost/numeric/odeint/detail/iterator_algebra.hpp	(original)
+++ sandbox/odeint/boost/numeric/odeint/detail/iterator_algebra.hpp	2009-11-13 16:29:32 EST (Fri, 13 Nov 2009)
@@ -25,106 +25,231 @@
 
     // computes y += alpha * x1
     template <
-	class InOutIterator ,
-	class InputIterator ,
-	class T
-	>
+        class InOutIterator ,
+        class InputIterator ,
+        class T
+        >
     void increment(
-	InOutIterator first1 ,
-	InOutIterator last1 ,
-	InputIterator first2 ,
-	T alpha
-	)
+                   InOutIterator first1 ,
+                   InOutIterator last1 ,
+                   InputIterator first2 ,
+                   T alpha
+                   )
     {
-	while( first1 != last1 )
-	    (*first1++) += alpha * (*first2++);
+        while( first1 != last1 )
+            (*first1++) += alpha * (*first2++);
     }
 
 
     // computes y = x1 - x2
     template <
-	class OutputIterator ,
-	class InputIterator1 ,
-	class InputIterator2
-	>
+        class OutputIterator ,
+        class InputIterator1 ,
+        class InputIterator2
+        >
     void assign_diff(
-	OutputIterator first1 ,
-	OutputIterator last1 ,
-	InputIterator1 first2 ,
-	InputIterator2 first3 )
+                     OutputIterator first1 ,
+                     OutputIterator last1 ,
+                     InputIterator1 first2 ,
+                     InputIterator2 first3 )
     {
-	while( first1 != last1 )
-	    (*first1++) = (*first2++) - (*first3++);
+        while( first1 != last1 )
+            (*first1++) = (*first2++) - (*first3++);
     }
 
     // computes y = x1 + alpha * x2
     template <
-	class OutputIterator ,
-	class InputIterator1 ,
-	class InputIterator2 ,
-	class T
-	>
+        class OutputIterator ,
+        class InputIterator1 ,
+        class InputIterator2 ,
+        class T
+        >
     void assign_sum(
-	OutputIterator first1 ,
-	OutputIterator last1 ,
-	InputIterator1 first2 ,
-	InputIterator2 first3 ,
-	T alpha )
+                    OutputIterator first1 ,
+                    OutputIterator last1 ,
+                    InputIterator1 first2 ,
+                    InputIterator2 first3 ,
+                    T alpha )
     {
-	while( first1 != last1 )
-	    (*first1++) = (*first2++) + alpha * (*first3++);
+        while( first1 != last1 )
+            (*first1++) = (*first2++) + alpha * (*first3++);
     }
 
 
     // computes y = alpha1 * ( x1 + x2 + alpha2*x3 )
     template <
-	class OutputIterator ,
-	class InputIterator1 ,
-	class InputIterator2 ,
-	class InputIterator3 ,
-	class T
-	>
+        class OutputIterator ,
+        class InputIterator1 ,
+        class InputIterator2 ,
+        class InputIterator3 ,
+        class T
+        >
     void increment_sum_sum(
-	OutputIterator first1 ,
-	OutputIterator last1 ,
-	InputIterator1 first2 ,
-	InputIterator2 first3 ,
-	InputIterator3 first4 ,
-	T alpha1 ,
-	T alpha2
-	)
-    {
-	while( first1 != last1 )
-	    (*first1++) += alpha1 *
-		( (*first2++) + (*first3++) + alpha2*(*first4++) );
+                           OutputIterator first1 ,
+                           OutputIterator last1 ,
+                           InputIterator1 first2 ,
+                           InputIterator2 first3 ,
+                           InputIterator3 first4 ,
+                           T alpha1 ,
+                           T alpha2
+                           )
+    {
+        while( first1 != last1 )
+            (*first1++) += alpha1 *
+                ( (*first2++) + (*first3++) + alpha2*(*first4++) );
+    }
+
+    // computes y = x1 + alpha2*x2
+    template <
+        class OutputIterator ,
+        class InputIterator ,
+        class T
+        >
+    inline void scale_sum( OutputIterator y_begin ,
+                           OutputIterator y_end ,
+                           T alpha1 ,
+                           InputIterator x1_begin ,
+                           T alpha2 ,
+                           InputIterator x2_begin )
+    {
+        while( y_begin != y_end )
+            (*y_begin++) = alpha1 * (*x1_begin++) + alpha2 * (*x2_begin++);
+    }
+
+
+    // computes y = x1 + alpha2*x2 + alpha3*x3
+    template <
+        class OutputIterator ,
+        class InputIterator ,
+        class T
+        >
+    inline void scale_sum( OutputIterator y_begin ,
+                           OutputIterator y_end ,
+                           T alpha1 ,
+                           InputIterator x1_begin ,
+                           T alpha2 ,
+                           InputIterator x2_begin ,
+                           T alpha3 ,
+                           InputIterator x3_begin )
+    {
+        while( y_begin != y_end )
+            (*y_begin++) = 
+                alpha1 * (*x1_begin++) + 
+                alpha2 * (*x2_begin++) + 
+                alpha3 * (*x3_begin++);
+    }
+
+    // computes y = x1 + alpha2*x2 + alpha3*x3 + alpha4*x4
+    template <
+        class OutputIterator ,
+        class InputIterator ,
+        class T
+        >
+    inline void scale_sum( OutputIterator y_begin ,
+                           OutputIterator y_end ,
+                           T alpha1 ,
+                           InputIterator x1_begin ,
+                           T alpha2 ,
+                           InputIterator x2_begin ,
+                           T alpha3 ,
+                           InputIterator x3_begin ,
+                           T alpha4 ,
+                           InputIterator x4_begin )
+    {
+        while( y_begin != y_end )
+            (*y_begin++) = 
+                alpha1 * (*x1_begin++) + 
+                alpha2 * (*x2_begin++) + 
+                alpha3 * (*x3_begin++) +
+                alpha4 * (*x4_begin++);
     }
 
+    // computes y = x1 + alpha2*x2 + alpha3*x3 + alpha4*x4 + alpha5*x5
+    template <
+        class OutputIterator ,
+        class InputIterator ,
+        class T
+        >
+    inline void scale_sum( OutputIterator y_begin ,
+                           OutputIterator y_end ,
+                           T alpha1 ,
+                           InputIterator x1_begin ,
+                           T alpha2 ,
+                           InputIterator x2_begin ,
+                           T alpha3 ,
+                           InputIterator x3_begin ,
+                           T alpha4 ,
+                           InputIterator x4_begin ,
+                           T alpha5 ,
+                           InputIterator x5_begin )
+    {
+        while( y_begin != y_end )
+            (*y_begin++) = 
+                alpha1 * (*x1_begin++) + 
+                alpha2 * (*x2_begin++) + 
+                alpha3 * (*x3_begin++) +
+                alpha4 * (*x4_begin++) +
+                alpha5 * (*x5_begin++);
+    }
 
-    // computes y = x1 + alpha * x2 ; x2 += x3
+
+    // computes y = x1 + alpha2*x2 + alpha3*x3 + alpha4*x4 + alpha5*x5 + alpha6*x6
+    template <
+        class OutputIterator ,
+        class InputIterator ,
+        class T
+        >
+    inline void scale_sum( OutputIterator y_begin ,
+                           OutputIterator y_end ,
+                           T alpha1 ,
+                           InputIterator x1_begin ,
+                           T alpha2 ,
+                           InputIterator x2_begin ,
+                           T alpha3 ,
+                           InputIterator x3_begin ,
+                           T alpha4 ,
+                           InputIterator x4_begin ,
+                           T alpha5 ,
+                           InputIterator x5_begin ,
+                           T alpha6 ,
+                           InputIterator x6_begin )
+    {
+        while( y_begin != y_end )
+            (*y_begin++) = 
+                alpha1 * (*x1_begin++) + 
+                alpha2 * (*x2_begin++) + 
+                alpha3 * (*x3_begin++) +
+                alpha4 * (*x4_begin++) +
+                alpha5 * (*x5_begin++) +
+                alpha6 * (*x6_begin++);
+    }
+
+
+    // computes y = x1 + alpha2 * x2 ; x2 += x3
     template<
-	class OutputIterator ,
-	class InputIterator1 ,
-	class InOutIterator ,
-	class InputIterator2 ,
-	class T
-	>
+        class OutputIterator ,
+        class InputIterator1 ,
+        class InOutIterator ,
+        class InputIterator2 ,
+        class T
+        >
     void assign_sum_increment(
-	OutputIterator first1 ,
-	OutputIterator last1 ,
-	InputIterator1 first2 ,
-	InOutIterator first3 ,
-	InputIterator2 first4 ,
-	T alpha
-	)
-    {
-	while( first1 != last1 )
-	{
-	    (*first1++) = (*first2++) + alpha * (*first3);
-	    (*first3++) += (*first4++);
-	}
+                              OutputIterator first1 ,
+                              OutputIterator last1 ,
+                              InputIterator1 first2 ,
+                              InOutIterator first3 ,
+                              InputIterator2 first4 ,
+                              T alpha
+                              )
+    {
+        while( first1 != last1 )
+          {
+                (*first1++) = (*first2++) + alpha * (*first3);
+                (*first3++) += (*first4++);
+          }
     }
 
-
+    
     
 
 
Modified: sandbox/odeint/boost/numeric/odeint/stepper_rk4.hpp
==============================================================================
--- sandbox/odeint/boost/numeric/odeint/stepper_rk4.hpp	(original)
+++ sandbox/odeint/boost/numeric/odeint/stepper_rk4.hpp	2009-11-13 16:29:32 EST (Fri, 13 Nov 2009)
@@ -149,7 +149,7 @@
         template< class DynamicalSystem >
         void next_step( DynamicalSystem &system ,
                         container_type &x ,
-                        const container_type &dxdt ,
+                        container_type &dxdt ,
                         time_type t ,
                         time_type dt ,
                         container_type &xerr )
@@ -167,85 +167,54 @@
             assign_sum( m_xt.begin() , m_xt.end() , x.begin() , dxdt.begin() , 
                         dt*time_type(rk4_b21) );
 
-            system( m_xt , m_dxt , t + dt*time_type(rk4_a2) ); // m_dxt = nr_ak2
-            iterator x_i = x.begin();
-            iterator m_xt_i = m_xt.begin();
-            typename container_type::const_iterator dxdt_i = dxdt.begin();
-            iterator m_dxt_i = m_dxt.begin();
-            while( x_i != x.end() ) {
-                *m_xt_i++ = (*x_i++) + dt*( time_type(rk4_b31)*(*dxdt_i++) + 
-                                            time_type(rk4_b32)*(*m_dxt_i++) );
-            }
+            time_type t_1 = time_type(1.0);
 
+            system( m_xt , m_dxt , t + dt*time_type(rk4_a2) ); // m_dxt = nr_ak2
+            // m_xt = x + dt*rk4_b31*dxt + dt*rk4_b32*m_dxt
+            scale_sum( m_xt.begin(), m_xt.end(), 
+                       t_1, x.begin(), 
+                       dt*time_type(rk4_b31), dxdt.begin(),
+                       dt*time_type(rk4_b32), m_dxt.begin() );
+            
             system( m_xt , m_dxm , t + dt*time_type(rk4_a3) ); // m_dxm = nr_ak3
-            x_i = x.begin();
-            m_xt_i = m_xt.begin();
-            dxdt_i = dxdt.begin();
-            m_dxt_i = m_dxt.begin();
-            iterator m_dxm_i = m_dxm.begin();
-            while( x_i != x.end() ) {
-                *m_xt_i++ = (*x_i++) + dt*( time_type(rk4_b41)*(*dxdt_i++) + 
-                                            time_type(rk4_b42)*(*m_dxt_i++) + 
-                                            time_type(rk4_b43)*(*m_dxm_i++) );
-            }
+            scale_sum( m_xt.begin(), m_xt.end(), 
+                       t_1, x.begin(),
+                       dt*time_type(rk4_b41), dxdt.begin(),
+                       dt*time_type(rk4_b42), m_dxt.begin(),
+                       dt*time_type(rk4_b43), m_dxm.begin() );
 
             system( m_xt, m_x4 , t + dt*time_type(rk4_a4) ); // m_x4 = nr_ak4
-            x_i = x.begin();
-            m_xt_i = m_xt.begin();
-            dxdt_i = dxdt.begin();
-            m_dxt_i = m_dxt.begin();
-            m_dxm_i = m_dxm.begin();
-            iterator m_x4_i = m_x4.begin();
-            while( x_i != x.end() ) {
-                *m_xt_i++ = (*x_i++) + dt*( time_type(rk4_b51)*(*dxdt_i++) + 
-                                            time_type(rk4_b52)*(*m_dxt_i++) +
-                                            time_type(rk4_b53)*(*m_dxm_i++) + 
-                                            time_type(rk4_b54)*(*m_x4_i++) ) ;
-            }
+            scale_sum( m_xt.begin(), m_xt.end(), 
+                       t_1, x.begin(),
+                       dt*time_type(rk4_b51), dxdt.begin(),
+                       dt*time_type(rk4_b52), m_dxt.begin(),
+                       dt*time_type(rk4_b53), m_dxm.begin(),
+                       dt*time_type(rk4_b54), m_x4.begin() );
 
             system( m_xt , m_x5 , t + dt*time_type(rk4_a5) ); // m_x5 = nr_ak5
-            x_i = x.begin();
-            m_xt_i = m_xt.begin();
-            dxdt_i = dxdt.begin();
-            m_dxt_i = m_dxt.begin();
-            m_dxm_i = m_dxm.begin();
-            m_x4_i = m_x4.begin();
-            iterator m_x5_i = m_x5.begin();
-            while( x_i != x.end() ) {
-                *m_xt_i++ = (*x_i++) + dt*( time_type(rk4_b61)*(*dxdt_i++) +
-                                          time_type(rk4_b62)*(*m_dxt_i++) +
-                                          time_type(rk4_b63)*(*m_dxm_i++) +
-                                          time_type(rk4_b64)*(*m_x4_i++) +
-                                          time_type(rk4_b65)*(*m_x5_i++) );
-            }
+            scale_sum( m_xt.begin(), m_xt.end(), 
+                       t_1, x.begin(),
+                       dt*time_type(rk4_b61), dxdt.begin(),
+                       dt*time_type(rk4_b62), m_dxt.begin(),
+                       dt*time_type(rk4_b63), m_dxm.begin(),
+                       dt*time_type(rk4_b64), m_x4.begin(),
+                       dt*time_type(rk4_b65), m_x5.begin() );
 
             system( m_xt , m_x6 , t + dt*time_type(rk4_a6) ); // m_x6 = nr_ak6
-            x_i = x.begin();
-            dxdt_i = dxdt.begin();
-            m_dxm_i = m_dxm.begin();
-            m_x4_i = m_x4.begin();
-            iterator m_x6_i = m_x6.begin();
-            while( x_i != x.end() ) {
-                (*x_i++) += dt*( time_type(rk4_c1)*(*dxdt_i++) +
-                                 time_type(rk4_c3)*(*m_dxm_i++) +
-                                 time_type(rk4_c4)*(*m_x4_i++) +
-                                 time_type(rk4_c6)*(*m_x6_i++) );
-            }
-
+            scale_sum( x.begin(), x.end(), 
+                       t_1, x.begin(),
+                       dt*time_type(rk4_c1), dxdt.begin(),
+                       dt*time_type(rk4_c3), m_dxm.begin(),
+                       dt*time_type(rk4_c4), m_x4.begin(),
+                       dt*time_type(rk4_c6), m_x6.begin() );
+            
             // error estimate
-            iterator xerr_i = xerr.begin();
-            dxdt_i = dxdt.begin();
-            m_dxm_i = m_dxm.begin();
-            m_x4_i = m_x4.begin();
-            m_x5_i = m_x5.begin();
-            m_x6_i = m_x6.begin();
-            while( xerr_i != xerr.end() ) {
-                *xerr_i++ = dt*( time_type(rk4_dc1)*(*dxdt_i++) +
-                                  time_type(rk4_dc3)*(*m_dxm_i++) +
-                                  time_type(rk4_dc4)*(*m_x4_i++) +
-                                  time_type(rk4_dc5)*(*m_x5_i++) +
-                                  time_type(rk4_dc6)*(*m_x6_i++) );
-            }
+            scale_sum(xerr.begin(), xerr.end(),
+                      dt*time_type(rk4_dc1), dxdt.begin(),
+                      dt*time_type(rk4_dc3), m_dxm.begin(),
+                      dt*time_type(rk4_dc4), m_x4.begin(),
+                      dt*time_type(rk4_dc5), m_x5.begin(),
+                      dt*time_type(rk4_dc6), m_x6.begin() );
         }
 
         template< class DynamicalSystem >