<?php $include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r77987 - trunk/libs/context/performance
From: oliver.kowalke_at_[hidden]
Date: 2012-04-15 06:49:42
Author: olli
Date: 2012-04-15 06:49:42 EDT (Sun, 15 Apr 2012)
New Revision: 77987
URL: http://svn.boost.org/trac/boost/changeset/77987
Log:
context: reorganize performance measurement
Text files modified: 
   trunk/libs/context/performance/performance.cpp            |   133 +++++++++++++++------------------------ 
   trunk/libs/context/performance/performance_gcc_i386.hpp   |     7 +                                       
   trunk/libs/context/performance/performance_gcc_x86-64.hpp |    11 ++                                      
   3 files changed, 67 insertions(+), 84 deletions(-)
Modified: trunk/libs/context/performance/performance.cpp
==============================================================================
--- trunk/libs/context/performance/performance.cpp	(original)
+++ trunk/libs/context/performance/performance.cpp	2012-04-15 06:49:42 EDT (Sun, 15 Apr 2012)
@@ -4,6 +4,8 @@
 //    (See accompanying file LICENSE_1_0.txt or copy at
 //          http://www.boost.org/LICENSE_1_0.txt)
 
+#define BOOST_PP_LIMIT_MAG  10
+
 #include <cstdio>
 #include <cstdlib>
 #include <iostream>
@@ -13,7 +15,7 @@
 #include <boost/bind.hpp>
 #include <boost/config.hpp>
 #include <boost/context/all.hpp>
-#include <boost/program_options.hpp>
+#include <boost/preprocessor/repetition/repeat_from_to.hpp>
 
 #ifndef BOOST_WINDOWS
 #include <ucontext.h>
@@ -23,7 +25,12 @@
 #include "performance.hpp"
 
 namespace ctx = boost::ctx;
-namespace po = boost::program_options;
+
+#define CALL_UCONTEXT(z,n,unused) \
+    ::swapcontext( & ucm, & uc);
+
+#define CALL_FCONTEXT(z,n,unused) \
+    ctx::jump_fcontext( & fcm, & fc, 0);
 
 #ifndef BOOST_WINDOWS
 ucontext_t uc, ucm;
@@ -45,111 +52,74 @@
 }
 
 #ifndef BOOST_WINDOWS
-unsigned int test_ucontext( unsigned int iterations)
+unsigned int test_ucontext()
 {
-    cycle_t total( 0);
     cycle_t overhead( get_overhead() );
     std::cout << "overhead for rdtsc == " << overhead << " cycles" << std::endl;
 
-    // cache warum-up
-    {
-        ctx::stack_allocator alloc;
+    ctx::stack_allocator alloc;
 
-        ::getcontext( & uc);
-        uc.uc_stack.ss_sp = 
-            static_cast< char * >( alloc.allocate(ctx::default_stacksize() ) )
-            - ctx::default_stacksize();
-        uc.uc_stack.ss_size = ctx::default_stacksize();
-        ::makecontext( & uc, f2, 0);
-        swapcontext( & ucm, & uc);
-        swapcontext( & ucm, & uc);
-    }
+    ::getcontext( & uc);
+    uc.uc_stack.ss_sp = 
+        static_cast< char * >( alloc.allocate(ctx::default_stacksize() ) )
+        - ctx::default_stacksize();
+    uc.uc_stack.ss_size = ctx::default_stacksize();
+    ::makecontext( & uc, f2, 0);
 
-    for ( unsigned int i = 0; i < iterations; ++i)
-    {
-        cycle_t start( get_cycles() );
-        swapcontext( & ucm, & uc);
-        cycle_t diff( get_cycles() - start);
-
-        // we have two jumps and two measuremt-overheads
-        diff -= overhead; // overhead of measurement
-        diff /= 2; // 2x jump_to c1->c2 && c2->c1
+    // cache warm-up
+BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~)
 
-        BOOST_ASSERT( diff >= 0);
-        total += diff;
-    }
-    return total/iterations;
+    cycle_t start( get_cycles() );
+BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~)
+    cycle_t total( get_cycles() - start);
+
+    // we have two jumps and two measurement overheads
+    total -= overhead; // overhead of measurement
+    total /= BOOST_PP_LIMIT_MAG; // per call
+    total /= 2; // 2x jump_to c1->c2 && c2->c1
+
+    return total;
 }
 #endif
 
-unsigned int test_fcontext( unsigned int iterations)
+unsigned int test_fcontext()
 {
-    cycle_t total( 0);
     cycle_t overhead( get_overhead() );
     std::cout << "overhead for rdtsc == " << overhead << " cycles" << std::endl;
 
-    // cache warum-up
-    {
-        ctx::stack_allocator alloc;
+    ctx::stack_allocator alloc;
+    fc.fc_stack.base = alloc.allocate(ctx::default_stacksize());
+    fc.fc_stack.limit =
+        static_cast< char * >( fc.fc_stack.base) - ctx::default_stacksize();
+	ctx::make_fcontext( & fc, f1, 0);
 
-        fc.fc_stack.base = alloc.allocate(ctx::default_stacksize());
-        fc.fc_stack.limit =
-            static_cast< char * >( fc.fc_stack.base) - ctx::default_stacksize();
-		ctx::make_fcontext( & fc, f1, 0);
-        ctx::start_fcontext( & fcm, & fc);
-        ctx::jump_fcontext( & fcm, & fc, 0);
-    }
+    ctx::start_fcontext( & fcm, & fc);
 
-    for ( unsigned int i = 0; i < iterations; ++i)
-    {
-        cycle_t start( get_cycles() );
-        ctx::jump_fcontext( & fcm, & fc, 0);
-        cycle_t diff( get_cycles() - start);
-
-        // we have two jumps and two measuremt-overheads
-        diff -= overhead; // overhead of measurement
-        diff /= 2; // 2x jump_to c1->c2 && c2->c1
+    // cache warm-up
+BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~)
 
-        BOOST_ASSERT( diff >= 0);
-        total += diff;
-    }
-    return total/iterations;
+    cycle_t start( get_cycles() );
+BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~)
+    cycle_t total( get_cycles() - start);
+
+    // we have two jumps and two measurement overheads
+    total -= overhead; // overhead of measurement
+    total /= BOOST_PP_LIMIT_MAG; // per call
+    total /= 2; // 2x jump_to c1->c2 && c2->c1
+
+    return total;
 }
 
 int main( int argc, char * argv[])
 {
     try
     {
-        unsigned int iterations( 0);
-
-        po::options_description desc("allowed options");
-        desc.add_options()
-            ("help,h", "help message")
-            ("iterations,i", po::value< unsigned int >( & iterations), "iterations");
-
-        po::variables_map vm;
-        po::store(
-            po::parse_command_line(
-                argc,
-                argv,
-                desc),
-            vm);
-        po::notify( vm);
-
-        if ( vm.count("help") )
-        {
-            std::cout << desc << std::endl;
-            return EXIT_SUCCESS;
-        }
-
-        if ( 0 >= iterations) throw std::invalid_argument("iterations must be greater than zero");
-
         bind_to_processor( 0);
 
-        unsigned int res = test_fcontext( iterations);
+        unsigned int res = test_fcontext();
         std::cout << "fcontext: average of " << res << " cycles per switch" << std::endl;
 #ifndef BOOST_WINDOWS
-        res = test_ucontext( iterations);
+        res = test_ucontext();
         std::cout << "ucontext: average of " << res << " cycles per switch" << std::endl;
 #endif
 
@@ -161,3 +131,6 @@
     { std::cerr << "unhandled exception" << std::endl; }
     return EXIT_FAILURE;
 }
+
+#undef CALL_FCONTEXT
+#undef CALL_UCONTEXT
Modified: trunk/libs/context/performance/performance_gcc_i386.hpp
==============================================================================
--- trunk/libs/context/performance/performance_gcc_i386.hpp	(original)
+++ trunk/libs/context/performance/performance_gcc_i386.hpp	2012-04-15 06:49:42 EDT (Sun, 15 Apr 2012)
@@ -8,6 +8,7 @@
 #define PERFORMANCE_GCC_I386_H
 
 #include <algorithm>
+#include <numeric>
 #include <cstddef>
 #include <vector>
 
@@ -20,6 +21,9 @@
 inline
 cycle_t get_cycles()
 {
+#if defined(__INTEL_COMPILER) || defined(__ICC) || defined(_ECC) || defined(__ICL)
+    return __rdtsc();
+#else
     boost::uint32_t res[2];
 
     __asm__ __volatile__ (
@@ -35,6 +39,7 @@
     );
 
     return * reinterpret_cast< cycle_t * >( res);
+#endif
 }
 
 struct measure
@@ -56,7 +61,7 @@
             overhead.begin(), overhead.end(),
             measure() );
     BOOST_ASSERT( overhead.begin() != overhead.end() );
-    return * std::min_element( overhead.begin(), overhead.end() );
+    return std::accumulate( overhead.begin(), overhead.end(), 0) / iterations;
 }
 
 #endif // PERFORMANCE_GCC_I386_H
Modified: trunk/libs/context/performance/performance_gcc_x86-64.hpp
==============================================================================
--- trunk/libs/context/performance/performance_gcc_x86-64.hpp	(original)
+++ trunk/libs/context/performance/performance_gcc_x86-64.hpp	2012-04-15 06:49:42 EDT (Sun, 15 Apr 2012)
@@ -8,6 +8,7 @@
 #define PERFORMANCE_GCC_X86_64_H
 
 #include <algorithm>
+#include <numeric>
 #include <cstddef>
 #include <vector>
 
@@ -20,8 +21,11 @@
 inline
 cycle_t get_cycles()
 {
+#if defined(__INTEL_COMPILER) || defined(__ICC) || defined(_ECC) || defined(__ICL)
+    return __rdtsc();
+#else
     boost::uint32_t res[2];
-
+    
     __asm__ __volatile__ (
         "xorl %%eax, %%eax\n"
         "cpuid\n"
@@ -33,8 +37,9 @@
         "cpuid\n"
         ::: "%rax", "%rbx", "%rcx", "%rdx"
     );
-
+    
     return * ( cycle_t *)res;
+#endif
 }
 
 struct measure
@@ -56,7 +61,7 @@
             overhead.begin(), overhead.end(),
             measure() );
     BOOST_ASSERT( overhead.begin() != overhead.end() );
-    return * std::min_element( overhead.begin(), overhead.end() );
+    return std::accumulate( overhead.begin(), overhead.end(), 0) / iterations;
 }
 
 #endif // PERFORMANCE_GCC_X86_64_H