$include_dir="/home/hyper-archives/ublas/include"; include("$include_dir/msg-header.inc") ?>
From: Gunter Winkler (guwi17_at_[hidden])
Date: 2007-06-30 09:13:50
Am Freitag, 29. Juni 2007 12:53 schrieb Markus Weimer:
> This version often is 10-20 times faster than the prod in uBLAS on
> sparse M. While this is all good, the implementation in test_fast()
> in the attached source is sometimes 1000 times faster on very sparse
> matrices, for example when running the code with the following
> parameters:
--------------------------
ublas compressed_matrix
--------------------------
M: 100000x10000
rightmult time            : 0.0100
our ublas leftmult time   : 30.5100
ublas leftmult time       : 0.0200
rightmult 1-norm          : 1001294.00000000
ublas leftmult 1-norm     : 1001294.00000000
our ublas leftmult 1-norm : 1001294.00000000
uBLAS is quite fast here ;-)
---8<---
#include <boost/numeric/ublas/operation.hpp>
//...
template <class MType>
void test_ublas(const size_t row, const size_t col, const char* data, 
const unsigned int maxloop){
  MType X(row,col);
  fill_ublas(row, col, data, X);
  vector<double> w(X.size2());
  for(size_t i=0; i<w.size(); ++i) w(i) = 1.0;
        
  
  vector<double> result(X.size1());
        
  const clock_t rightMultStartTime = clock();
  for(unsigned int loop=0; loop<maxloop; loop++) {
    // result = prod(X, w);
    axpy_prod(X, w, result, true);
  }
  const clock_t rightMultEndTime  = clock();
  const double rm_1norm = norm_1(result);
  
  vector<double> l(row);
  for(size_t i=0; i<l.size(); ++i){
    l[i] = 1.0;
  }
  
  vector<double> ourGrad(X.size2());
  
  const clock_t leftMultStartTime = clock();
  for(unsigned int i=0; i<maxloop; i++) {
    ourGrad = ourProd<vector<double>, MType, vector<double> >(l,X);
  }
  const clock_t leftMultEndTime = clock();
  const double our_l1norm = norm_1(ourGrad);
  assert(l.size() == row);
  assert(ourGrad.size() == col);
  assert(X.size1() == row);
  assert(X.size2() == col);
  
  vector<double> uBLASGrad(X.size2());
  const clock_t uBlasLeftMultStart = clock();
  for(unsigned int i=0; i<maxloop; i++) {
    // uBLASGrad = prod(l, X);
    axpy_prod(trans(X), l, uBLASGrad, true);
  }
  const clock_t uBlasLeftMultStop = clock();
  const double ublas_l1norm = norm_1(uBLASGrad);
  const double ublasleftTime  = ((double)(uBlasLeftMultStop - 
uBlasLeftMultStart))/CLOCKS_PER_SEC;
  const double leftTime       = ((double)(leftMultEndTime - 
leftMultStartTime))/CLOCKS_PER_SEC;
  const double rightTime      = ((double)(rightMultEndTime - 
rightMultStartTime))/CLOCKS_PER_SEC;
  
  // display time elapsed
  printf("rightmult time            : %.4lf\n",rightTime);
  printf("our ublas leftmult time   : %.4lf\n",leftTime);
  printf("ublas leftmult time       : %.4lf\n",ublasleftTime);
  printf("rightmult 1-norm          : %.8lf\n",rm_1norm);
  printf("ublas leftmult 1-norm     : %.8lf\n",ublas_l1norm);
  printf("our ublas leftmult 1-norm : %.8lf\n",our_l1norm);
}
--->8----