Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Fast computations of cumulative sums.
- * D. Lemire, July 2012
- *
- * Best results with:
- *
- $ g++-4.7 -funroll-loops -O3 -o cumulsum cumulsum.cpp
- $ ./unrolldeltas
- but we want to avoid the -funroll-loops flag.
- */
- // This may require GCC 4.4/
- #include <sys/stat.h>
- #include <sys/time.h>
- #include <sys/types.h>
- #include <iostream>
- #include <cassert>
- #include <vector>
- #include <stdexcept>
- using namespace std;
- class WallClockTimer {
- public:
- struct timeval t1, t2;
- WallClockTimer() :
- t1(), t2() {
- gettimeofday(&t1, 0);
- t2 = t1;
- }
- void reset() {
- gettimeofday(&t1, 0);
- t2 = t1;
- }
- int elapsed() {
- return ((t2.tv_sec - t1.tv_sec) * 1000) + ((t2.tv_usec - t1. tv_usec)
- / 1000);
- }
- int split() {
- gettimeofday(&t2, 0);
- return elapsed();
- }
- };
- vector<int> givemeanarray(size_t N) {
- vector<int> bigarray;
- bigarray.reserve(N);
- for(unsigned int k = 0; k<N; ++k)
- bigarray.push_back(k+k/3);
- return bigarray;
- }
- void slowishSum(vector<int> & data) {
- size_t s = data.size();
- if(s == 0) return;
- int* ptr = &data[0];
- for (size_t i = 1; i != s; ++i) {
- ptr[i] += ptr[i - 1] ;
- }
- }
- // By Vasily Volkov, improved by Daniel Lemire
- void fastSum(vector<int> & data) {
- size_t s = data.size();
- int* ptr = &data[0];
- if(s == 0) return;
- const size_t UnrollQty = 4;
- size_t sz0 = (s / UnrollQty) * UnrollQty; // equal to 0, if data.size() < UnrollQty
- size_t i = 1;
- if (sz0>=UnrollQty) {
- int a = ptr[0];
- for (; i < sz0 - UnrollQty ; i += UnrollQty) {
- a = (ptr[i] += a);
- a = (ptr[i + 1] += a);
- a = (ptr[i + 2] += a);
- a = (ptr[i + 3] += a);
- }
- }
- for (; i != s; ++i) {
- ptr[i] += ptr[i- 1] ;
- }
- }
- // loop unrolling helps avoid the need for -funroll-loops
- void sum(vector<int> & data) {
- size_t s = data.size();
- int* ptr = &data[0];
- if(s == 0) return;
- size_t i = 1;
- for (; i < s - 1; i+=2) {
- ptr[i] += ptr[i - 1];
- ptr[i+1] += ptr[i ];
- }
- for (; i != s; ++i) {
- ptr[i] += ptr[i - 1];
- }
- }
- void test(size_t N ) {
- WallClockTimer time;
- for(int t = 0; t<2;++t) {
- cout <<" test # "<< t<<endl;
- vector<int> data = givemeanarray(N) ;
- vector<int> copydata(data);
- time.reset();
- slowishSum(data);
- cout<<"basic sum "<<N/(1000.0*time.split())<<endl;
- data = copydata;
- time.reset();
- sum(data);
- cout<<"smarter sum "<<N/(1000.0*time.split())<<endl;
- data = copydata;
- time.reset();
- fastSum(data);
- cout<<"fast sum "<<N/(1000.0*time.split())<<endl;
- cout<<endl<<endl<<endl;
- }
- }
- int main() {
- cout << "reporting speed in million of integers per second "<<endl;
- size_t N = 50 * 1000 * 1000 ;
- test(N);
- cout<<"============"<<endl;
- test(N);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement