Advertisement
dlemire

bitmapslow.cpp

Feb 27th, 2012
919
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.40 KB | None | 0 0
  1. /**
  2. *
  3. * Daniel Lemire, February 2012, http://lemire.me/
  4. *
  5. * Suppose that you want to copy an array over
  6. * another, except for some exceptions.
  7. * What is faster?
  8. *
  9. *  g++ -Ofast -o bitmapslow bitmapslow.cpp
  10.  
  11. *
  12. * Published on blog.
  13. * Revised on Feb. 27 2011 after bug report by Martin Trenkmann
  14. */
  15.  
  16.  
  17. #include <iostream>
  18. #include <vector>
  19. #include <cstdlib>
  20. #include <cstring>
  21. #include <sys/stat.h>
  22. #include <sys/time.h>
  23. #include <sys/types.h>
  24.  
  25. using namespace std;
  26.  
  27. typedef unsigned int uint;
  28. typedef unsigned char byte;
  29.  
  30.  
  31.  
  32.  
  33. class ZTimer
  34. {
  35. public:
  36.     struct timeval t1, t2;
  37. public:
  38.     ZTimer() :  t1(), t2() { gettimeofday(&t1,0); t2 = t1; }
  39.     void reset() {gettimeofday(&t1,0); t2 = t1;}
  40.     int elapsed() { return ((t2.tv_sec - t1.tv_sec) * 1000) + ((t2.tv_usec - t1.
  41. tv_usec) / 1000); }
  42.     int split() { gettimeofday(&t2,0); return elapsed(); }
  43. };
  44.  
  45.  
  46. /**
  47. * This copies the data from begin to end. We write to "out".
  48. */
  49. __attribute__ ((noinline)) void justcopy(const uint * const  __restrict__ begin,
  50.         const uint * const  end,
  51.     uint * __restrict__ out) {
  52.     for(const uint *  __restrict__ i = begin; i!=end;++i) {
  53.         *(out++) = *i;
  54.     }
  55.     /* we could use this instead of the previous loop, but it is no faster:
  56.      memcpy ( out, begin, (end-begin)*sizeof(uint) );
  57.      at least under gcc 4.6
  58.     */
  59. }
  60.  
  61. /**
  62. * This copies the data from begin to end, except at some locations
  63. * given by exceptionpos where it must use the values from
  64. * exceptionval. We write to "out".
  65. */
  66. __attribute__ ((noinline)) void patchedcopy(const uint * const  __restrict__ begin,
  67.         const uint * const  end,
  68.         const  uint * __restrict__ exceptionval,
  69.         const vector<uint> & exceptionpos,
  70.     uint * __restrict__ out) {
  71.     uint * __restrict__ tmpout = out;
  72.     for(const uint *  __restrict__ i = begin; i!=end;++i) {
  73.         *(tmpout++) = *i;
  74.     }
  75.     for(vector<uint>::const_iterator pos = exceptionpos.begin(); pos!= exceptionpos.end();++pos) {
  76.         *(out + *pos) = *(exceptionval++);
  77.     }
  78. }
  79.  
  80. /**
  81. * This copies the data from begin to end, except at some locations
  82. * given by exceptionpos where it must use the values from
  83. * exceptionval. We write to "out".
  84. *
  85. * Proposed by Luc Golden
  86. */
  87. __attribute__ ((noinline)) void patchedcopy2(const uint * const  __restrict__ begin,
  88.         const uint * const  end,
  89.         const  uint * __restrict__ exceptionval,
  90.         const vector<uint> & exceptionpos,
  91.     uint * __restrict__ out) {
  92.     vector<uint>::const_iterator pos = exceptionpos.begin();
  93.     const uint *  __restrict__ i = begin;
  94.     for(; i!=end;++i) {
  95.         if(i-begin == *pos) {
  96.             *(out++) =  *(exceptionval++);
  97.             ++pos;
  98.             if(pos==exceptionpos.end()) {
  99.               // no more exceptions
  100.               for(;i!=end;++i) {
  101.                 *(out++) = *i;
  102.               }
  103.               break;
  104.             }
  105.         } else
  106.             *(out++) = *i;
  107.     }
  108.    
  109. }
  110.  
  111.  
  112.  
  113. /**
  114. * This copies the data from begin to end, except at some locations
  115. * given by exceptionbitmap where it must use the values from
  116. * exceptionval. We write to "out".
  117. */
  118. __attribute__ ((noinline)) void bitmapcopy(const uint * const  __restrict__ begin,
  119.         const uint * const  end,
  120.         const  uint * __restrict__ exceptionval,
  121.         const vector<uint> & exceptionbitmap,
  122.     uint * __restrict__ out) {
  123.     vector<uint>::const_iterator j = exceptionbitmap.begin();
  124.     for(const uint *  __restrict__ i = begin; i<end;++i) {
  125.         for(uint k = 0; k<32;++k,++i)
  126.             *(out++) = ((*j) & (1U<<k))? *(exceptionval++) : *i;
  127.         ++j;--i;
  128.     }
  129. }
  130.  
  131. /**
  132. * This copies the data from begin to end, except at some locations
  133. * given by exceptionbitmap where it must use the values from
  134. * exceptionval. We write to "out".
  135. */
  136. __attribute__ ((noinline)) void bitmapcopy8bits(const uint * const  __restrict__ begin,
  137.         const uint * const  end,
  138.         const  uint * __restrict__ exceptionval,
  139.         const vector<uint> & exceptionbitmap,
  140.     uint * __restrict__ out) {
  141.     const byte * __restrict__ j = reinterpret_cast<const byte *>(&exceptionbitmap[0]);
  142.     for(const uint *  __restrict__ i = begin; i<end;++i) {
  143.         for(uint k = 0; k<8;++k,++i)
  144.             *(out++) = ((*j) & (1U<<k))? *(exceptionval++) : *i;
  145.         ++j;--i;
  146.     }
  147. }
  148.  
  149.  
  150.  
  151.  
  152. vector<uint> generateArray(uint N) {
  153.     vector<uint> ans(N);
  154.     for(uint k = 0; k<N;++k)
  155.       ans[k] = rand();
  156.     return ans;
  157. }
  158.  
  159. pair<vector<uint>, vector<uint> > generateExceptLocations(uint N, uint density) {
  160.     vector<uint> positions;
  161.     vector<uint> bitmap(N/32);
  162.     for(uint k = 0; k<N;k+= density) {
  163.         positions.push_back(k);
  164.         bitmap[k/32] |= (1<<(k%32));
  165.     }
  166.     return pair<vector<uint>, vector<uint> >(positions,bitmap);
  167. }
  168. int main() {
  169.     uint N = 1000000*32;//1024*1024*100;
  170.     uint density = 100;
  171.     vector<uint> data = generateArray(N);
  172.     vector<uint> out(N);
  173.     ZTimer z;
  174.  
  175.     vector<uint> exceptionvals = generateArray(N);// more than we need
  176.     cout <<" density(%)\t time with patching \t Luc Golden\t time with bitmaps\t time with 8-bit bitmaps\t justcopy"<<endl;
  177.     for(uint density = 1; density<50; density+=5) {
  178.         pair<vector<uint>, vector<uint> > exceptionpos = generateExceptLocations( N, density);
  179.    
  180.         cout<<100.0/density<<"\t\t\t";
  181.  
  182.        
  183.         z.reset();
  184.         patchedcopy(& data[0],
  185.         (& data[0]) + N,
  186.         &  exceptionvals[0],
  187.         exceptionpos.first,
  188.         &out[0]);
  189.         cout<<z.split()<<"\t\t\t";
  190.  
  191.         z.reset();
  192.         patchedcopy2(& data[0],
  193.         (& data[0]) + N,
  194.         &  exceptionvals[0],
  195.         exceptionpos.first,
  196.         &out[0]);
  197.         cout<<z.split()<<"\t\t\t";
  198.  
  199.         z.reset();
  200.         bitmapcopy(& data[0],
  201.         (& data[0]) + N,
  202.         &  exceptionvals[0],
  203.         exceptionpos.second,
  204.         &out[0]);  
  205.         cout<<z.split()<<"\t\t\t";
  206.  
  207.  
  208.         z.reset();
  209.         bitmapcopy8bits(& data[0],
  210.         (& data[0]) + N,
  211.         &  exceptionvals[0],
  212.         exceptionpos.second,
  213.         &out[0]);  
  214.         cout<<z.split()<<"\t\t\t";
  215.  
  216.  
  217.         z.reset();
  218.         justcopy(& data[0],
  219.         (& data[0]) + N,
  220.         &out[0]);  
  221.         cout<<z.split()<<"\t";
  222.  
  223.        
  224.         cout<<endl;
  225.     }
  226.     return 0;
  227. }
  228.  
  229.  
  230.  
  231. /**
  232.  
  233.   Processor Name:   Intel Core i7
  234.   Processor Speed:  1.8 GHz
  235.  
  236. $ g++-4 --version
  237. g++-4 (GCC) 4.6.2
  238. $ g++-4 -funroll-loops -Ofast -o bitmapslow bitmapslow.cpp
  239. lemire@Daniels-MacBook-Air ~/CVS/privatesvnrepos/BitFor/code/Cpp $ ./bitmapslow
  240.  density(%)  time with patching      Luc Golden  time with bitmaps   time with 8-bit bitmaps     justcopy
  241. 100         59          62          48          26          25 
  242. 16.6667         46          40          31          27          25 
  243. 9.09091         47          40          32          27          24 
  244. 6.25            45          42          30          26          24 
  245. 4.7619          43          40          31          26          24 
  246. 3.84615         41          40          30          26          25 
  247. 3.22581         38          41          32          26          24 
  248. 2.77778         37          41          30          26          24 
  249. 2.43902         35          41          30          26          25 
  250. 2.17391         35          43          29          26          25 
  251. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement