runnig

badass radix sort

Oct 2nd, 2012
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.62 KB | None | 0 0
  1. // Badass radix sort, works upto 4 times faster than qsort()
  2. // for instance, on Intel Q9440:
  3. // qsort: 1484
  4. // radixsort: 370
  5.  
  6. // All credits to Victor J. Duvanenko
  7. // http://www.drdobbs.com/parallel/parallel-in-place-radix-sort-simplified/229000734?pgno=2
  8.  
  9.  
  10. #include <assert.h>
  11.  
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #include <time.h>
  15. #include <algorithm>
  16. #include <vector>
  17.  
  18.  
  19.  
  20. // Copyright(c), Victor J. Duvanenko, 2010
  21. // Function template In-place MSD Radix Sort implementation (simplified).
  22. template< class _Type, unsigned long PowerOfTwoRadix, unsigned long Log2ofPowerOfTwoRadix, long Threshold >
  23. inline void _RadixSort_Unsigned_PowerOf2Radix_L1( _Type* a, long last, _Type bitMask, unsigned long shiftRightAmount )
  24. {
  25.     unsigned long count[ PowerOfTwoRadix ];
  26.     for( unsigned long i = 0; i < PowerOfTwoRadix; i++ )
  27.     {
  28.     count[ i ] = 0;
  29.     }
  30.  
  31.     for ( long i = 0; i <= last; i++ )  
  32.     {
  33.     // Scan the array and count the number of times each value appears
  34.     count[ (unsigned long)(( a[ i ] & bitMask ) >> shiftRightAmount ) ]++;
  35.     }
  36.  
  37.     long startOfBin[ PowerOfTwoRadix + 1 ],
  38.          endOfBin[ PowerOfTwoRadix ], nextBin = 1;
  39.  
  40.     startOfBin[ 0 ] = endOfBin[ 0 ] = 0;    
  41.     startOfBin[ PowerOfTwoRadix ] = -1;         // sentinel
  42.  
  43.     for( unsigned long i = 1; i < PowerOfTwoRadix; i++ )
  44.     {
  45.         startOfBin[ i ] = endOfBin[ i ] = startOfBin[ i - 1 ] + count[ i - 1 ];
  46.     }
  47.  
  48.     for ( long _current = 0; _current <= last; )
  49.     {
  50.         unsigned long digit;
  51.  
  52.     // get the compiler to recognize that a register
  53.         // can be used for the loop instead of a[_current] memory location
  54.         _Type _current_element = a[ _current ];
  55.  
  56.         while( endOfBin[ digit = (unsigned long)(( _current_element & bitMask ) >> shiftRightAmount )] != _current )
  57.     {
  58.         std::swap( _current_element, a[ endOfBin[ digit ]++ ] );
  59.     }
  60.         a[ _current ] = _current_element;
  61.          endOfBin[ digit ]++;
  62.  
  63.     // skip over empty and full bins,
  64.         // when the end of the current bin reaches the start of the next bin
  65.         while( endOfBin[ nextBin - 1 ] == startOfBin[ nextBin ] )  
  66.     {
  67.         nextBin++;  
  68.     }
  69.         _current = endOfBin[ nextBin - 1 ];
  70.     }
  71.  
  72.     bitMask >>= Log2ofPowerOfTwoRadix;
  73.  
  74.     if ( bitMask != 0 )                     // end recursion when all the bits have been processes
  75.     {
  76.         if ( shiftRightAmount >= Log2ofPowerOfTwoRadix )
  77.     {
  78.         shiftRightAmount -= Log2ofPowerOfTwoRadix;
  79.     }
  80.         else                                                
  81.     {
  82.         shiftRightAmount  = 0;
  83.     }
  84.  
  85.         for( unsigned long i = 0; i < PowerOfTwoRadix; i++ )
  86.         {
  87.             long numberOfElements = endOfBin[ i ] - startOfBin[ i ];
  88.  
  89.         // endOfBin actually points to one beyond the bin
  90.             if ( numberOfElements >= Threshold )    
  91.         {
  92.                 _RadixSort_Unsigned_PowerOf2Radix_L1< _Type, PowerOfTwoRadix, Log2ofPowerOfTwoRadix, Threshold >( &a[ startOfBin[ i ]], numberOfElements - 1, bitMask, shiftRightAmount );
  93.             }
  94.             else if ( numberOfElements >= 2 )
  95.             {
  96.         std::sort(&a[ startOfBin[ i ]], &a[ startOfBin[ i ]] + numberOfElements);
  97.                 //insertionSortSimilarToSTLnoSelfAssignment( &a[ startOfBin[ i ]], numberOfElements );
  98.         }
  99.         }
  100.     }
  101. }
  102.  
  103. template< class _Type >
  104. inline void RadixSortInPlace_PowerOf2Radix_Unsigned( _Type* a, unsigned long a_size )
  105. {
  106.     const long Threshold                      =  25;
  107.     const unsigned long PowerOfTwoRadix       = 256;    // Radix - must be a power of 2
  108.     const unsigned long Log2ofPowerOfTwoRadix =   8;    // log( 256 ) = 8
  109.     unsigned long shiftRightAmount = sizeof( _Type ) * 8 - Log2ofPowerOfTwoRadix;       // Create bit-mask and shift right amount
  110.     _Type bitMask = (_Type)( ((_Type)( PowerOfTwoRadix - 1 )) << shiftRightAmount );  // bitMask controls/selects how many and which bits we process at a time
  111.  
  112.     if ( a_size >= Threshold )
  113.         _RadixSort_Unsigned_PowerOf2Radix_L1< _Type, PowerOfTwoRadix, Log2ofPowerOfTwoRadix, Threshold >( a, a_size - 1, bitMask, shiftRightAmount );
  114.     else
  115.     {
  116.     std::sort(a, a+a_size);
  117.         //insertionSortSimilarToSTLnoSelfAssignment( a, a_size );
  118.     }
  119. }
  120.  
  121. #define MAX 5
  122. #define SHOWPASS 0
  123.  
  124. typedef std::vector<unsigned int> input_type;
  125.  
  126. void radix_sort_simple(input_type & x)
  127. {
  128.     if ( x.empty() ) return; // at least one element
  129.  
  130.     typedef std::vector< std::vector< input_type::value_type > > buckets_type;
  131.     buckets_type buckets;
  132.  
  133.     buckets.resize(256 * 256); // allocate buckets
  134.     // for sorting decimal numbers
  135.  
  136.    
  137.  
  138.     // find maximum in the array to limit the main loop below
  139.     input_type::value_type max = *std::max_element(x.begin(), x.end());
  140.  
  141.     //begin radix sort
  142.     int byte_pos = 0;
  143.     for(; max != 0 ; max >>= 16, ++byte_pos)
  144.     {
  145.         // 1. determine which bucket each element should enter
  146.         // for each element in 'x':
  147.         for(input_type::const_iterator elem = x.begin(); elem != x.end(); ++elem)
  148.         {
  149.                 // calculate the bucket number:
  150.                 size_t const bucket_num = ( *elem >> (16*byte_pos) ) & 0xFFFF;
  151.                 // add the element to the list in the bucket:
  152.                 buckets[ bucket_num ].push_back( *elem );
  153.         }
  154.  
  155.         // 2. transfer results of buckets back into main array
  156.         input_type::iterator store_pos = x.begin();
  157.         // for each bucket:
  158.         for(buckets_type::iterator bucket = buckets.begin(); bucket != buckets.end(); ++bucket)
  159.         {
  160.                 // for each element in the bucket:
  161.                 for(buckets_type::value_type::const_iterator bucket_elem = bucket->begin();
  162.                         bucket_elem != bucket->end(); ++bucket_elem)
  163.                 {
  164.                         // copy the element into next position in the main array
  165.                         *store_pos++ = *bucket_elem;
  166.                 }
  167.                 bucket->clear(); // forget the current bucket's list
  168.         }
  169.     }
  170. }
  171.  
  172. int intcmp(const void* p1, const void *p2)
  173. {
  174.     int a = *((const int*)p1), b = *((const int*)p2);
  175.     return a - b;
  176. }
  177.  
  178.  
  179. int _tmain(int argc, _TCHAR* argv[])
  180. {
  181.     srand(0);
  182.  
  183.     const int N = 10000000;
  184.     std::vector<unsigned int> A, B;
  185.  
  186.    
  187.     for(int i = 0; i < N; ++i)
  188.     {
  189.         int v = rand();
  190.         A.push_back(v);
  191.         B.push_back(v);
  192.        
  193.     }
  194.    
  195.     clock_t t0 = clock();
  196.     qsort(&A.front(), N, sizeof(A[0]), intcmp);
  197.     printf("qsort: %d\n", clock() - t0);
  198.        
  199.  
  200.     t0 = clock();
  201.     RadixSortInPlace_PowerOf2Radix_Unsigned(&B.front(), B.size());
  202.     printf("radixsort: %d", clock() - t0);
  203.  
  204.     for(int i = 0; i < N; ++i)
  205.     {
  206.         assert(A[i] == B[i]);
  207.     }
  208.  
  209.    
  210.     return 0;
  211. }
Advertisement
Add Comment
Please, Sign In to add comment