Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Badass radix sort, works upto 4 times faster than qsort()
- // for instance, on Intel Q9440:
- // qsort: 1484
- // radixsort: 370
- // All credits to Victor J. Duvanenko
- // http://www.drdobbs.com/parallel/parallel-in-place-radix-sort-simplified/229000734?pgno=2
- #include <assert.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
- #include <algorithm>
- #include <vector>
- // Copyright(c), Victor J. Duvanenko, 2010
- // Function template In-place MSD Radix Sort implementation (simplified).
- template< class _Type, unsigned long PowerOfTwoRadix, unsigned long Log2ofPowerOfTwoRadix, long Threshold >
- inline void _RadixSort_Unsigned_PowerOf2Radix_L1( _Type* a, long last, _Type bitMask, unsigned long shiftRightAmount )
- {
- unsigned long count[ PowerOfTwoRadix ];
- for( unsigned long i = 0; i < PowerOfTwoRadix; i++ )
- {
- count[ i ] = 0;
- }
- for ( long i = 0; i <= last; i++ )
- {
- // Scan the array and count the number of times each value appears
- count[ (unsigned long)(( a[ i ] & bitMask ) >> shiftRightAmount ) ]++;
- }
- long startOfBin[ PowerOfTwoRadix + 1 ],
- endOfBin[ PowerOfTwoRadix ], nextBin = 1;
- startOfBin[ 0 ] = endOfBin[ 0 ] = 0;
- startOfBin[ PowerOfTwoRadix ] = -1; // sentinel
- for( unsigned long i = 1; i < PowerOfTwoRadix; i++ )
- {
- startOfBin[ i ] = endOfBin[ i ] = startOfBin[ i - 1 ] + count[ i - 1 ];
- }
- for ( long _current = 0; _current <= last; )
- {
- unsigned long digit;
- // get the compiler to recognize that a register
- // can be used for the loop instead of a[_current] memory location
- _Type _current_element = a[ _current ];
- while( endOfBin[ digit = (unsigned long)(( _current_element & bitMask ) >> shiftRightAmount )] != _current )
- {
- std::swap( _current_element, a[ endOfBin[ digit ]++ ] );
- }
- a[ _current ] = _current_element;
- endOfBin[ digit ]++;
- // skip over empty and full bins,
- // when the end of the current bin reaches the start of the next bin
- while( endOfBin[ nextBin - 1 ] == startOfBin[ nextBin ] )
- {
- nextBin++;
- }
- _current = endOfBin[ nextBin - 1 ];
- }
- bitMask >>= Log2ofPowerOfTwoRadix;
- if ( bitMask != 0 ) // end recursion when all the bits have been processes
- {
- if ( shiftRightAmount >= Log2ofPowerOfTwoRadix )
- {
- shiftRightAmount -= Log2ofPowerOfTwoRadix;
- }
- else
- {
- shiftRightAmount = 0;
- }
- for( unsigned long i = 0; i < PowerOfTwoRadix; i++ )
- {
- long numberOfElements = endOfBin[ i ] - startOfBin[ i ];
- // endOfBin actually points to one beyond the bin
- if ( numberOfElements >= Threshold )
- {
- _RadixSort_Unsigned_PowerOf2Radix_L1< _Type, PowerOfTwoRadix, Log2ofPowerOfTwoRadix, Threshold >( &a[ startOfBin[ i ]], numberOfElements - 1, bitMask, shiftRightAmount );
- }
- else if ( numberOfElements >= 2 )
- {
- std::sort(&a[ startOfBin[ i ]], &a[ startOfBin[ i ]] + numberOfElements);
- //insertionSortSimilarToSTLnoSelfAssignment( &a[ startOfBin[ i ]], numberOfElements );
- }
- }
- }
- }
- template< class _Type >
- inline void RadixSortInPlace_PowerOf2Radix_Unsigned( _Type* a, unsigned long a_size )
- {
- const long Threshold = 25;
- const unsigned long PowerOfTwoRadix = 256; // Radix - must be a power of 2
- const unsigned long Log2ofPowerOfTwoRadix = 8; // log( 256 ) = 8
- unsigned long shiftRightAmount = sizeof( _Type ) * 8 - Log2ofPowerOfTwoRadix; // Create bit-mask and shift right amount
- _Type bitMask = (_Type)( ((_Type)( PowerOfTwoRadix - 1 )) << shiftRightAmount ); // bitMask controls/selects how many and which bits we process at a time
- if ( a_size >= Threshold )
- _RadixSort_Unsigned_PowerOf2Radix_L1< _Type, PowerOfTwoRadix, Log2ofPowerOfTwoRadix, Threshold >( a, a_size - 1, bitMask, shiftRightAmount );
- else
- {
- std::sort(a, a+a_size);
- //insertionSortSimilarToSTLnoSelfAssignment( a, a_size );
- }
- }
- #define MAX 5
- #define SHOWPASS 0
- typedef std::vector<unsigned int> input_type;
- void radix_sort_simple(input_type & x)
- {
- if ( x.empty() ) return; // at least one element
- typedef std::vector< std::vector< input_type::value_type > > buckets_type;
- buckets_type buckets;
- buckets.resize(256 * 256); // allocate buckets
- // for sorting decimal numbers
- // find maximum in the array to limit the main loop below
- input_type::value_type max = *std::max_element(x.begin(), x.end());
- //begin radix sort
- int byte_pos = 0;
- for(; max != 0 ; max >>= 16, ++byte_pos)
- {
- // 1. determine which bucket each element should enter
- // for each element in 'x':
- for(input_type::const_iterator elem = x.begin(); elem != x.end(); ++elem)
- {
- // calculate the bucket number:
- size_t const bucket_num = ( *elem >> (16*byte_pos) ) & 0xFFFF;
- // add the element to the list in the bucket:
- buckets[ bucket_num ].push_back( *elem );
- }
- // 2. transfer results of buckets back into main array
- input_type::iterator store_pos = x.begin();
- // for each bucket:
- for(buckets_type::iterator bucket = buckets.begin(); bucket != buckets.end(); ++bucket)
- {
- // for each element in the bucket:
- for(buckets_type::value_type::const_iterator bucket_elem = bucket->begin();
- bucket_elem != bucket->end(); ++bucket_elem)
- {
- // copy the element into next position in the main array
- *store_pos++ = *bucket_elem;
- }
- bucket->clear(); // forget the current bucket's list
- }
- }
- }
- int intcmp(const void* p1, const void *p2)
- {
- int a = *((const int*)p1), b = *((const int*)p2);
- return a - b;
- }
- int _tmain(int argc, _TCHAR* argv[])
- {
- srand(0);
- const int N = 10000000;
- std::vector<unsigned int> A, B;
- for(int i = 0; i < N; ++i)
- {
- int v = rand();
- A.push_back(v);
- B.push_back(v);
- }
- clock_t t0 = clock();
- qsort(&A.front(), N, sizeof(A[0]), intcmp);
- printf("qsort: %d\n", clock() - t0);
- t0 = clock();
- RadixSortInPlace_PowerOf2Radix_Unsigned(&B.front(), B.size());
- printf("radixsort: %d", clock() - t0);
- for(int i = 0; i < N; ++i)
- {
- assert(A[i] == B[i]);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment