Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // BitonicSort.cpp : コンソール アプリケーションのエントリ ポイントを定義します。
- //
- #include "stdafx.h"
- //#include <stdlib.h>
- //#include <time.h>
- //#include <amp.h>
- //#include <ppl.h>
- #define N ( 2097152 )
- // 基本的なバイトニックソート
- void bitonicSort( int num, int *ary1 )
- {
- // 2の冪乗でなければ計算しない
- if( ( ( num - 1 ) & num ) != 0 )
- return;
- for( int block = 2; block <= num; block *= 2 ) {
- for( int step = block / 2; step >= 1; step /= 2 ) {
- for( int idx = 0; idx < num; idx++ ) {
- int e = idx ^ step;
- if( e > idx ) {
- int v1 = ary1[ idx ];
- int v2 = ary1[ e ];
- if( ( idx & block ) != 0 ) {
- if( v1 < v2 ) {
- ary1[ e ] = v1;
- ary1[ idx ] = v2;
- }
- } else {
- if( v1 > v2 ) {
- ary1[ e ] = v1;
- ary1[ idx ] = v2;
- }
- }
- }
- }
- }
- }
- }
- // 条件分岐をできるだけ排したバイトニックソート
- void bitonicSort2( int num, int *ary1 )
- {
- // 2の冪乗でなければ計算しない
- if( ( ( num - 1 ) & num ) != 0 )
- return;
- for( int block = 2; block <= num; block *= 2 ) {
- for( int step = block / 2; step >= 1; step /= 2 ) {
- unsigned int maskLow = (unsigned int)step - 1;
- unsigned int maskHigh = 0xFFFFFFFF ^ maskLow;
- for( int i = 0; i < num / 2; i++ ) {
- int idx = ( ( i & maskHigh ) << 1 ) | ( i & maskLow );
- int v1 = ary1[ idx ];
- int v2 = ary1[ idx + step ];
- int isAsc = ( ( idx & block ) != 0 ) ? 1 : 0;
- int isBigger = ( v1 > v2 ) ? 1 : 0;
- bool needSwap = ( ( isAsc + isBigger ) == 1 ) ? true : false;
- if( needSwap ) {
- ary1[ idx ] = v2;
- ary1[ idx + step ] = v1;
- }
- }
- }
- }
- }
- // OpenMPで並列化したバイトニックソート
- void bitonicSortOpenMP( int num, int *ary1 )
- {
- // 2の冪乗でなければ計算しない
- if( ( ( num - 1 ) & num ) != 0 )
- return;
- for( int block = 2; block <= num; block *= 2 ) {
- for( int step = block / 2; step >= 1; step /= 2 ) {
- #pragma omp parallel for
- for( int idx = 0; idx < num; idx++ ) {
- int e = idx ^ step;
- if( e > idx ) {
- int v1 = ary1[ idx ];
- int v2 = ary1[ e ];
- if( ( idx & block ) != 0 ) {
- if( v1 < v2 ) {
- ary1[ e ] = v1;
- ary1[ idx ] = v2;
- }
- } else {
- if( v1 > v2 ) {
- ary1[ e ] = v1;
- ary1[ idx ] = v2;
- }
- }
- }
- }
- }
- }
- }
- // PPLで並列化したバイトニックソート
- void bitonicSortPPL( int num, int *ary1 )
- {
- // 2の冪乗でなければ計算しない
- if( ( ( num - 1 ) & num ) != 0 )
- return;
- for( int block = 2; block <= num; block *= 2 ) {
- for( int step = block / 2; step >= 1; step /= 2 ) {
- concurrency::parallel_for( 0, num, 1, [&]( int idx )
- {
- int e = idx ^ step;
- if( e > idx ) {
- int v1 = ary1[ idx ];
- int v2 = ary1[ e ];
- if( ( idx & block ) != 0 ) {
- if( v1 < v2 ) {
- ary1[ e ] = v1;
- ary1[ idx ] = v2;
- }
- } else {
- if( v1 > v2 ) {
- ary1[ e ] = v1;
- ary1[ idx ] = v2;
- }
- }
- }
- } );
- }
- }
- }
- // C++ AMPで並列化したバイトニックソート
- void bitonicSortAmp( int num, int *ary1 )
- {
- // 2の冪乗でなければ計算しない
- if( ( ( num - 1 ) & num ) != 0 )
- return;
- concurrency::array_view< int, 1 > av1( num, ary1 );
- for( int block = 2; block <= num; block *= 2 ) {
- for( int step = block / 2; step >= 1; step /= 2 ) {
- concurrency::extent< 1 > extent = av1.extent;
- concurrency::parallel_for_each(
- extent,
- [=]( concurrency::index< 1 > i ) restrict( amp )
- {
- int idx = i[ 0 ];
- int e = idx ^ step;
- if( e > idx ) {
- int v1 = av1[ idx ];
- int v2 = av1[ e ];
- if( ( idx & block ) != 0 ) {
- if( v1 < v2 ) {
- av1[ e ] = v1;
- av1[ idx ] = v2;
- }
- } else {
- if( v1 > v2 ) {
- av1[ e ] = v1;
- av1[ idx ] = v2;
- }
- }
- }
- } );
- }
- }
- av1.synchronize();
- }
- // C++ AMPで並列化したバイトニックソート 2
- void bitonicSort2Amp( int num, int *ary1 )
- {
- // 2の冪乗でなければ計算しない
- if( ( ( num - 1 ) & num ) != 0 )
- return;
- concurrency::array_view< int, 1 > av1( num, ary1 );
- for( int block = 2; block <= num; block *= 2 ) {
- for( int step = block / 2; step >= 1; step /= 2 ) {
- unsigned int maskLow = (unsigned int)step - 1;
- unsigned int maskHigh = 0xFFFFFFFF ^ maskLow;
- concurrency::extent< 1 > extent = av1.extent / 2;
- concurrency::parallel_for_each(
- extent,
- [=]( concurrency::index< 1 > i ) restrict( amp )
- {
- int idx = ( ( i[ 0 ] & maskHigh ) << 1 ) | ( i[ 0 ] & maskLow );
- int v1 = av1[ idx ];
- int v2 = av1[ idx + step ];
- int isAsc = ( ( idx & block ) != 0 ) ? 1 : 0;
- int isBigger = ( v1 > v2 ) ? 1 : 0;
- bool needSwap = ( ( isAsc + isBigger ) == 1 ) ? true : false;
- if( needSwap ) {
- av1[ idx ] = v2;
- av1[ idx + step ] = v1;
- }
- } );
- }
- }
- av1.synchronize();
- }
- void bench( int m )
- {
- srand( 123456789 );
- int *ary1 = new int[ N ];
- for( int i = 0; i < N; i++ ) {
- ary1[ i ] = ( rand() >> 1 ) * ( rand() >> 1 ) * ( rand() >> 1 );
- }
- clock_t start, end;
- start = clock();
- switch( m ) {
- case 1: bitonicSort( N, ary1 ); break;
- case 2: bitonicSort2( N, ary1 ); break;
- case 3: bitonicSortOpenMP( N, ary1 ); break;
- case 4: bitonicSortPPL( N, ary1 ); break;
- case 5: bitonicSortAmp( N, ary1 ); break;
- case 6: bitonicSort2Amp( N, ary1 ); break;
- default: throw "バグバグ";
- }
- end = clock();
- printf( "%d の実行時間は %f [sec]のようです\n",
- m, ( (double)end - start ) / CLOCKS_PER_SEC );
- int prev = INT_MIN;
- for( int i = 0; i < N; i++ ) {
- //printf( "%d\n", ary1[ i ] );
- if( prev > ary1[ i ] ) {
- throw "ソートできてねぇだろボケェェェェェ";
- }
- prev = ary1[ i ];
- }
- delete[] ary1;
- }
- int main()
- {
- bench( 1 );
- bench( 2 );
- bench( 3 );
- bench( 4 );
- //bench( 5 ); // error on Radeon GPU
- bench( 6 );
- puts("おしまい");
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement