Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <windows.h>
- #include <stdio.h>
- #include <tchar.h>
- #include <time.h>
- #include <emmintrin.h>
- #include <vector>
- //
- static const __m128i zero = _mm_set_epi32( 0, 0, 0, 0 );
- static const __m128i xor = _mm_set_epi32( 0, 0, -1, -1 );
- static const __m128i shfl = _mm_set_epi8(
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0
- );
- static const int Exp = 16;
- //
- inline void Quadrant1( const __m128i&pair, volatile int*q1, volatile int*q2 ){
- __m128i gz = _mm_cmpgt_epi32( pair, zero );
- __m128i lz = _mm_cmpgt_epi32( zero, pair );
- __m128i p1 = _mm_unpacklo_epi32( gz, lz );
- __m128i p2 = _mm_unpackhi_epi32( gz, lz );
- *q1 = _mm_shuffle_epi8( p1, shfl ).m128i_i32[0];
- *q2 = _mm_shuffle_epi8( p2, shfl ).m128i_i32[0];
- }
- inline void Quadrant1( int x1, int y1, int x2, int y2, const __m128i&c, volatile int*q1, volatile int*q2 ){
- return Quadrant1( _mm_sub_epi32( _mm_set_epi32( y2, x2, y1, x1 ), c ), q1, q2 );
- }
- //
- inline void Quadrant2( const __m128i&pair, volatile int*q1, volatile int*q2 ){
- __m128i gz = _mm_cmpgt_epi32( pair, zero );
- __m128i lz = _mm_cmpgt_epi32( zero, pair );
- __m128i p1 = _mm_unpacklo_epi32( gz, lz );
- __m128i p2 = _mm_unpackhi_epi32( gz, lz );
- //
- if( lz.m128i_i32[1] ){
- p1 = _mm_xor_si128( p1, xor );
- }
- if( lz.m128i_i32[3] ){
- p2 = _mm_xor_si128( p2, xor );
- }
- *q1 = _mm_shuffle_epi8( p1, shfl ).m128i_i32[0];
- *q2 = _mm_shuffle_epi8( p2, shfl ).m128i_i32[0];
- }
- //
- inline void Quadrant2( int x1, int y1, int x2, int y2, const __m128i&c, volatile int*q1, volatile int*q2 ){
- return Quadrant2( _mm_sub_epi32( _mm_set_epi32( y2, x2, y1, x1 ), c ), q1, q2 );
- }
- //
- int _tmain( int argc, LPCTSTR argv[] ){
- static const DWORD Measure = 5000;
- //
- std::vector< __m128i >Points;
- __m128i c;
- ULONGLONG Start, T0, T1, T2;
- DWORD_PTR Mask;
- double ms;
- SIZE_T ic, Count;
- int cx, cy, p;
- volatile int q1, q2;
- //
- if( ( argc < 2 ) || ( 1 != _stscanf_s( argv[1], TEXT("%Iu"), &Count ) ) ){
- return 0;
- }
- _tprintf( TEXT("count=%Iu\r\n" ), Count );
- Mask = ::SetThreadAffinityMask( ::GetCurrentThread(), 1 );
- _tprintf( TEXT("measure...\r\n" ) );
- Start = __rdtsc();
- Sleep( Measure );
- ms = double( __rdtsc() - Start ) / Measure;
- _tprintf( TEXT("Freq=%lf GHz\r\n" ), ms / 1000000 );
- //
- srand( (unsigned)time( nullptr ) );
- Points.resize( Count / 2 );
- _tprintf( TEXT("fill...\r\n" ) );
- cx = rand() - INT_MAX / 2;
- cy = rand() - INT_MAX / 2;
- c = _mm_set_epi32( cy, cx, cy, cx );
- for( ic = Points.size() ; ic-- ; ){
- Points[ic].m128i_i32[0] = rand() - INT_MAX / 2;
- Points[ic].m128i_i32[1] = rand() - INT_MAX / 2;
- Points[ic].m128i_i32[2] = rand() - INT_MAX / 2;
- Points[ic].m128i_i32[3] = rand() - INT_MAX / 2;
- }
- _tprintf( TEXT("calc...\r\n" ) );
- Start = __rdtsc();
- for( p = 0, ic = Points.size() / Exp ; ic-- ; p += Exp ){
- Points[p + 0] = _mm_sub_epi32( Points[p + 0], c );
- Points[p + 1] = _mm_sub_epi32( Points[p + 1], c );
- Points[p + 2] = _mm_sub_epi32( Points[p + 2], c );
- Points[p + 3] = _mm_sub_epi32( Points[p + 3], c );
- Points[p + 4] = _mm_sub_epi32( Points[p + 4], c );
- Points[p + 5] = _mm_sub_epi32( Points[p + 5], c );
- Points[p + 6] = _mm_sub_epi32( Points[p + 6], c );
- Points[p + 7] = _mm_sub_epi32( Points[p + 7], c );
- Points[p + 8] = _mm_sub_epi32( Points[p + 8], c );
- Points[p + 9] = _mm_sub_epi32( Points[p + 9], c );
- Points[p + 10] = _mm_sub_epi32( Points[p + 10], c );
- Points[p + 11] = _mm_sub_epi32( Points[p + 11], c );
- Points[p + 12] = _mm_sub_epi32( Points[p + 12], c );
- Points[p + 13] = _mm_sub_epi32( Points[p + 13], c );
- Points[p + 14] = _mm_sub_epi32( Points[p + 14], c );
- Points[p + 15] = _mm_sub_epi32( Points[p + 15], c );
- }
- T0 = __rdtsc() - Start;
- Start = __rdtsc();
- for( p = 0, ic = Points.size() / Exp ; ic-- ; p += Exp ){
- Quadrant1( Points[p + 0], &q1, &q2 );
- Quadrant1( Points[p + 1], &q1, &q2 );
- Quadrant1( Points[p + 2], &q1, &q2 );
- Quadrant1( Points[p + 3], &q1, &q2 );
- Quadrant1( Points[p + 4], &q1, &q2 );
- Quadrant1( Points[p + 5], &q1, &q2 );
- Quadrant1( Points[p + 6], &q1, &q2 );
- Quadrant1( Points[p + 7], &q1, &q2 );
- Quadrant1( Points[p + 8], &q1, &q2 );
- Quadrant1( Points[p + 9], &q1, &q2 );
- Quadrant1( Points[p + 10], &q1, &q2 );
- Quadrant1( Points[p + 11], &q1, &q2 );
- Quadrant1( Points[p + 12], &q1, &q2 );
- Quadrant1( Points[p + 13], &q1, &q2 );
- Quadrant1( Points[p + 14], &q1, &q2 );
- Quadrant1( Points[p + 15], &q1, &q2 );
- }
- T1 = __rdtsc() - Start;
- Start = __rdtsc();
- for( p = 0, ic = Points.size() / Exp ; ic-- ; p += Exp ){
- Quadrant2( Points[p + 0], &q1, &q2 );
- Quadrant2( Points[p + 1], &q1, &q2 );
- Quadrant2( Points[p + 2], &q1, &q2 );
- Quadrant2( Points[p + 3], &q1, &q2 );
- Quadrant2( Points[p + 4], &q1, &q2 );
- Quadrant2( Points[p + 5], &q1, &q2 );
- Quadrant2( Points[p + 6], &q1, &q2 );
- Quadrant2( Points[p + 7], &q1, &q2 );
- Quadrant2( Points[p + 8], &q1, &q2 );
- Quadrant2( Points[p + 9], &q1, &q2 );
- Quadrant2( Points[p + 10], &q1, &q2 );
- Quadrant2( Points[p + 11], &q1, &q2 );
- Quadrant2( Points[p + 12], &q1, &q2 );
- Quadrant2( Points[p + 13], &q1, &q2 );
- Quadrant2( Points[p + 14], &q1, &q2 );
- Quadrant2( Points[p + 15], &q1, &q2 );
- }
- T2 = __rdtsc() - Start;
- _tprintf(
- TEXT("SIMD time Recenter (Freq/1GHz): %lf/%lf ms.\r\n"), T0 / ms, T0 / 1000000.0
- );
- _tprintf(
- TEXT("SIMD time Quadrant1 (Freq/1GHz): %lf/%lf ms.\r\n"), T1 / ms, T1 / 1000000.0
- );
- _tprintf(
- TEXT("SIMD time Quadrant2 (Freq/1GHz): %lf/%lf ms.\r\n"), T2 / ms, T2 / 1000000.0
- );
- //
- _tprintf(
- TEXT("SIMD avg. time Recenter (Freq/1GHz): %lf/%lf ms.\r\n"),
- ( T0 / ms ) / Count, ( T0 / 1000000.0 ) / Count
- );
- _tprintf(
- TEXT("SIMD avg. time Quadrant1 (Freq/1GHz): %lf/%lf ms.\r\n"),
- ( T1 / ms ) / Count, ( T1 / 1000000.0 ) / Count
- );
- _tprintf(
- TEXT("SIMD avg. time Quadrant2 (Freq/1GHz): %lf/%lf ms.\r\n"),
- ( T2 / ms ) / Count, ( T2 / 1000000.0 ) / Count
- );
- _tprintf(
- TEXT("SIMD speed Recenter (Freq/1GHz): %lf/%lf p./s.\r\n"),
- Count / ( T0 / ms ) * 1000, Count / ( T0 / 1000000.0 ) * 1000
- );
- _tprintf(
- TEXT("SIMD speed Quadrant1 (Freq/1GHz): %lf/%lf q./s.\r\n"),
- Count / ( T1 / ms ) * 1000, Count / ( T1 / 1000000.0 ) * 1000
- );
- _tprintf(
- TEXT("SIMD speed Quadrant2 (Freq/1GHz): %lf/%lf q./s.\r\n"),
- Count / ( T2 / ms ) * 1000, Count / ( T2 / 1000000.0 ) * 1000
- );
- ::SetThreadAffinityMask( ::GetCurrentThread(), Mask );
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement