Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // newmond.cpp : Defines the entry point for the console application.
- //
- #include "stdafx.h"
- #include <xmmintrin.h>
- #include <emmintrin.h>
- #include <ctime>
- #include <cstdlib>
- #include <iostream>
- using namespace std;
- void IterateMandelbrot( __m128 a, __m128 b ); // forward declaration: SSE kernel taking two __m128 values (four float lanes each); defined later in this file
- int IterateMandelbrot2( float a, float b ); // forward declaration: scalar kernel taking one (a, b) float pair per call; defined later in this file
- int _tmain(int argc, _TCHAR* argv[])
- {
- unsigned int start = clock();
- const float xres= 1.0f/(float)400 ;
- const float yres= 1.0f/(float)320 ;
- const __m128 ixres = _mm_set1_ps( xres );
- const __m128 iyres = _mm_set1_ps( yres );
- for( int j=0; j < 320; j++ )
- for( int i=0; i < 400; i+=4 )
- {
- __m128 a, b;
- a = _mm_set_ps( i+3, i+2, i+1, i+0 );
- a = _mm_mul_ps( a, ixres );
- a = _mm_mul_ps( a, _mm_set1_ps( 3.00f) );
- a = _mm_add_ps( a, _mm_set1_ps(-2.25f) );
- b = _mm_set1_ps( (float)j );
- b = _mm_mul_ps( b, iyres );
- b = _mm_mul_ps( b, _mm_set1_ps(-2.24f) );
- b = _mm_add_ps( b, _mm_set1_ps( 1.12f) );
- IterateMandelbrot( a, b );
- }
- std::cout << "Time taken in millisecs: SSE " << clock()-start<<endl;
- start = clock();
- for( int j=0; j < 320; j++ )
- for( int i=0; i < 400; i++ )
- {
- float a2,b2;
- a2=xres*i;
- b2=yres*j;
- a2=a2*3.00f-2.25f;
- b2=b2*-2.24f+1.12f;
- IterateMandelbrot2( a2, b2 );
- }
- std::cout << "Time taken in millisecs: FPU " << clock()-start<<endl;
- system("pause");
- return 0;
- }
// Scalar Mandelbrot kernel: iterates f(Z) = Z^2 + C with Z0 = 0 for the single
// point C = a + b*i, for at most 512 steps.
//
// a: real part of C.
// b: imaginary part of C.
// Returns the number of iterations completed before |Z|^2 first exceeded 4.0
// (0 when the very first step already escapes), or 512 when the point never
// escaped within the iteration limit.
//
// Earlier this function returned the constant 1, which threw the result away
// and allowed an optimizing compiler to remove the loop altogether.
int IterateMandelbrot2( float a, float b )
{
    const int maxIterations = 512;

    float x  = 0.0f;   // Re(Z)
    float y  = 0.0f;   // Im(Z)
    float x2 = 0.0f;   // Re(Z)^2, carried over from the previous step
    float y2 = 0.0f;   // Im(Z)^2, carried over from the previous step

    int i = 0;
    // iterate f(Z) = Z^2 + C, Z0 = 0
    for( ; i < maxIterations; i++ )
    {
        // y is updated first because it needs the previous x; x is then
        // rebuilt from the cached squares, which still describe the previous Z.
        y = 2.0f*x*y+b;
        x = x2-y2+a;
        x2 = x*x;
        y2 = y*y;
        if( x2+y2 > 4.0f )
            break;
    }
    return i;
}
// SSE Mandelbrot kernel: iterates f(Z) = Z^2 + C, Z0 = 0, for four points at
// once (one per __m128 lane), for at most 512 steps.
//
// a: real parts of the four C values.
// b: imaginary parts of the four C values.
//
// Nothing is returned: the per-lane counts accumulated in `count` are dropped
// when the function exits.
void IterateMandelbrot( __m128 a, __m128 b )
{
    const __m128 unit    = _mm_set1_ps(1.0f);
    const __m128 bailout = _mm_set1_ps(4.0f);

    __m128 re      = _mm_setzero_ps();   // Re(Z) per lane
    __m128 im      = _mm_setzero_ps();   // Im(Z) per lane
    __m128 reSq    = _mm_setzero_ps();   // Re(Z)^2 from the previous step
    __m128 imSq    = _mm_setzero_ps();   // Im(Z)^2 from the previous step
    __m128 escaped = _mm_setzero_ps();   // all-ones mask in lanes whose |Z|^2 has exceeded 4
    __m128 count   = _mm_setzero_ps();   // +1.0 per step for every lane not yet escaped

    for( unsigned int step = 0; step < 512; ++step )
    {
        // Im(Z) <- 2*Re*Im + b, computed as (Re*Im + Re*Im) + b.
        const __m128 cross = _mm_mul_ps( re, im );
        im = _mm_add_ps( _mm_add_ps( cross, cross ), b );

        // Re(Z) <- Re^2 - Im^2 + a, using the squares of the previous Z.
        re = _mm_add_ps( _mm_sub_ps( reSq, imSq ), a );

        reSq = _mm_mul_ps( re, re );
        imSq = _mm_mul_ps( im, im );
        const __m128 magSq = _mm_add_ps( reSq, imSq );

        // The mask is OR-accumulated, so a lane stays marked once it has escaped
        // regardless of what later comparisons on that lane yield.
        escaped = _mm_or_ps( escaped, _mm_cmpgt_ps( magSq, bailout ) );

        // (~escaped & 1.0) is 1.0 in lanes still iterating and 0.0 elsewhere.
        count = _mm_add_ps( count, _mm_andnot_ps( escaped, unit ) );

        // movemask gathers the four sign bits; 0x0f means every lane has escaped.
        if( _mm_movemask_ps( escaped ) == 0x0f )
            break;
    }

    // Disabled color-creation sketch carried over from the original source:
    //__m128i bb= _mm_cvtps_epi32( count );
    //const __m128i gg = _mm_slli_si128( bb, 1 );
    //const __m128i rr = _mm_slli_si128( bb, 2 );
    //const __m128i color = _mm_or_si128( _mm_or_si128(rr,gg),bb );
    // return( bb );
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement