Advertisement
Guest User

Untitled

a guest
Apr 11th, 2012
222
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.02 KB | None | 0 0
  1. // newmond.cpp : Defines the entry point for the console application.
  2. //
  3.  
  4. #include "stdafx.h"
  5.  
  6. #include <xmmintrin.h>
  7. #include <emmintrin.h>
  8. #include <ctime>
  9. #include <cstdlib>
  10. #include <iostream>
  11. using namespace std;
  12.  
  13. void IterateMandelbrot( __m128 a, __m128 b );
  14. int IterateMandelbrot2( float a, float b );
  15.  
  16. int _tmain(int argc, _TCHAR* argv[])
  17. {
  18.  
  19.     unsigned int start = clock();
  20.  
  21.     const float xres= 1.0f/(float)400 ;
  22.     const float yres= 1.0f/(float)320 ;
  23.  
  24.     const __m128 ixres = _mm_set1_ps( xres );
  25.     const __m128 iyres = _mm_set1_ps( yres );
  26.  
  27.     for( int j=0; j < 320; j++ )
  28.     for( int i=0; i < 400; i+=4 )
  29.     {
  30.         __m128  a, b;
  31.        
  32.         a = _mm_set_ps( i+3, i+2, i+1, i+0 );
  33.         a = _mm_mul_ps( a, ixres );
  34.         a = _mm_mul_ps( a, _mm_set1_ps( 3.00f) );
  35.         a = _mm_add_ps( a, _mm_set1_ps(-2.25f) );
  36.  
  37.         b = _mm_set1_ps( (float)j );
  38.         b = _mm_mul_ps( b, iyres );
  39.         b = _mm_mul_ps( b, _mm_set1_ps(-2.24f) );
  40.         b = _mm_add_ps( b, _mm_set1_ps( 1.12f) );
  41.  
  42.        IterateMandelbrot( a, b );
  43.        
  44.     }
  45.     std::cout << "Time taken in millisecs: SSE " << clock()-start<<endl;
  46.     start = clock();
  47.    
  48.     for( int j=0; j < 320; j++ )
  49.     for( int i=0; i < 400; i++ )
  50.     {
  51.         float a2,b2;
  52.         a2=xres*i;
  53.         b2=yres*j;
  54.  
  55.         a2=a2*3.00f-2.25f;
  56.         b2=b2*-2.24f+1.12f;
  57.         IterateMandelbrot2( a2, b2 );
  58.     }
  59.     std::cout << "Time taken in millisecs: FPU " << clock()-start<<endl;
  60.     system("pause");
  61.  
  62.  
  63.     return 0;
  64. }
  65.  
  66. int IterateMandelbrot2( float a, float b )
  67. {
  68.     float x, y, x2, y2;
  69.  
  70.     x = x2 = 0.0f;
  71.     y = y2 = 0.0f;
  72.  
  73.     // iterate f(Z) = Z^2 + C,  Z0 = 0
  74.     for( int i=0; i< 512; i++ )
  75.     {
  76.         y = 2.0f*x*y+b;
  77.         x = x2-y2+a;
  78.  
  79.         x2 = x*x;
  80.         y2 = y*y;
  81.  
  82.         const float m2 = x2+y2;
  83.         if( m2>4.0f )
  84.             break;
  85.     }
  86.  
  87.     // create color
  88.     return 1;
  89. }
  90.  
  91.  
  92.  
  93. void IterateMandelbrot( __m128 a, __m128 b )
  94. {
  95.     __m128  x, y, x2, y2, m2;
  96.     __m128  co, ite;
  97.  
  98.     unsigned int i;
  99.  
  100.     const __m128 one = _mm_set1_ps(1.0f);
  101.     const __m128 th  = _mm_set1_ps(4.0f);
  102.  
  103.     x   = _mm_setzero_ps();
  104.     y   = _mm_setzero_ps();
  105.     x2  = _mm_setzero_ps();
  106.     y2  = _mm_setzero_ps();
  107.     co  = _mm_setzero_ps();
  108.     ite = _mm_setzero_ps();
  109.  
  110.     // iterate f(Z) = Z^2 + C,  Z0 = 0
  111.     for( i=0; i < 512; i++ )
  112.         {
  113.         y  = _mm_mul_ps( x, y );
  114.         y  = _mm_add_ps( _mm_add_ps(y,y),   b );
  115.         x  = _mm_add_ps( _mm_sub_ps(x2,y2), a );
  116.  
  117.         x2 = _mm_mul_ps( x, x );
  118.         y2 = _mm_mul_ps( y, y );
  119.  
  120.         m2 = _mm_add_ps(x2,y2);
  121.         co = _mm_or_ps( co, _mm_cmpgt_ps( m2, th ) );
  122.  
  123.  
  124.         ite = _mm_add_ps( ite, _mm_andnot_ps( co, one ) );
  125.         if( _mm_movemask_ps( co )==0x0f )
  126.         {
  127.            break;
  128.         }
  129.        
  130.  
  131.     // create color
  132.     //__m128i bb= _mm_cvtps_epi32( ite );
  133.     //const __m128i gg = _mm_slli_si128( bb, 1 );
  134.     //const __m128i rr = _mm_slli_si128( bb, 2 );
  135.     //const __m128i color = _mm_or_si128( _mm_or_si128(rr,gg),bb );
  136.  
  137.    // return( bb );
  138.  
  139. }
  140. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement