// newmond.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <xmmintrin.h>
#include <emmintrin.h>
#include <ctime>
#include <cstdlib>
#include <iostream>
using namespace std;
void IterateMandelbrot( __m128 a, __m128 b );
int IterateMandelbrot2( float a, float b );
// Returns the processor time elapsed since `begin`, in whole milliseconds.
// clock() counts in units of 1/CLOCKS_PER_SEC seconds, so the raw tick
// difference is only a millisecond count where CLOCKS_PER_SEC happens to be
// 1000; scaling explicitly keeps the printed label truthful everywhere.
// The arithmetic is done in double so the multiplication cannot overflow a
// narrow clock_t.
static unsigned int ElapsedMillis( clock_t begin )
{
    const double ticks = static_cast<double>( clock() - begin );
    return static_cast<unsigned int>( ticks * 1000.0 / CLOCKS_PER_SEC );
}

// Benchmark driver: runs the Mandelbrot kernel over a 400x320 grid twice,
// first with the 4-wide SSE kernel and then with the scalar kernel, and
// prints the time each pass took. Always returns 0.
//
// Each grid cell (i, j) is mapped to the complex constant
//   a = i/400 * 3.00 - 2.25   (real part, spans [-2.25, 0.75))
//   b = j/320 * -2.24 + 1.12  (imaginary part, starts at 1.12 and decreases)
//
// NOTE(review): neither pass uses the kernels' results, so an optimizing
// build may be free to drop part of the measured work - confirm the build
// settings before trusting the numbers.
int _tmain(int argc, _TCHAR* argv[])
{
    const int width  = 400;   // must stay a multiple of 4: the SSE pass steps 4 columns at a time
    const int height = 320;

    const float xres = 1.0f / static_cast<float>( width );
    const float yres = 1.0f / static_cast<float>( height );
    const __m128 ixres = _mm_set1_ps( xres );
    const __m128 iyres = _mm_set1_ps( yres );

    // ---- SSE pass: four horizontally adjacent cells per kernel call ----
    clock_t start = clock();
    for( int j = 0; j < height; j++ )
        for( int i = 0; i < width; i += 4 )
        {
            __m128 a, b;
            // _mm_set_ps takes its arguments from the highest lane down,
            // so lane 0 holds column i and lane 3 holds column i+3.
            a = _mm_set_ps( static_cast<float>( i+3 ), static_cast<float>( i+2 ),
                            static_cast<float>( i+1 ), static_cast<float>( i+0 ) );
            a = _mm_mul_ps( a, ixres );
            a = _mm_mul_ps( a, _mm_set1_ps( 3.00f) );
            a = _mm_add_ps( a, _mm_set1_ps(-2.25f) );
            // All four lanes share the same row, hence the same imaginary part.
            b = _mm_set1_ps( static_cast<float>( j ) );
            b = _mm_mul_ps( b, iyres );
            b = _mm_mul_ps( b, _mm_set1_ps(-2.24f) );
            b = _mm_add_ps( b, _mm_set1_ps( 1.12f) );
            IterateMandelbrot( a, b );
        }
    std::cout << "Time taken in millisecs: SSE " << ElapsedMillis( start ) << std::endl;

    // ---- Scalar pass: same grid and mapping, one cell per kernel call ----
    start = clock();
    for( int j = 0; j < height; j++ )
        for( int i = 0; i < width; i++ )
        {
            float a2, b2;
            a2 = xres * static_cast<float>( i );
            b2 = yres * static_cast<float>( j );
            a2 = a2*3.00f-2.25f;
            b2 = b2*-2.24f+1.12f;
            IterateMandelbrot2( a2, b2 );
        }
    std::cout << "Time taken in millisecs: FPU " << ElapsedMillis( start ) << std::endl;

    // Keep the console window open until a key is pressed.
    system("pause");
    return 0;
}
// Scalar Mandelbrot kernel for a single point C = a + b*i.
// Iterates Z <- Z^2 + C from Z = 0 for at most 512 steps, stopping early
// once |Z|^2 exceeds 4. The iteration outcome is not turned into a colour
// yet; the function currently always returns 1.
int IterateMandelbrot2( float a, float b )
{
    const int   maxSteps    = 512;
    const float escapeLimit = 4.0f;   // compared against |Z|^2

    float re   = 0.0f;   // real part of Z
    float im   = 0.0f;   // imaginary part of Z
    float reSq = 0.0f;   // re*re from the previous step
    float imSq = 0.0f;   // im*im from the previous step

    bool escaped = false;
    for( int step = 0; step < maxSteps && !escaped; ++step )
    {
        // The imaginary part must be updated first: it needs the old real part.
        im = 2.0f*re*im+b;
        re = reSq-imSq+a;
        reSq = re*re;
        imSq = im*im;
        escaped = ( reSq+imSq > escapeLimit );
    }

    // Colour creation is not implemented; a constant is returned instead.
    return 1;
}
// SSE Mandelbrot kernel: processes four points at once, one per float lane.
// `a` holds the four real parts and `b` the four imaginary parts of C.
// Each lane iterates Z <- Z^2 + C from Z = 0 for at most 512 steps; the loop
// ends early once every lane has had |Z|^2 exceed 4. A per-lane step counter
// is maintained but nothing is returned or stored.
void IterateMandelbrot( __m128 a, __m128 b )
{
    const unsigned int maxSteps = 512;
    const __m128 increment   = _mm_set1_ps( 1.0f );
    const __m128 escapeLimit = _mm_set1_ps( 4.0f );   // compared against |Z|^2

    __m128 re      = _mm_setzero_ps();   // real parts of Z
    __m128 im      = _mm_setzero_ps();   // imaginary parts of Z
    __m128 reSq    = _mm_setzero_ps();   // re*re from the previous step
    __m128 imSq    = _mm_setzero_ps();   // im*im from the previous step
    __m128 escaped = _mm_setzero_ps();   // sticky all-ones mask for lanes that left the radius
    __m128 count   = _mm_setzero_ps();   // per-lane step counter

    for( unsigned int step = 0; step != maxSteps; ++step )
    {
        // im = 2*re*im + b, using the old real part, then re = re^2 - im^2 + a.
        const __m128 cross = _mm_mul_ps( re, im );
        im = _mm_add_ps( _mm_add_ps( cross, cross ), b );
        re = _mm_add_ps( _mm_sub_ps( reSq, imSq ), a );
        reSq = _mm_mul_ps( re, re );
        imSq = _mm_mul_ps( im, im );

        // OR the comparison into the mask so a lane stays flagged once it
        // has escaped, whatever its later values are.
        const __m128 magSq = _mm_add_ps( reSq, imSq );
        escaped = _mm_or_ps( escaped, _mm_cmpgt_ps( magSq, escapeLimit ) );

        // Add 1 only in lanes whose mask is still clear.
        count = _mm_add_ps( count, _mm_andnot_ps( escaped, increment ) );

        // movemask gathers the four lane sign bits; 0x0f means all lanes escaped.
        if( _mm_movemask_ps( escaped ) == 0x0f )
            break;
    }

    // `count` is not consumed yet; the disabled sketch below appears to be
    // the intended colour packing (it would need a non-void return type).
    (void)count;
    // create color
    //__m128i bb= _mm_cvtps_epi32( count );
    //const __m128i gg = _mm_slli_si128( bb, 1 );
    //const __m128i rr = _mm_slli_si128( bb, 2 );
    //const __m128i color = _mm_or_si128( _mm_or_si128(rr,gg),bb );
    // return( bb );
}