Untitled

#include "mandelbrot.h"
#include <xmmintrin.h>

// cubic_mandelbrot() takes an array of SIZE (x,y) coordinates --- these are
// actually complex numbers x + yi, but we can view them as points on a plane.
// It then executes 200 iterations of f, using the <x,y> point, and checks
// the magnitude of the result; if the magnitude is over 2.0, it assumes
// that the function will diverge to infinity.

// vectorize the code below using SIMD intrinsics
// int *
// cubic_mandelbrot_vector(float x[SIZE], float y[SIZE]) {
//     static int ret[SIZE];
//     float x1, y1, x2, y2;
//
//     for (int i = 0; i < SIZE; i ++) {
//         x1 = y1 = 0.0;
//
//         // Run M_ITER iterations
//         for (int j = 0; j < M_ITER; j ++) {
//             // Calculate x1^2 and y1^2
//             float x1_squared = x1 * x1;
//             float y1_squared = y1 * y1;
//
//             // Calculate the real piece of (x1 + (y1*i))^3 + (x + (y*i))
//             x2 = x1 * (x1_squared - 3 * y1_squared) + x[i];
//
//             // Calculate the imaginary portion of (x1 + (y1*i))^3 + (x + (y*i))
//             y2 = y1 * (3 * x1_squared - y1_squared) + y[i];
//
//             // Use the resulting complex number as the input for the next
//             // iteration
//             x1 = x2;
//             y1 = y2;
//         }
//
//         // caculate the magnitude of the result;
//         // we could take the square root, but we instead just
//         // compare squares
//         ret[i] = ((x2 * x2) + (y2 * y2)) < (M_MAG * M_MAG);
//     }
//
//     return ret;
// }


int *
cubic_mandelbrot_vector(float x[SIZE], float y[SIZE]) {
    static int ret[SIZE];
    //float x1, y1, x2, y2;
    float temp[4];          //added
    __m128 acc, XI, YI, X1, X2, Y1, Y2, X1_SQUARED, Y1_SQUARED, tempret; //added

    for (int i = 0; i < SIZE; i += 4) {
        //x1 = y1 = 0.0;
        X1 = _mm_set1_ps(0.0);  //added
        Y1 = _mm_set1_ps(0.0);  //added
        acc = _mm_set1_ps(0.0); //added
        XI = _mm_loadu_ps(&x[i]);
        YI = _mm_loadu_ps(&y[i]);


        // Run M_ITER iterations
        for (int j = 0; j < M_ITER; j ++) {
            // Calculate x1^2 and y1^2
            //float x1_squared = x1 * x1;
            //float y1_squared = y1 * y1;

            //added
            X1_SQUARED = _mm_mul_ps(X1, X1);  //x1 * x1
            Y1_SQUARED = _mm_mul_ps(Y1, Y1);  //y1*y1
            //end add

            // Calculate the real piece of (x1 + (y1*i))^3 + (x + (y*i))
            //x2 = x1 * (x1_squared - 3 * y1_squared) + x[i];

            //added
            acc = _mm_mul_ps(_mm_set1_ps(3.0), Y1_SQUARED); //3*y1_squared
            acc = _mm_sub_ps(X1_SQUARED, acc);  //x1squared - 3y1square
            X2 = _mm_mul_ps(X1, acc); //x1*()
            X2 = _mm_add_ps(X2, XI); //+x[i]
            //end add

            // Calculate the imaginary portion of (x1 + (y1*i))^3 + (x + (y*i))
            //y2 = y1 * (3 * x1_squared - y1_squared) + y[i];
            //added
            acc = _mm_mul_ps(_mm_set1_ps(3.0), X1_SQUARED); //3*x1_squared
            acc = _mm_sub_ps(acc, Y1_SQUARED);  //3x1squared - y1square
            Y2 = _mm_mul_ps(Y1, acc); //y1*()
            Y2 = _mm_add_ps(Y2, YI); //+y[i]
            //end add

            // Use the resulting complex number as the input for the next
            // iteration
            //x1 = x2;
            X1 = X2;  //added x1 = x2

            //y1 = y2;
            Y1 = Y2; //added y1 = y2
        }

        // caculate the magnitude of the result;
        // we could take the square root, but we instead just
        // compare squares
        //ret[i] = ((x2 * x2) + (y2 * y2)) < (M_MAG * M_MAG);

        tempret = _mm_mul_ps(X2, X2);
        tempret= _mm_add_ps(tempret, (_mm_mul_ps(Y2, Y2)));
        tempret = _mm_cmplt_ps(tempret, (_mm_set1_ps(M_MAG*M_MAG)));
        _mm_storeu_ps(temp, tempret);
        ret[i] = temp[0];
        ret[i+1] = temp[1];
        ret[i+2] = temp[2];
        ret[i+3] = temp[3];


    }

    return ret;
}