Advertisement
Guest User

Untitled

a guest
Dec 6th, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.05 KB | None | 0 0
  1. #include "mandelbrot.h"
  2. #include <xmmintrin.h>
  3.  
  4. // cubic_mandelbrot() takes an array of SIZE (x,y) coordinates --- these are
  5. // actually complex numbers x + yi, but we can view them as points on a plane.
  6. // It then executes 200 iterations of f, using the <x,y> point, and checks
  7. // the magnitude of the result; if the magnitude is over 2.0, it assumes
  8. // that the function will diverge to infinity.
  9.  
  10. // vectorize the code below using SIMD intrinsics
  11. // int *
  12. // cubic_mandelbrot_vector(float x[SIZE], float y[SIZE]) {
  13. // static int ret[SIZE];
  14. // float x1, y1, x2, y2;
  15. //
  16. // for (int i = 0; i < SIZE; i ++) {
  17. // x1 = y1 = 0.0;
  18. //
  19. // // Run M_ITER iterations
  20. // for (int j = 0; j < M_ITER; j ++) {
  21. // // Calculate x1^2 and y1^2
  22. // float x1_squared = x1 * x1;
  23. // float y1_squared = y1 * y1;
  24. //
  25. // // Calculate the real piece of (x1 + (y1*i))^3 + (x + (y*i))
  26. // x2 = x1 * (x1_squared - 3 * y1_squared) + x[i];
  27. //
  28. // // Calculate the imaginary portion of (x1 + (y1*i))^3 + (x + (y*i))
  29. // y2 = y1 * (3 * x1_squared - y1_squared) + y[i];
  30. //
  31. // // Use the resulting complex number as the input for the next
  32. // // iteration
  33. // x1 = x2;
  34. // y1 = y2;
  35. // }
  36. //
  37. // // caculate the magnitude of the result;
  38. // // we could take the square root, but we instead just
  39. // // compare squares
  40. // ret[i] = ((x2 * x2) + (y2 * y2)) < (M_MAG * M_MAG);
  41. // }
  42. //
  43. // return ret;
  44. // }
  45.  
  46.  
  47.  
  48. int *
  49. cubic_mandelbrot_vector(float x[SIZE], float y[SIZE]) {
  50. static int ret[SIZE];
  51. //float x1, y1, x2, y2;
  52. float temp[4]; //added
  53. __m128 acc, XI, YI, X1, X2, Y1, Y2, X1_SQUARED, Y1_SQUARED, tempret; //added
  54.  
  55. for (int i = 0; i < SIZE; i += 4) {
  56. //x1 = y1 = 0.0;
  57. X1 = _mm_set1_ps(0.0); //added
  58. Y1 = _mm_set1_ps(0.0); //added
  59. acc = _mm_set1_ps(0.0); //added
  60. XI = _mm_loadu_ps(&x[i]);
  61. YI = _mm_loadu_ps(&y[i]);
  62.  
  63.  
  64. // Run M_ITER iterations
  65. for (int j = 0; j < M_ITER; j ++) {
  66. // Calculate x1^2 and y1^2
  67. //float x1_squared = x1 * x1;
  68. //float y1_squared = y1 * y1;
  69.  
  70. //added
  71. X1_SQUARED = _mm_mul_ps(X1, X1); //x1 * x1
  72. Y1_SQUARED = _mm_mul_ps(Y1, Y1); //y1*y1
  73. //end add
  74.  
  75. // Calculate the real piece of (x1 + (y1*i))^3 + (x + (y*i))
  76. //x2 = x1 * (x1_squared - 3 * y1_squared) + x[i];
  77.  
  78. //added
  79. acc = _mm_mul_ps(_mm_set1_ps(3.0), Y1_SQUARED); //3*y1_squared
  80. acc = _mm_sub_ps(X1_SQUARED, acc); //x1squared - 3y1square
  81. X2 = _mm_mul_ps(X1, acc); //x1*()
  82. X2 = _mm_add_ps(X2, XI); //+x[i]
  83. //end add
  84.  
  85. // Calculate the imaginary portion of (x1 + (y1*i))^3 + (x + (y*i))
  86. //y2 = y1 * (3 * x1_squared - y1_squared) + y[i];
  87. //added
  88. acc = _mm_mul_ps(_mm_set1_ps(3.0), X1_SQUARED); //3*x1_squared
  89. acc = _mm_sub_ps(acc, Y1_SQUARED); //3x1squared - y1square
  90. Y2 = _mm_mul_ps(Y1, acc); //y1*()
  91. Y2 = _mm_add_ps(Y2, YI); //+y[i]
  92. //end add
  93.  
  94. // Use the resulting complex number as the input for the next
  95. // iteration
  96. //x1 = x2;
  97. X1 = X2; //added x1 = x2
  98.  
  99. //y1 = y2;
  100. Y1 = Y2; //added y1 = y2
  101. }
  102.  
  103. // caculate the magnitude of the result;
  104. // we could take the square root, but we instead just
  105. // compare squares
  106. //ret[i] = ((x2 * x2) + (y2 * y2)) < (M_MAG * M_MAG);
  107.  
  108. tempret = _mm_mul_ps(X2, X2);
  109. tempret= _mm_add_ps(tempret, (_mm_mul_ps(Y2, Y2)));
  110. tempret = _mm_cmplt_ps(tempret, (_mm_set1_ps(M_MAG*M_MAG)));
  111. _mm_storeu_ps(temp, tempret);
  112. ret[i] = temp[0];
  113. ret[i+1] = temp[1];
  114. ret[i+2] = temp[2];
  115. ret[i+3] = temp[3];
  116.  
  117.  
  118. }
  119.  
  120. return ret;
  121. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement