Guest User

Untitled

a guest
Apr 16th, 2018
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.17 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdint.h>
  3. #include <stdlib.h>
  4. #include <immintrin.h>
  5.  
  6. const double pi = 3.14159265358979323846;
  7.  
  8. int main(int argc, char const *argv[])
  9. {
  10.  
  11. double mux_ = 0.32;
  12. double muy_ = 0.32;
  13. double b4_ = 0.5;
  14.  
  15. double X[4] = {0.1, 0.2, 0.3, 1.1};
  16. double Y[4] = {0.0, 0.0, 0.0, 0.0};
  17. double Z[4];
  18.  
  19. double x0, x1, px0, px1, px2, mux;
  20. double y0, y1, py0, py1, py2, muy;
  21.  
  22. for (int i = 0; i < 4; i++) {
  23. x0 = X[i];
  24. y0 = Y[i];
  25. mux = 2.0*pi*mux_;
  26. muy = 2.0*pi*muy_;
  27. px0 = 0.0;
  28. py0 = 0.0;
  29.  
  30. for (int n = 0; n < 5; n++) {
  31. if (x0*x0 + y0*y0 < 1.0) {
  32. x1 = x0 + px0;
  33. px1 = -x0 + px0;
  34. y1 = y0 + py0;
  35. py1 = -y0 + py0;
  36. px2 = px1 + b4_ * (x1*x1*x1 - 3.0*x1*y1*y1);
  37. py2 = py1 - b4_ * (y1*y1*y1 - 3.0*x1*x1*y1);
  38. x0 = x1; y0 = y1;
  39. px0 = px2; py0 = py2;
  40. }
  41. else
  42. break;
  43. Z[i] = (double) n;
  44. printf("%+f ", x0);
  45. //printf("%+f %+f %+f %+f\n", Z[0], Z[1], Z[2], Z[3]);
  46. }
  47. printf("\n");
  48. }
  49. printf("%+f %+f %+f %+f\n", Z[0], Z[1], Z[2], Z[3]);
  50. printf("\n");
  51.  
  52. for (int i = 0; i < 4; i += 4) {
  53. __m256d x0 = _mm256_load_pd(X + i);
  54. __m256d y0 = _mm256_load_pd(Y + i);
  55. __m256d px0 = _mm256_setzero_pd(); //_mm256_set_pd(0.0, 0.0, 0.0, 1.0);
  56. __m256d py0 = _mm256_setzero_pd();
  57. //__m256d mux = _mm256_set_pd(2.0*pi*mux_, 2.0*pi*mux_, 2.0*pi*mux_, 2.0*pi*mux_);
  58. //__m256d muy = _mm256_set_pd(2.0*pi*muy_, 2.0*pi*muy_, 2.0*pi*muy_, 2.0*pi*muy_);
  59.  
  60. __m256d x0_2 = _mm256_mul_pd(x0, x0);
  61. __m256d y0_2 = _mm256_mul_pd(y0, y0);
  62. __m256d radius = _mm256_add_pd(x0_2, y0_2);
  63.  
  64. __m256d ones = _mm256_set1_pd(1.0);
  65. __m256d three = _mm256_set_pd(3.0, 3.0, 3.0, 3.0);
  66.  
  67. __m256d zz = _mm256_setzero_pd();
  68.  
  69. for (int n = 0; n < 5; n++) {
  70. __m256d mask = _mm256_cmp_pd(radius, ones, _CMP_LE_OS);
  71.  
  72. __m256d x1 = _mm256_add_pd( x0, px0);
  73. __m256d px1 = _mm256_add_pd(-x0, px0);
  74. __m256d y1 = _mm256_add_pd( y0, py0);
  75. __m256d py1 = _mm256_add_pd(-y0, py0);
  76.  
  77. __m256d x1_2 = _mm256_mul_pd(x1, x1);
  78. __m256d y1_2 = _mm256_mul_pd(y1, y1);
  79. __m256d b4 = _mm256_set_pd(b4_, b4_, b4_, b4_);
  80. __m256d px2 = px1 + b4 * (_mm256_mul_pd(x1_2, x1) - three * _mm256_mul_pd(x1, y1_2));
  81. __m256d py2 = py1 - b4 * (_mm256_mul_pd(y1_2, y1) - three * _mm256_mul_pd(y1, x1_2));
  82.  
  83. double* res1 = (double*)&x0;
  84. printf("X0: %+f %+f %+f %+f \n", res1[0], res1[1], res1[2], res1[3]);
  85. double* res2 = (double*)&x1;
  86. printf("X1: %+f %+f %+f %+f \n", res2[0], res2[1], res2[2], res2[3]);
  87. x0 = x1; y0 = y1;
  88. px0 = px2; py0 = py2;
  89. zz = _mm256_blendv_pd(zz, ones, mask);
  90.  
  91.  
  92. _mm256_store_pd(Z + i, zz);
  93. printf("\n%f %f %f %f\n", Z[0], Z[1], Z[2], Z[3]);
  94. }
  95.  
  96. }
  97.  
  98. //printf("\n%f %f %f %f\n", Z[0], Z[1], Z[2], Z[3]);
  99.  
  100. return 0;
  101. }
Add Comment
Please, Sign In to add comment