#include #include #include #include const double pi = 3.14159265358979323846; int main(int argc, char const *argv[]) { double mux_ = 0.32; double muy_ = 0.32; double b4_ = 0.5; double X[4] = {0.1, 0.2, 0.3, 1.1}; double Y[4] = {0.0, 0.0, 0.0, 0.0}; double Z[4]; double x0, x1, px0, px1, px2, mux; double y0, y1, py0, py1, py2, muy; for (int i = 0; i < 4; i++) { x0 = X[i]; y0 = Y[i]; mux = 2.0*pi*mux_; muy = 2.0*pi*muy_; px0 = 0.0; py0 = 0.0; for (int n = 0; n < 5; n++) { if (x0*x0 + y0*y0 < 1.0) { x1 = x0 + px0; px1 = -x0 + px0; y1 = y0 + py0; py1 = -y0 + py0; px2 = px1 + b4_ * (x1*x1*x1 - 3.0*x1*y1*y1); py2 = py1 - b4_ * (y1*y1*y1 - 3.0*x1*x1*y1); x0 = x1; y0 = y1; px0 = px2; py0 = py2; } else break; Z[i] = (double) n; printf("%+f ", x0); //printf("%+f %+f %+f %+f\n", Z[0], Z[1], Z[2], Z[3]); } printf("\n"); } printf("%+f %+f %+f %+f\n", Z[0], Z[1], Z[2], Z[3]); printf("\n"); for (int i = 0; i < 4; i += 4) { __m256d x0 = _mm256_load_pd(X + i); __m256d y0 = _mm256_load_pd(Y + i); __m256d px0 = _mm256_setzero_pd(); //_mm256_set_pd(0.0, 0.0, 0.0, 1.0); __m256d py0 = _mm256_setzero_pd(); //__m256d mux = _mm256_set_pd(2.0*pi*mux_, 2.0*pi*mux_, 2.0*pi*mux_, 2.0*pi*mux_); //__m256d muy = _mm256_set_pd(2.0*pi*muy_, 2.0*pi*muy_, 2.0*pi*muy_, 2.0*pi*muy_); __m256d x0_2 = _mm256_mul_pd(x0, x0); __m256d y0_2 = _mm256_mul_pd(y0, y0); __m256d radius = _mm256_add_pd(x0_2, y0_2); __m256d ones = _mm256_set1_pd(1.0); __m256d three = _mm256_set_pd(3.0, 3.0, 3.0, 3.0); __m256d zz = _mm256_setzero_pd(); for (int n = 0; n < 5; n++) { __m256d mask = _mm256_cmp_pd(radius, ones, _CMP_LE_OS); __m256d x1 = _mm256_add_pd( x0, px0); __m256d px1 = _mm256_add_pd(-x0, px0); __m256d y1 = _mm256_add_pd( y0, py0); __m256d py1 = _mm256_add_pd(-y0, py0); __m256d x1_2 = _mm256_mul_pd(x1, x1); __m256d y1_2 = _mm256_mul_pd(y1, y1); __m256d b4 = _mm256_set_pd(b4_, b4_, b4_, b4_); __m256d px2 = px1 + b4 * (_mm256_mul_pd(x1_2, x1) - three * _mm256_mul_pd(x1, y1_2)); __m256d py2 = py1 - b4 * (_mm256_mul_pd(y1_2, y1) - three * _mm256_mul_pd(y1, x1_2)); double* res1 = (double*)&x0; printf("X0: %+f %+f %+f %+f \n", res1[0], res1[1], res1[2], res1[3]); double* res2 = (double*)&x1; printf("X1: %+f %+f %+f %+f \n", res2[0], res2[1], res2[2], res2[3]); x0 = x1; y0 = y1; px0 = px2; py0 = py2; zz = _mm256_blendv_pd(zz, ones, mask); _mm256_store_pd(Z + i, zz); printf("\n%f %f %f %f\n", Z[0], Z[1], Z[2], Z[3]); } } //printf("\n%f %f %f %f\n", Z[0], Z[1], Z[2], Z[3]); return 0; }