Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdint.h>
- #include <stdlib.h>
- #include <immintrin.h>
- const double pi = 3.14159265358979323846;
- int main(int argc, char const *argv[])
- {
- double mux_ = 0.32;
- double muy_ = 0.32;
- double b4_ = 0.5;
- double X[4] = {0.1, 0.2, 0.3, 1.1};
- double Y[4] = {0.0, 0.0, 0.0, 0.0};
- double Z[4];
- double x0, x1, px0, px1, px2, mux;
- double y0, y1, py0, py1, py2, muy;
- for (int i = 0; i < 4; i++) {
- x0 = X[i];
- y0 = Y[i];
- mux = 2.0*pi*mux_;
- muy = 2.0*pi*muy_;
- px0 = 0.0;
- py0 = 0.0;
- for (int n = 0; n < 5; n++) {
- if (x0*x0 + y0*y0 < 1.0) {
- x1 = x0 + px0;
- px1 = -x0 + px0;
- y1 = y0 + py0;
- py1 = -y0 + py0;
- px2 = px1 + b4_ * (x1*x1*x1 - 3.0*x1*y1*y1);
- py2 = py1 - b4_ * (y1*y1*y1 - 3.0*x1*x1*y1);
- x0 = x1; y0 = y1;
- px0 = px2; py0 = py2;
- }
- else
- break;
- Z[i] = (double) n;
- printf("%+f ", x0);
- //printf("%+f %+f %+f %+f\n", Z[0], Z[1], Z[2], Z[3]);
- }
- printf("\n");
- }
- printf("%+f %+f %+f %+f\n", Z[0], Z[1], Z[2], Z[3]);
- printf("\n");
- for (int i = 0; i < 4; i += 4) {
- __m256d x0 = _mm256_load_pd(X + i);
- __m256d y0 = _mm256_load_pd(Y + i);
- __m256d px0 = _mm256_setzero_pd(); //_mm256_set_pd(0.0, 0.0, 0.0, 1.0);
- __m256d py0 = _mm256_setzero_pd();
- //__m256d mux = _mm256_set_pd(2.0*pi*mux_, 2.0*pi*mux_, 2.0*pi*mux_, 2.0*pi*mux_);
- //__m256d muy = _mm256_set_pd(2.0*pi*muy_, 2.0*pi*muy_, 2.0*pi*muy_, 2.0*pi*muy_);
- __m256d x0_2 = _mm256_mul_pd(x0, x0);
- __m256d y0_2 = _mm256_mul_pd(y0, y0);
- __m256d radius = _mm256_add_pd(x0_2, y0_2);
- __m256d ones = _mm256_set1_pd(1.0);
- __m256d three = _mm256_set_pd(3.0, 3.0, 3.0, 3.0);
- __m256d zz = _mm256_setzero_pd();
- for (int n = 0; n < 5; n++) {
- __m256d mask = _mm256_cmp_pd(radius, ones, _CMP_LE_OS);
- __m256d x1 = _mm256_add_pd( x0, px0);
- __m256d px1 = _mm256_add_pd(-x0, px0);
- __m256d y1 = _mm256_add_pd( y0, py0);
- __m256d py1 = _mm256_add_pd(-y0, py0);
- __m256d x1_2 = _mm256_mul_pd(x1, x1);
- __m256d y1_2 = _mm256_mul_pd(y1, y1);
- __m256d b4 = _mm256_set_pd(b4_, b4_, b4_, b4_);
- __m256d px2 = px1 + b4 * (_mm256_mul_pd(x1_2, x1) - three * _mm256_mul_pd(x1, y1_2));
- __m256d py2 = py1 - b4 * (_mm256_mul_pd(y1_2, y1) - three * _mm256_mul_pd(y1, x1_2));
- double* res1 = (double*)&x0;
- printf("X0: %+f %+f %+f %+f \n", res1[0], res1[1], res1[2], res1[3]);
- double* res2 = (double*)&x1;
- printf("X1: %+f %+f %+f %+f \n", res2[0], res2[1], res2[2], res2[3]);
- x0 = x1; y0 = y1;
- px0 = px2; py0 = py2;
- zz = _mm256_blendv_pd(zz, ones, mask);
- _mm256_store_pd(Z + i, zz);
- printf("\n%f %f %f %f\n", Z[0], Z[1], Z[2], Z[3]);
- }
- }
- //printf("\n%f %f %f %f\n", Z[0], Z[1], Z[2], Z[3]);
- return 0;
- }
Add Comment
Please, Sign In to add comment