Mlxa

sse

Jun 15th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #pragma GCC target("mmx,sse,sse2,ssse3,sse4.1,sse4.2,avx,avx2,fma,avx512f,avx512bw,avx512cd,avx512dq,avx512vl")
  2. #include <immintrin.h>
  3.  
  4.  
  /* x members of the values test_one() stores; 128 slots, 64-byte aligned. */
  double xs[128] __attribute__((aligned(64)));
  /* y members of the values test_one() stores; 128 slots, 64-byte aligned. */
  double ys[128] __attribute__((aligned(64)));
  6.  
  7. double test_one(int i) {
  8.     double s = 1e9;
  9.     static point vec[N];
  10.     int vsize = 0;
  11.     for (int j = 0; j < N; ++j) {
  12.         if (i == j) {
  13.             continue;
  14.         }
  15.         vec[vsize] = p[i].to(p[j]);
  16.         xs[vsize] = vec[vsize].x, ys[vsize] = vec[vsize].y;
  17.         ++vsize;
  18.     }
  19.     for (int h = vsize; h < 128; ++h)
  20.         xs[h] = 1e18, ys[h] = 1e17 + 50;
  21.     for (int j = 0; j < vsize; ++j) {
  22.         __m512d xj = _mm512_set1_pd(xs[j]), yj = _mm512_set1_pd(ys[j]);
  23.         __m512d mins = _mm512_set1_pd(1e50);
  24.         for (int h = j + 1; h < vsize; h += 8) {
  25.             __m512d xc = _mm512_load_pd(xs + h), yc = _mm512_load_pd(ys + h);
  26.             __m512d ans = _mm512_abs_pd(_mm512_fmsub_pd(xc, yj, _mm512_mul_pd(yc, xj)));
  27.             mins = _mm512_min_pd(mins, ans);
  28.         }
  29.         s = min(s, _mm512_reduce_min_pd(mins));
  30.     }
  31.     return s / 2;
  32. }
Add Comment
Please, Sign In to add comment