Advertisement
Sirflankalot

AOS3 -> SOA

Oct 22nd, 2019
218
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.27 KB | None | 0 0
  1. const pik::SIMD_PART(int64_t, 2) i64x2;
  2. const pik::SIMD_FULL(int64_t) i64xN;
  3. for(; i + i64xN.N <= size; i += i64xN.N) {
  4.     int64_t* base = (int64_t*) &aos[i];
  5.     const auto vl1 = pik::load(i64x2, base + 0); // [a1, b1]
  6.     const auto vl2 = pik::load(i64x2, base + 2); // [c1, a2]
  7.     const auto vl3 = pik::load(i64x2, base + 4); // [b2, c2]
  8.  
  9.     const auto tl1 = pik::concat_hi_lo(vl2, vl1); // [a1, a2]
  10.     const auto tl2 = pik::concat_lo_hi(vl3, vl1); // [b1, b2]
  11.     const auto tl3 = pik::concat_hi_lo(vl3, vl2); // [c1, c2]
  12.  
  13.     #if SIMD_TARGET_WIDTH == 128
  14.     const auto t1 = tl1;
  15.     const auto t2 = tl1;
  16.     const auto t3 = tl1;
  17.     #elif SIMD_TARGET_WIDTH == 256
  18.     base = (int64_t*) &aos[i + 2];
  19.     const auto vh1 = pik::load(i64x2, base + 0); // [a1, b1]
  20.     const auto vh2 = pik::load(i64x2, base + 2); // [c1, a2]
  21.     const auto vh3 = pik::load(i64x2, base + 4); // [b2, c2]
  22.  
  23.     const auto th1 = pik::concat_hi_lo(vh2, vh1); // [a1, a2]
  24.     const auto th2 = pik::concat_lo_hi(vh3, vh1); // [b1, b2]
  25.     const auto th3 = pik::concat_hi_lo(vh3, vh2); // [c1, c2]
  26.     auto t1 = pik::combine(i64xN, th1, tl1);
  27.     auto t2 = pik::combine(i64xN, th2, tl2);
  28.     auto t3 = pik::combine(i64xN, th3, tl3);
  29.     #endif
  30.  
  31.     pik::store(t1, i64xN, &soa.a[i]);
  32.     pik::store(t2, i64xN, &soa.b[i]);
  33.     pik::store(t3, i64xN, &soa.c[i]);
  34. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement