Advertisement
Guest User

VecAdd64 from packed 32-bit

a guest
Dec 15th, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.36 KB | None | 0 0
  1. inline uint32x4_p VecAdd64(const uint32x4_p a, const uint32x4_p b)
  2. {
  3. #if 0
  4.     const uint32x4_p t1 = { 0, 0, -1, -1};
  5.     const uint32x4_p t2 = { 0, 0, -1, -1};
  6.     const uint32x4_p cx = vec_addc(t1, t2);
  7.     const uint32x4_p t3 = vec_add (t1, t2);
  8.  
  9.     const uint8x16_p m = {16,16,16,16, 0,1,2,3, 16,16,16,16, 12,13,14,15};
  10.     const uint32x4_p z = {0,0,0,0};
  11.     const uint32x4_p o = vec_perm(cx, z, m);
  12.     const uint32x4_p t4 = vec_add(t3, o);
  13. #endif
  14.  
  15. #if defined(CRYPTOPP_POWER7_AVAILABLE)
  16.     return (uint32x4_p)vec_add((uint64x2_p)a, (uint64x2_p)b);
  17. #else
  18.     // The carry vector has a 1 set if the lane produces a carry.
  19.     // The 32-bit words are word-swapped for 64 bits. If lanes are
  20.     // numbered left to right, then the carries produced by lanes
  21.     // 0 and 2 get carried into lanes 1 and 3.
  22. # if defined(CRYPTOPP_BIG_ENDIAN)
  23.     const uint8x16_p mask = {0,1,2,3, 16,16,16,16, 12,13,14,15, 16,16,16,16};
  24.     const uint32x4_p zero = {0,0,0,0};
  25. # else  // Should this be 0,1,2,3 and 8,9,10,11
  26.     const uint8x16_p mask = {16,16,16,16, 0,1,2,3, 16,16,16,16, 12,13,14,15};
  27.     const uint32x4_p zero = {0,0,0,0};
  28. # endif
  29.  
  30.     const uint32x4_p cry = vec_addc(a, b);
  31.     const uint32x4_p res = vec_add (a, b);
  32.     const uint32x4_p tt = vec_perm(cry, zero, mask); // Debug
  33.     return (uint32x4_p)vec_add(res, vec_perm(cry, zero, mask));
  34. #endif
  35. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement