Advertisement
zhangsongcui

memeq_alligned

Jun 3rd, 2022
748
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.39 KB | None | 0 0
  // Rounds `a` up to the smallest multiple of `n` that is not below `a`.
  // The mask arithmetic assumes `n` is a power of two (callers in this file
  // pass 16); the sum `a + (n - 1)` wraps if it exceeds the uintptr_t range.
  static inline constexpr uintptr_t roundup_n(uintptr_t a, uintptr_t n) {
      const uintptr_t low_bits = n - 1;
      const uintptr_t bumped = a + low_bits;
      return bumped & ~low_bits;
  }
  4.  
  // Rounds `a` down to a multiple of `n` by clearing the low bits selected by
  // `n - 1`. The mask arithmetic assumes `n` is a power of two.
  static inline constexpr uintptr_t rounddown_n(uintptr_t a, uintptr_t n) {
      const uintptr_t keep_mask = ~(n - 1);
      return a & keep_mask;
  }
  8.  
  // Reports whether `ptr` is a multiple of `n`, i.e. whether all bits selected
  // by `n - 1` are zero. The mask test assumes `n` is a power of two.
  // Declared constexpr like the sibling rounding helpers so it can be used in
  // constant expressions and static_asserts as well as at run time.
  static inline constexpr bool isaligned_n(uintptr_t ptr, uintptr_t n) {
      return (ptr & (n - 1)) == 0;
  }
  12.  
  13. static __m128i _mm_alignr_epi8_nonconst(__m128i a, __m128i b, int n) {
  14. #define ALIGNR_CASE(n) case n: return _mm_alignr_epi8(a, b, n); break;
  15.     switch (n) {
  16.         ALIGNR_CASE(0x0);
  17.         ALIGNR_CASE(0x1);
  18.         ALIGNR_CASE(0x2);
  19.         ALIGNR_CASE(0x3);
  20.         ALIGNR_CASE(0x4);
  21.         ALIGNR_CASE(0x5);
  22.         ALIGNR_CASE(0x6);
  23.         ALIGNR_CASE(0x7);
  24.         ALIGNR_CASE(0x8);
  25.         ALIGNR_CASE(0x9);
  26.         ALIGNR_CASE(0xA);
  27.         ALIGNR_CASE(0xB);
  28.         ALIGNR_CASE(0xC);
  29.         ALIGNR_CASE(0xD);
  30.         ALIGNR_CASE(0xE);
  31.         ALIGNR_CASE(0xF);
  32.     }
  33. #ifdef _MSC_VER
  34.     _assume(0);
  35. #else
  36.     __builtin_unreachable();
  37. #endif
  38. }
  39.  
  40. bool memeq(const uint8_t* pu1, const uint8_t* pu2, size_t n) {
  41.     auto* pa1 = (const uint8_t*)rounddown_n((uintptr_t)pu1, 16);
  42.     auto* pa2 = (const uint8_t*)rounddown_n((uintptr_t)pu2, 16);
  43.     auto diff1 = pu1 - pa1;
  44.     auto diff2 = pu2 - pa2;
  45.  
  46.     auto xmm1_1 = _mm_load_si128((const __m128i*)pa1);
  47.     auto xmm2_1 = _mm_load_si128((const __m128i*)pa2);
  48.  
  49.     size_t off;
  50.     for (off = sizeof(__m128i); off <= n; off += sizeof(__m128i)) {
  51.         auto xmm1_2 = _mm_load_si128((const __m128i*)(pa1 + off));
  52.         auto xmm2_2 = _mm_load_si128((const __m128i*)(pa2 + off));
  53.         auto xmm1_a = _mm_alignr_epi8_nonconst(xmm1_2, xmm1_1, diff1);
  54.         auto xmm2_a = _mm_alignr_epi8_nonconst(xmm2_2, xmm2_1, diff2);
  55.         if (!_mm_test_all_ones(_mm_cmpeq_epi8(xmm1_a, xmm2_a))) return false;
  56.         xmm1_1 = xmm1_2;
  57.         xmm2_1 = xmm2_2;
  58.     }
  59.  
  60.     if (uint16_t left = n + sizeof(__m128i) - off; left > 0) {
  61.         auto xmm1_2 = _mm_load_si128((const __m128i*)(pa1 + off));
  62.         auto xmm2_2 = _mm_load_si128((const __m128i*)(pa2 + off));
  63.         auto xmm1_a = _mm_alignr_epi8_nonconst(xmm1_2, xmm1_1, diff1);
  64.         auto xmm2_a = _mm_alignr_epi8_nonconst(xmm2_2, xmm2_1, diff2);
  65.         auto res = _mm_cmpeq_epi8(xmm1_a, xmm2_a);
  66.         auto mask = (uint16_t)_mm_movemask_epi8(res);
  67.         uint16_t bits = (1 << left) - 1;
  68.         return (mask & bits) == bits;
  69.     }
  70.  
  71.     return true;
  72. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement