Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Rounds `a` up to the next multiple of `n` (returns `a` itself when it is
// already a multiple). The mask arithmetic only yields a multiple of `n` when
// `n` is a power of two.
static inline constexpr uintptr_t roundup_n(uintptr_t a, uintptr_t n) {
    const uintptr_t low_bits = n - 1;
    return (a + low_bits) & ~low_bits;
}
// Rounds `a` down to the previous multiple of `n` by discarding the bits
// selected by `n - 1`. The result is a multiple of `n` only when `n` is a
// power of two.
static inline constexpr uintptr_t rounddown_n(uintptr_t a, uintptr_t n) {
    const uintptr_t remainder = a & (n - 1);
    return a - remainder;
}
// Reports whether `ptr` has none of the bits selected by `n - 1` set, i.e.
// whether it is a multiple of `n` when `n` is a power of two.
static inline bool isaligned_n(uintptr_t ptr, uintptr_t n) {
    const uintptr_t misalignment = ptr & (n - 1);
    return misalignment == 0;
}
// Runtime-shift wrapper around _mm_alignr_epi8 (PALIGNR).
//
// The intrinsic takes its byte shift as a compile-time immediate, so the
// runtime value `n` is dispatched through a switch with one case per legal
// shift. For n in [0, 15] the result is _mm_alignr_epi8(a, b, n): the 32-byte
// concatenation a:b (with `a` in the high half) shifted right by `n` bytes and
// truncated to its low 16 bytes.
//
// Precondition: 0 <= n <= 15. Any other value falls through to the
// unreachable marker below, which is undefined behaviour.
//
// On GCC/Clang the function is tagged target("ssse3") so it compiles even when
// the translation unit is built without -mssse3; the CPU must still support
// SSSE3 at run time.
#if defined(__GNUC__) || defined(__clang__)
__attribute__((target("ssse3")))
#endif
static __m128i _mm_alignr_epi8_nonconst(__m128i a, __m128i b, int n) {
// One switch label per immediate; `imm` is pasted as the literal shift count.
#define ALIGNR_CASE(imm) \
    case imm:            \
        return _mm_alignr_epi8(a, b, imm)
    switch (n) {
        ALIGNR_CASE(0x0);
        ALIGNR_CASE(0x1);
        ALIGNR_CASE(0x2);
        ALIGNR_CASE(0x3);
        ALIGNR_CASE(0x4);
        ALIGNR_CASE(0x5);
        ALIGNR_CASE(0x6);
        ALIGNR_CASE(0x7);
        ALIGNR_CASE(0x8);
        ALIGNR_CASE(0x9);
        ALIGNR_CASE(0xA);
        ALIGNR_CASE(0xB);
        ALIGNR_CASE(0xC);
        ALIGNR_CASE(0xD);
        ALIGNR_CASE(0xE);
        ALIGNR_CASE(0xF);
    }
// Keep the helper macro from leaking into the rest of the translation unit.
#undef ALIGNR_CASE
    // Tell the optimizer that every valid `n` returned above.
#ifdef _MSC_VER
    __assume(0);  // MSVC spelling has two leading underscores.
#else
    __builtin_unreachable();
#endif
}
- bool memeq(const uint8_t* pu1, const uint8_t* pu2, size_t n) {
- auto* pa1 = (const uint8_t*)rounddown_n((uintptr_t)pu1, 16);
- auto* pa2 = (const uint8_t*)rounddown_n((uintptr_t)pu2, 16);
- auto diff1 = pu1 - pa1;
- auto diff2 = pu2 - pa2;
- auto xmm1_1 = _mm_load_si128((const __m128i*)pa1);
- auto xmm2_1 = _mm_load_si128((const __m128i*)pa2);
- size_t off;
- for (off = sizeof(__m128i); off <= n; off += sizeof(__m128i)) {
- auto xmm1_2 = _mm_load_si128((const __m128i*)(pa1 + off));
- auto xmm2_2 = _mm_load_si128((const __m128i*)(pa2 + off));
- auto xmm1_a = _mm_alignr_epi8_nonconst(xmm1_2, xmm1_1, diff1);
- auto xmm2_a = _mm_alignr_epi8_nonconst(xmm2_2, xmm2_1, diff2);
- if (!_mm_test_all_ones(_mm_cmpeq_epi8(xmm1_a, xmm2_a))) return false;
- xmm1_1 = xmm1_2;
- xmm2_1 = xmm2_2;
- }
- if (uint16_t left = n + sizeof(__m128i) - off; left > 0) {
- auto xmm1_2 = _mm_load_si128((const __m128i*)(pa1 + off));
- auto xmm2_2 = _mm_load_si128((const __m128i*)(pa2 + off));
- auto xmm1_a = _mm_alignr_epi8_nonconst(xmm1_2, xmm1_1, diff1);
- auto xmm2_a = _mm_alignr_epi8_nonconst(xmm2_2, xmm2_1, diff2);
- auto res = _mm_cmpeq_epi8(xmm1_a, xmm2_a);
- auto mask = (uint16_t)_mm_movemask_epi8(res);
- uint16_t bits = (1 << left) - 1;
- return (mask & bits) == bits;
- }
- return true;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement