Guest User

test.cpp

a guest
Jun 19th, 2022
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.55 KB | None | 0 0
  1. #include <cstdint>
  2. #include <cassert>
  3. #include <cstring>
  4. #include <immintrin.h>
  5.  
  6. template <typename T, typename P>
  7. static inline T sk_unaligned_load(const P* ptr) {
  8.     T val;
  9.     memcpy(&val, ptr, sizeof(val));
  10.     return val;
  11. }
  12.  
  13. namespace sse42 {
  14.     // This is not a CRC32.  It's Just A Hash that uses those instructions because they're fast.
  15.     /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
  16.         auto data = (const uint8_t*)vdata;
  17.  
  18.         // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
  19.         uint64_t hash = seed;
  20.         if (bytes >= 24) {
  21.             // We'll create 3 independent hashes, each using _mm_crc32_u64()
  22.             // to hash 8 bytes per step.  Both 3 and independent are important:
  23.             // we can execute 3 of these instructions in parallel on a single core.
  24.             uint64_t a = hash,
  25.                      b = hash,
  26.                      c = hash;
  27.             size_t steps = bytes/24;
  28.             while (steps --> 0) {
  29.                 a = _mm_crc32_u64(a, sk_unaligned_load<uint64_t>(data+ 0));
  30.                 b = _mm_crc32_u64(b, sk_unaligned_load<uint64_t>(data+ 8));
  31.                 c = _mm_crc32_u64(c, sk_unaligned_load<uint64_t>(data+16));
  32.                 data += 24;
  33.             }
  34.             bytes %= 24;
  35.             hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
  36.         }
  37.  
  38.         assert(bytes < 24);
  39.         if (bytes >= 16) {
  40.             hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
  41.             bytes -= 8;
  42.             data  += 8;
  43.         }
  44.  
  45.         assert(bytes < 16);
  46.         if (bytes & 8) {
  47.             hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
  48.             data  += 8;
  49.         }
  50.  
  51.         // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
  52.         // We don't lose anything here: only the bottom 32-bits were populated.
  53.         auto hash32 = (uint32_t)hash;
  54.  
  55.         if (bytes & 4) {
  56.             hash32 = _mm_crc32_u32(hash32, sk_unaligned_load<uint32_t>(data));
  57.             data += 4;
  58.         }
  59.         if (bytes & 2) {
  60.             hash32 = _mm_crc32_u16(hash32, sk_unaligned_load<uint16_t>(data));
  61.             data += 2;
  62.         }
  63.         if (bytes & 1) {
  64.             hash32 = _mm_crc32_u8(hash32, sk_unaligned_load<uint8_t>(data));
  65.         }
  66.         return hash32;
  67.     }
  68. }  // namespace sse42
  69.  
  70. namespace SkOpts {
  71.     extern uint32_t (*hash_fn)(const void*, size_t, uint32_t);
  72.  
  73.     void Init_sse42() {
  74.         hash_fn = sse42::hash_fn;
  75.     }
  76. }
Advertisement
Add Comment
Please, Sign In to add comment