Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <cstdint>
- #include <cassert>
- #include <cstring>
- #include <immintrin.h>
// Reads a value of type T from `ptr` without requiring `ptr` to be aligned
// for T. The bytes are copied with memcpy into a local T, which is returned
// by value; sizeof(T) bytes are read starting at `ptr`.
template <typename T, typename P>
static inline T sk_unaligned_load(const P* ptr) {
    T result;
    std::memcpy(&result, ptr, sizeof(T));
    return result;
}
- namespace sse42 {
- // This is not a CRC32. It's Just A Hash that uses those instructions because they're fast.
- /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
- auto data = (const uint8_t*)vdata;
- // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
- uint64_t hash = seed;
- if (bytes >= 24) {
- // We'll create 3 independent hashes, each using _mm_crc32_u64()
- // to hash 8 bytes per step. Both 3 and independent are important:
- // we can execute 3 of these instructions in parallel on a single core.
- uint64_t a = hash,
- b = hash,
- c = hash;
- size_t steps = bytes/24;
- while (steps --> 0) {
- a = _mm_crc32_u64(a, sk_unaligned_load<uint64_t>(data+ 0));
- b = _mm_crc32_u64(b, sk_unaligned_load<uint64_t>(data+ 8));
- c = _mm_crc32_u64(c, sk_unaligned_load<uint64_t>(data+16));
- data += 24;
- }
- bytes %= 24;
- hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
- }
- assert(bytes < 24);
- if (bytes >= 16) {
- hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
- bytes -= 8;
- data += 8;
- }
- assert(bytes < 16);
- if (bytes & 8) {
- hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
- data += 8;
- }
- // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
- // We don't lose anything here: only the bottom 32-bits were populated.
- auto hash32 = (uint32_t)hash;
- if (bytes & 4) {
- hash32 = _mm_crc32_u32(hash32, sk_unaligned_load<uint32_t>(data));
- data += 4;
- }
- if (bytes & 2) {
- hash32 = _mm_crc32_u16(hash32, sk_unaligned_load<uint16_t>(data));
- data += 2;
- }
- if (bytes & 1) {
- hash32 = _mm_crc32_u8(hash32, sk_unaligned_load<uint8_t>(data));
- }
- return hash32;
- }
- } // namespace sse42
- namespace SkOpts {
- extern uint32_t (*hash_fn)(const void*, size_t, uint32_t);
- void Init_sse42() {
- hash_fn = sse42::hash_fn;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment