Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/**
 * Fill the first Size bytes of the buffer at Dst with the byte (uint8_t)Val.
 *
 * Whole 32-byte chunks are written with unaligned AVX stores. The remaining
 * Size % 32 bytes are written with at most one store each of 1, 2, 4, 8 and
 * 16 bytes, selected by the bits of the remainder, in ascending size order.
 * No store touches memory outside [Dst, Dst + Size).
 *
 * Dst   Start of the destination buffer. No particular alignment is needed,
 *       and the pointer is not written through when Size is 0.
 * Val   Fill value; only its low 8 bits are used.
 * Size  Number of bytes to write.
 *
 * Returns Dst unchanged.
 *
 * The AVX intrinsics used here need AVX enabled for the compilation target
 * and an AVX-capable CPU at run time.
 */
__attribute__((always_inline)) void* myMemset(void* Dst, int Val, size_t Size)
{
    unsigned char *cursor = (unsigned char *)Dst;
    const uint8_t byte = (uint8_t)Val;
    const __m256i fill256 = _mm256_set1_epi8((char)byte);
    const __m128i fill128 = _mm256_castsi256_si128(fill256);
    /* Fill byte replicated into all four bytes of a word: 0x01010101 * b
     * never carries between bytes because b <= 0xFF. Every byte is equal,
     * so copying a prefix of it is endianness-independent. */
    const uint32_t fill32 = UINT32_C(0x01010101) * byte;

    for (; Size >= 32; Size -= 32)
    {
        _mm256_storeu_si256((__m256i *)cursor, fill256);
        cursor += 32;
    }

    /* Size is now below 32, so its five low bits name exactly the pieces
     * still to be written. Fixed-size memcpy calls compile to single stores
     * without the aliasing and alignment problems of casting the pointer to
     * a wider integer type. */
    if (Size & 1)
    {
        *cursor = byte;
        cursor += 1;
    }
    if (Size & 2)
    {
        memcpy(cursor, &fill32, 2);
        cursor += 2;
    }
    if (Size & 4)
    {
        memcpy(cursor, &fill32, 4);
        cursor += 4;
    }
    if (Size & 8)
    {
        _mm_storel_epi64((__m128i *)cursor, fill128);
        cursor += 8;
    }
    if (Size & 16)
    {
        _mm_storeu_si128((__m128i *)cursor, fill128);
    }

    return Dst;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement