Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * Force-inline qualifier. Size is usually a compile-time constant at the call
 * site, so after inlining the compiler can discard most of the dead branches
 * below. MSVC (and MinGW, which provides a __forceinline macro) use
 * __forceinline; other GCC-compatible compilers use always_inline.
 */
#if defined(_MSC_VER) || defined(__forceinline)
#  define MYMEMCPY_INLINE __forceinline
#else
#  define MYMEMCPY_INLINE static inline __attribute__((__always_inline__))
#endif

/*
 * MSVC accepts AVX intrinsics regardless of /arch, so the 256-bit path stays
 * enabled there; GCC/Clang only accept them when AVX code generation is on.
 */
#if defined(__AVX__) || (defined(_MSC_VER) && !defined(__clang__))
#  define MYMEMCPY_USE_AVX 1
#else
#  define MYMEMCPY_USE_AVX 0
#endif

/*
 * Attribute for the scalar "word" types used on the tail: they are accessed
 * at arbitrary addresses and over memory of any declared type.
 */
#if defined(__GNUC__) || defined(__clang__)
#  define MYMEMCPY_PUN __attribute__((__may_alias__, __aligned__(1)))
#else
#  define MYMEMCPY_PUN
#endif

/**
 * Copy Size bytes from Src to Dst using unaligned SIMD moves.
 *
 * The bulk is moved in 32-byte blocks; the remaining 0..31 bytes are moved as
 * at most one 1-, 2-, 4-, 8- and 16-byte piece (in that order), selected by
 * the low five bits of Size. No access touches memory outside
 * [Src, Src + Size) or [Dst, Dst + Size).
 *
 * Like memcpy, no attempt is made to handle overlapping regions.
 *
 * @param Dst   destination buffer, at least Size bytes; any alignment
 * @param Src   source buffer, at least Size bytes; any alignment
 * @param Size  number of bytes to copy; may be 0
 * @return      the original value of Dst
 */
MYMEMCPY_INLINE
void* myMemcpy(char* Dst, const char* Src, size_t Size)
{
    typedef uint16_t word16 MYMEMCPY_PUN;
    typedef uint32_t word32 MYMEMCPY_PUN;

    void* const start = Dst;

    /* Bulk copy: one 32-byte block per iteration, loaded fully before stored. */
    for ( ; Size >= 32; Size -= 32)
    {
#if MYMEMCPY_USE_AVX
        const __m256i block = _mm256_loadu_si256((const __m256i*)Src);
        _mm256_storeu_si256((__m256i*)Dst, block);
#else
        /* SSE2 fallback: the same 32 bytes as two 16-byte halves. */
        const __m128i lo = _mm_loadu_si128((const __m128i*)Src);
        const __m128i hi = _mm_loadu_si128((const __m128i*)(Src + 16));
        _mm_storeu_si128((__m128i*)Dst, lo);
        _mm_storeu_si128((__m128i*)(Dst + 16), hi);
#endif
        Src += 32;
        Dst += 32;
    }

    /*
     * Tail: Size is now 0..31, so each set bit of Size names exactly one
     * power-of-two piece that still has to be moved.
     */
    if (Size & 0x01)
    {
        *Dst = *Src;
        Src += 1;
        Dst += 1;
    }
    if (Size & 0x02)
    {
        *(word16*)Dst = *(const word16*)Src;
        Src += 2;
        Dst += 2;
    }
    if (Size & 0x04)
    {
        *(word32*)Dst = *(const word32*)Src;
        Src += 4;
        Dst += 4;
    }
    if (Size & 0x08)
    {
#if defined _M_X64 || defined _M_IA64 || defined __amd64
        typedef uint64_t word64 MYMEMCPY_PUN;
        *(word64*)Dst = *(const word64*)Src;
#else
        /*
         * No 64-bit general-purpose move on 32-bit targets: go through the low
         * half of an XMM register. The load must be the 8-byte
         * _mm_loadl_epi64; a 16-byte load here would read past the end of Src.
         */
        _mm_storel_epi64((__m128i*)Dst, _mm_loadl_epi64((const __m128i*)Src));
#endif
        Src += 8;
        Dst += 8;
    }
    if (Size & 0x10)
    {
        /* Last piece, so the cursors need no further advance. */
        _mm_storeu_si128((__m128i*)Dst, _mm_loadu_si128((const __m128i*)Src));
    }

    return start;
}

#undef MYMEMCPY_PUN
#undef MYMEMCPY_USE_AVX
#undef MYMEMCPY_INLINE
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement