zhangsongcui

myMemcpy

Sep 10th, 2011
531
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  /* Inlining qualifier for myMemcpy.  MSVC keeps the original __forceinline;
   * GCC/Clang get the closest equivalent so the routine also builds there. */
  #if defined(_MSC_VER)
  #   define MYMEMCPY_INLINE __forceinline
  #elif defined(__GNUC__)
  #   define MYMEMCPY_INLINE static inline __attribute__((always_inline))
  #else
  #   define MYMEMCPY_INLINE static inline
  #endif

  /* Use 256-bit moves when the AVX intrinsics are usable in this build.
   * GCC/Clang only allow them when AVX code generation is enabled (__AVX__).
   * NOTE(review): MSVC is assumed to accept them regardless of /arch, which is
   * what the original code relied on - confirm for your toolchain. */
  #if defined(__AVX__) || (defined(_MSC_VER) && !defined(__clang__))
  #   define MYMEMCPY_USE_AVX 1
  #else
  #   define MYMEMCPY_USE_AVX 0
  #endif

  /**
   * Copy Size bytes from Src to Dst using unaligned SIMD moves.
   *
   * The copy runs front to back: first in 32-byte chunks, then the remaining
   * 0..31 bytes as at most one 1-, 2-, 4-, 8- and 16-byte move (in that order).
   * Each chunk is fully loaded before it is stored.  Overlapping regions are
   * not handled specially.
   *
   * The function is force-inlined because Size is usually a compile-time
   * constant at the call site; after inlining the compiler can then eliminate
   * most of the code that does not apply to that size.
   *
   * @param Dst   destination buffer, at least Size bytes; no alignment required
   * @param Src   source buffer, at least Size bytes; no alignment required
   * @param Size  number of bytes to copy (0 is allowed and copies nothing)
   * @return      the original value of Dst
   */
  MYMEMCPY_INLINE
  void* myMemcpy(char* Dst, const char* Src, size_t Size)
  {
      void* const start = Dst;
      const size_t bulk = 32;     /* bytes moved per bulk iteration */

      /* Bulk phase: 32 bytes per iteration. */
      for ( ; Size >= bulk; Size -= bulk, Src += bulk, Dst += bulk )
      {
  #if MYMEMCPY_USE_AVX
          const __m256i ymm = _mm256_loadu_si256((const __m256i *)Src);
          _mm256_storeu_si256((__m256i *)Dst, ymm);
  #else
          /* No AVX: two 16-byte moves, both loads done before either store
           * so the chunk behaves like a single 32-byte move. */
          const __m128i lo = _mm_loadu_si128((const __m128i *)Src);
          const __m128i hi = _mm_loadu_si128((const __m128i *)(Src + 16));
          _mm_storeu_si128((__m128i *)Dst, lo);
          _mm_storeu_si128((__m128i *)(Dst + 16), hi);
  #endif
      }

      /* Tail phase: Size is now 0..31.  Each set bit of Size selects one move
       * of that width.  The 2- and 4-byte steps use unaligned scalar accesses
       * through casted pointers, which relies on x86 tolerating them. */
      if (Size & 1)
      {
          *Dst = *Src;
          Src += 1;
          Dst += 1;
      }
      if (Size & 2)
      {
          *(uint16_t *)Dst = *(const uint16_t *)Src;
          Src += 2;
          Dst += 2;
      }
      if (Size & 4)
      {
          *(uint32_t *)Dst = *(const uint32_t *)Src;
          Src += 4;
          Dst += 4;
      }
      if (Size & 8)
      {
          /* 64-bit load, not a 128-bit one: a 16-byte load here would read
           * up to 8 bytes past the end of the source buffer. */
          _mm_storel_epi64((__m128i *)Dst, _mm_loadl_epi64((const __m128i *)Src));
          Src += 8;
          Dst += 8;
      }
      if (Size & 16)
      {
          _mm_storeu_si128((__m128i *)Dst, _mm_loadu_si128((const __m128i *)Src));
          Src += 16;
          Dst += 16;
      }

      return start;
  }

  #undef MYMEMCPY_USE_AVX
  #undef MYMEMCPY_INLINE
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound or popup ads; we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×