Advertisement
zhangsongcui

myMemset

Sep 13th, 2011
275
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.12 KB | None | 0 0
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

  4. __attribute__((always_inline)) void* myMemset(void* Dst, int Val, size_t Size)
  5. {
  6.     void* first = Dst;
  7.     __m256i ymm = _mm256_set1_epi8((uint8_t)Val);
  8.     for (; Size >= 32; Size -= 32)
  9.     {
  10.         _mm256_storeu_si256((__m256i *)Dst, ymm);
  11.         (uint8_t* &)Dst += 32;
  12.     }
  13. #if defined _M_X64 || defined _M_IA64 || defined __amd64
  14.     const uint64_t temp = 0x0d101010101010101ULL * (uint8_t)Val;
  15. #else
  16.     const uint32_t temp = 0x0d1010101UL * (uint8_t)Val;
  17. #endif
  18. #define SET1B *((uint8_t*  &)Dst)++ = (uint8_t )temp
  19. #define SET2B *((uint16_t* &)Dst)++ = (uint16_t)temp
  20. #define SET4B *((uint32_t* &)Dst)++ = (uint32_t)temp
  21. #if defined _M_X64 || defined _M_IA64 || defined __amd64
  22. #   define SET8B *((uint64_t* &)Dst)++ = (uint64_t)temp
  23. #else
  24. #   define SET8B _mm_storel_epi64((__m128i *)Dst, _mm256_castsi256_si128(ymm)), ++(uint64_t* &)Dst
  25. #endif
  26. #define SET16B _mm_storeu_si128((__m128i *)Dst, _mm256_castsi256_si128(ymm)), ++(__m128i* &)Dst
  27.     switch (Size)
  28.     {
  29.     case 0x00:                                          break;
  30.     case 0x01:  SET1B;                                  break;
  31.     case 0x02:          SET2B;                          break;
  32.     case 0x03:  SET1B;  SET2B;                          break;
  33.     case 0x04:  SET4B;                                  break;
  34.     case 0x05:  SET1B;          SET4B;                  break;
  35.     case 0x06:          SET2B;  SET4B;                  break;
  36.     case 0x07:  SET1B;  SET2B;  SET4B;                  break;
  37.     case 0x08:                          SET8B;          break;
  38.     case 0x09:  SET1B;                  SET8B;          break;
  39.     case 0x0A:          SET2B;          SET8B;          break;
  40.     case 0x0B:  SET1B;  SET2B;          SET8B;          break;
  41.     case 0x0C:                  SET4B;  SET8B;          break;
  42.     case 0x0D:  SET1B;          SET4B;  SET8B;          break;
  43.     case 0x0E:          SET2B;  SET4B;  SET8B;          break;
  44.     case 0x0F:  SET1B;  SET2B;  SET4B;  SET8B;          break;
  45.     case 0x10:                                  SET16B; break;
  46.     case 0x11:  SET1B;                          SET16B; break;
  47.     case 0x12:          SET2B;                  SET16B; break;
  48.     case 0x13:  SET1B;  SET2B;                  SET16B; break;
  49.     case 0x14:  SET4B;                          SET16B; break;
  50.     case 0x15:  SET1B;          SET4B;          SET16B; break;
  51.     case 0x16:          SET2B;  SET4B;          SET16B; break;
  52.     case 0x17:  SET1B;  SET2B;  SET4B;          SET16B; break;
  53.     case 0x18:                          SET8B;  SET16B; break;
  54.     case 0x19:  SET1B;                  SET8B;  SET16B; break;
  55.     case 0x1A:          SET2B;          SET8B;  SET16B; break;
  56.     case 0x1B:  SET1B;  SET2B;          SET8B;  SET16B; break;
  57.     case 0x1C:                  SET4B;  SET8B;  SET16B; break;
  58.     case 0x1D:  SET1B;          SET4B;  SET8B;  SET16B; break;
  59.     case 0x1E:          SET2B;  SET4B;  SET8B;  SET16B; break;
  60.     case 0x1F:  SET1B;  SET2B;  SET4B;  SET8B;  SET16B; break;
  61.     }
  62. #undef SET1B
  63. #undef SET2B
  64. #undef SET4B
  65. #undef SET8B
  66. #undef SET16B
  67.     return first;
  68. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement