Advertisement
stgatilov

Vectorized base4 numbers conversion

Sep 4th, 2015
285
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.69 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdint.h>
  3. #include <stdlib.h>
  4. #include <time.h>
  5. #include <tmmintrin.h>
  6.  
  7.  
  /**
   * Parse a base-4 digit string, least significant digit first, into 32 bits.
   *
   * Character i of s contributes (s[i] - '0') shifted left by 2*i bits.
   * Parsing stops at the terminating NUL or after 16 digits, whichever comes
   * first: a uint32_t only has room for 16 two-bit digits, and shifting by
   * 32 or more would be undefined behaviour.
   *
   * Characters are not validated; anything outside '0'..'3' is still folded
   * in using well-defined unsigned (modulo 2^32) arithmetic.
   *
   * @param s NUL-terminated digit string.
   * @return  The decoded value (0 for an empty string).
   */
  uint32_t str2bin(const char *s) {
      enum { MAX_DIGITS = 16 };
      uint32_t val = 0;
      for (int i = 0; i < MAX_DIGITS && s[i]; i++) {
          /* Shift as uint32_t: a digit of 2 or 3 at position 15 reaches bit 31,
             which is not representable in a signed int. */
          uint32_t digit = (uint32_t)(s[i] - '0');
          val += digit << (2 * i);
      }
      return val;
  }
  /**
   * Write val as exactly 16 base-4 digits, least significant digit first,
   * followed by a NUL terminator.
   *
   * Leading zeros are never trimmed, so the output is always 16 characters.
   *
   * @param val Value to convert.
   * @param s   Destination buffer; 17 bytes are written.
   * @return    Number of digits written (always 16).
   */
  size_t bin2str(uint32_t val, char *s) {
      const size_t ndigits = 16;
      char *out = s;
      uint32_t rest = val;
      while (out != s + ndigits) {
          /* Peel off the lowest two bits and emit them as '0'..'3'. */
          *out++ = (char)('0' + (rest & 3u));
          rest >>= 2;
      }
      *out = '\0';
      return (size_t)(out - s);
  }
  23.  
  /**
   * SSE2 counterpart of str2bin for a string of exactly 16 base-4 digits,
   * least significant digit first.
   *
   * Always loads 16 bytes from s (unaligned load); no NUL check is done, so
   * s must point to at least 16 readable bytes. Only the two low bits of each
   * (byte - '0') end up in the result.
   *
   * @param s Pointer to 16 digit characters.
   * @return  The decoded 32-bit value.
   */
  uint32_t str2bin_sse(const char *s) {
      const __m128i ascii_zero = _mm_set1_epi8('0');
      __m128i digits = _mm_sub_epi8(_mm_loadu_si128((const __m128i *)s), ascii_zero);

      /* Move bit 0 / bit 1 of every digit into the top bit of its byte, which
         is the bit that movemask collects. */
      __m128i low_bits  = _mm_slli_epi16(digits, 7);
      __m128i high_bits = _mm_slli_epi16(digits, 6);

      /* Interleave so each digit yields (bit0, bit1) in adjacent bytes; the
         low unpack covers digits 0..7, the high unpack digits 8..15. */
      uint32_t lo_half = (uint32_t)_mm_movemask_epi8(_mm_unpacklo_epi8(low_bits, high_bits));
      uint32_t hi_half = (uint32_t)_mm_movemask_epi8(_mm_unpackhi_epi8(low_bits, high_bits));

      return (hi_half << 16) + lo_half;
  }
  36. size_t bin2str_sse_a(uint32_t val, char *s) {
  37.     __m128i reg = _mm_cvtsi32_si128(val);
  38.     __m128i bytes = _mm_shuffle_epi8(reg, _mm_setr_epi8(-1, 0, -1, 0, -1, 1, -1, 1, -1, 2, -1, 2, -1, 3, -1, 3));
  39.     __m128i even = _mm_mulhi_epu16(bytes, _mm_set1_epi32(0x00100100));  //epi16:  1<<8,  1<<4  x4 times
  40.     __m128i odd  = _mm_mulhi_epu16(bytes, _mm_set1_epi32(0x04004000));  //epi16: 1<<14, 1<<10  x4 times
  41.     even = _mm_and_si128(even, _mm_set1_epi16(0x0003));
  42.     odd  = _mm_and_si128(odd , _mm_set1_epi16(0x0300));
  43.     __m128i res = _mm_xor_si128(even, odd);
  44.     res = _mm_add_epi8(res, _mm_set1_epi8('0'));
  45.     _mm_storeu_si128((__m128i*)s, res);
  46.     return 16;
  47. }
  48. size_t bin2str_sse_b(uint32_t val, char *s) {
  49.     __m128i reg = _mm_cvtsi32_si128(val);
  50.     __m128i bytes = _mm_shuffle_epi8(reg, _mm_setr_epi8(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3));
  51.     __m128i twobits = _mm_and_si128(bytes, _mm_set1_epi32(0xC0300C03));         //epi8: 3<<0, 3<<2, 3<<4, 3<<6  x4 times
  52.     twobits = _mm_maddubs_epi16(twobits, _mm_set1_epi16(0x4001));               //epi8: 1<<0, 1<<6  x8 times
  53.     __m128i res = _mm_madd_epi16(twobits, _mm_set1_epi32(0x10000001));          //epi16: 1<<0, 1<<12  x4 times
  54.     res = _mm_add_epi8(res, _mm_set1_epi8('0'));
  55.     _mm_storeu_si128((__m128i*)s, res);
  56.     return 16;
  57. }
  58.  
  59.  
  /*
   * Round-trip self-test: converts 70 pseudo-random 32-bit values to base-4
   * text with bin2str_sse_b, parses them back with str2bin_sse, and prints
   * both values; a trailing "!" marks a mismatch.
   */
  int main(void) {
      srand((unsigned)time(NULL));
      for (int i = 0; i < 70; i++) {
          /* Stitch four rand() results together, 15 bits at a time, to fill
             all 32 bits (presumably because rand() may yield only 15 bits). */
          uint32_t num = (uint32_t)rand();
          for (int k = 0; k < 3; k++) {
              num <<= 15;
              num ^= (uint32_t)rand();
          }

          char str[20];
          size_t len = bin2str_sse_b(num, str);
          /* Terminate at the returned length so the "%s" below never reads
             uninitialised bytes of str. */
          str[len] = '\0';

          uint32_t val = str2bin_sse(str);

          printf("%08X %s %08X   %s\n", (unsigned)num, str, (unsigned)val, (num == val ? "" : "!"));
      }

      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement