Advertisement
stgatilov

Vectorized byte table lookup

Sep 1st, 2015
597
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.48 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdint.h>
  3. #include <assert.h>
  4. #include <vector>
  5. #include <tmmintrin.h>
  6.  
  // 3x3 lookup table of byte codes: the eight outer cells hold 0..7 and the
  // centre cell holds 255, i.e. the value that -1 wraps to in a uint8_t.
  // The centre is spelled 255 because -1 inside a braced initializer is a
  // narrowing conversion, which C++11 and later reject.
  // NOTE(review): presumably 8-neighbour chain-code directions; the axis sketch
  // on the right is the original author's - confirm it against how callers
  // order the two subscripts.
  uint8_t chain[3][3] = {
    {3,   2, 1},  //  y --> x
    {4, 255, 0},  //  |
    {5,   6, 7}   //  |
  };              //  v
  12.  
  13. int main() {
  14.   std::vector<uint8_t> x;
  15.   std::vector<uint8_t> y;    
  16.   int n = 10000;
  17.   for (int i = 0; i < n; i++) {
  18.     x.push_back(i % 3);
  19.     y.push_back((i + 1) % 5 % 3);
  20.   }
  21.  
  22.   //initialise table register
  23.   __m128i table = _mm_setr_epi8(
  24.     chain[0][0], chain[0][1], chain[0][2], 99,
  25.     chain[1][0], chain[1][1], chain[1][2], 99,
  26.     chain[2][0], chain[2][1], chain[2][2], 99,
  27.     99, 99, 99, 99
  28.   );
  29.  
  30.   std::vector<uint8_t> chain_code(n);
  31.   int b = (n / 16) * 16;
  32.   for (int i = 0; i < b; i += 16) {
  33.     //load 16 X/Y bytes
  34.     __m128i regX = _mm_loadu_si128((__m128i*)&x[i]);
  35.     __m128i regY = _mm_loadu_si128((__m128i*)&y[i]);
  36.     //shift all X values left by 2 bits (as 16-bit integers)
  37.     __m128i regX4 = _mm_slli_epi16(regX, 2);
  38.     //calculate linear indices (x * 4 + y)
  39.     __m128i indices = _mm_add_epi8(regX4, regY);
  40.     //perform 16 lookups
  41.     __m128i res = _mm_shuffle_epi8(table, indices);
  42.     //store results
  43.     _mm_storeu_si128((__m128i*)&chain_code[i], res);
  44.   }
  45.   for (int i = b; i < n; i++)
  46.     chain_code[i] = chain[x[i]][y[i]];                        
  47.   for (int i = 0; i < 10000; i++) {
  48. //    printf("%d %d\n", int(chain_code[i]), int(chain[x[i]][y[i]]));
  49.     assert(chain_code[i] == chain[x[i]][y[i]]);
  50.   }
  51.  
  52.   return 0;
  53. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement