Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdint.h>
- #include <assert.h>
- #include <vector>
- #include <tmmintrin.h>
// Lookup table mapping a pair of small coordinates to a chain code.
// It is indexed as chain[x][y]: the first index (x) selects the row below,
// the second index (y) selects the column. Both indices must be in [0, 2].
// NOTE(review): the layout looks like the usual 8-direction chain-code
// numbering around a centre cell -- confirm against the intended convention.
//
// The centre entry has no direction and holds the sentinel (uint8_t)-1 == 255.
// It is written with an explicit cast because a bare -1 inside a braced
// initializer is a narrowing conversion, which is ill-formed since C++11.
uint8_t chain[3][3] = {
    {3, 2, 1},                          // x == 0
    {4, static_cast<uint8_t>(-1), 0},   // x == 1 (centre = "no direction")
    {5, 6, 7}                           // x == 2
};
- int main() {
- std::vector<uint8_t> x;
- std::vector<uint8_t> y;
- int n = 10000;
- for (int i = 0; i < n; i++) {
- x.push_back(i % 3);
- y.push_back((i + 1) % 5 % 3);
- }
- //initialise table register
- __m128i table = _mm_setr_epi8(
- chain[0][0], chain[0][1], chain[0][2], 99,
- chain[1][0], chain[1][1], chain[1][2], 99,
- chain[2][0], chain[2][1], chain[2][2], 99,
- 99, 99, 99, 99
- );
- std::vector<uint8_t> chain_code(n);
- int b = (n / 16) * 16;
- for (int i = 0; i < b; i += 16) {
- //load 16 X/Y bytes
- __m128i regX = _mm_loadu_si128((__m128i*)&x[i]);
- __m128i regY = _mm_loadu_si128((__m128i*)&y[i]);
- //shift all X values left by 2 bits (as 16-bit integers)
- __m128i regX4 = _mm_slli_epi16(regX, 2);
- //calculate linear indices (x * 4 + y)
- __m128i indices = _mm_add_epi8(regX4, regY);
- //perform 16 lookups
- __m128i res = _mm_shuffle_epi8(table, indices);
- //store results
- _mm_storeu_si128((__m128i*)&chain_code[i], res);
- }
- for (int i = b; i < n; i++)
- chain_code[i] = chain[x[i]][y[i]];
- for (int i = 0; i < 10000; i++) {
- // printf("%d %d\n", int(chain_code[i]), int(chain[x[i]][y[i]]));
- assert(chain_code[i] == chain[x[i]][y[i]]);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement