Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- static word neg_word(warp all_ones, warp one, word value)
- {
- #ifdef __AVX2__
- const __m256i lane64_one = _mm256_broadcastq_epi64(_mm256_castsi256_si128(one));
- const __m256i maxu64 = _mm256_sub_epi64(_mm256_setzero_si256(), one);
- //
- const __m256i notv = _mm256_andnot_si256(value, all_ones);
- const __m256i mask0 = _mm256_cmpeq_epi64(all_ones, notv);
- const __m256i temp = _mm256_permute4x64_epi64(mask0, _MM_SHUFFLE(2, 1, 0, 0));
- const __m256i mask1 = _mm256_blendv_epi8(temp, maxu64, maxu64);
- const __m256i mask2 = _mm256_permute4x64_epi64(mask1, _MM_SHUFFLE(2, 1, 0, 0));
- const __m256i mask3 = _mm256_permute4x64_epi64(mask1, _MM_SHUFFLE(1, 0, 0, 0));
- const __m256i mask = _mm256_and_si256(_mm256_and_si256(mask1, mask2), mask3);
- const __m256i carry = _mm256_and_si256(mask, lane64_one);
- const __m256i result = _mm256_add_epi64(notv, carry);
- return result;
- #elif __SSSE3__
- const __m128i maxu64 = _mm_sub_epi64(_mm_setzero_si128(), one);
- const __m128i lane64_one = _mm_shuffle_epi32(one, _MM_SHUFFLE(1, 0, 1, 0));
- //
- const __m128i notvlo = _mm_andnot_si128(value.lo, all_ones);
- const __m128i notvhi = _mm_andnot_si128(value.hi, all_ones);
- const __m128i mask0lo = mm_cmpeq_epi64(all_ones, notvlo);
- const __m128i mask0hi = mm_cmpeq_epi64(all_ones, notvhi);
- const __m128i mask1lo = _mm_or_si128(_mm_bslli_si128(mask0lo, 8), maxu64);
- const __m128i mask1hi = _mm_unpackhi_epi64(mask0lo, _mm_bslli_si128(mask0hi, 8));
- const __m128i mask2lo = all_ones;
- const __m128i mask2hi = mask0lo;
- const __m128i mask3lo = all_ones;
- const __m128i mask3hi = _mm_or_si128(_mm_bslli_si128(mask0lo, 8), maxu64);
- const __m128i masklo = _mm_and_si128(_mm_and_si128(mask1lo, mask2lo), mask3lo);
- const __m128i maskhi = _mm_and_si128(_mm_and_si128(mask1hi, mask2hi), mask3hi);
- const __m128i carrylo = _mm_and_si128(masklo, lane64_one);
- const __m128i carryhi = _mm_and_si128(maskhi, lane64_one);
- const __m128i lo = _mm_add_epi32(notvlo, carrylo);
- const __m128i hi = _mm_add_epi32(notvhi, carryhi);
- return (word){ lo, hi };
- #endif
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement