Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <immintrin.h>
/*
 * Sum the 32 bytes held in `start`, treating each byte as unsigned.
 *
 * The reduction works independently on the two 128-bit lanes and combines
 * the two per-lane totals with scalar arithmetic at the end.
 *
 * Returns the total, in the range 0..8160 (32 * 255).
 */
int add(__m256i start) {
    const __m256i zeros = _mm256_setzero_si256();
    const __m256i ones = _mm256_set1_epi8(1);

    /* Unsigned byte * 1, adjacent pairs added: sixteen 16-bit partial sums. */
    __m256i acc = _mm256_maddubs_epi16(start, ones);

    /*
     * Each horizontal add halves the number of live 16-bit sums per lane
     * (8 -> 4 -> 2 -> 1); padding with zeros keeps the upper elements clear.
     */
    for (int round = 0; round < 3; ++round) {
        acc = _mm256_hadd_epi16(acc, zeros);
    }

    /*
     * A lane total is at most 16 * 255 = 4080 and its neighbouring 16-bit
     * element is zero, so reading it as a 32-bit element is exact.
     */
    const int low_lane = _mm256_extract_epi32(acc, 0);
    const int high_lane = _mm256_extract_epi32(acc, 4);
    return low_lane + high_lane;
}
/*
 * Sum the 32 bytes held in `start`, treating each byte as unsigned.
 *
 * Same per-lane reduction as a plain horizontal-add chain, but the two lane
 * totals are combined inside the vector register, so only a single element
 * is extracted at the end.
 *
 * Returns the total, in the range 0..8160 (32 * 255).
 */
int add2(__m256i start) {
    const __m256i zeros = _mm256_setzero_si256();

    /* Unsigned byte * 1, adjacent pairs added: sixteen 16-bit partial sums. */
    __m256i acc = _mm256_maddubs_epi16(start, _mm256_set1_epi8(1));

    /* Three horizontal adds leave each lane's total in its lowest element. */
    for (int round = 0; round < 3; ++round) {
        acc = _mm256_hadd_epi16(acc, zeros);
    }

    /*
     * Selector 2 in the lowest position copies 64-bit element 2 (the start
     * of the upper lane) into element 0; the other positions are unused.
     */
    const __m256i upper_down = _mm256_permute4x64_epi64(acc, _MM_SHUFFLE(0, 0, 0, 2));
    const __m256i combined = _mm256_add_epi64(acc, upper_down);

    return _mm256_extract_epi32(combined, 0);
}
/*
 * Scalar reference: sum the 32 bytes held in `start`, treating each byte as
 * unsigned, by spilling the vector to memory and adding the bytes one by one.
 *
 * Returns the total, in the range 0..8160 (32 * 255).
 */
int add3(__m256i start) {
    enum { BYTE_COUNT = 32 };

    /*
     * unsigned char rather than plain char: the signedness of plain char is
     * implementation-defined, and where it is signed every byte >= 0x80
     * would be added as a negative value, disagreeing with add() and add2(),
     * which treat the bytes as unsigned.
     */
    unsigned char buf[BYTE_COUNT];

    /* Unaligned store: buf has no 32-byte alignment guarantee. */
    _mm256_storeu_si256((__m256i *)buf, start);

    int res = 0;
    for (int i = 0; i < BYTE_COUNT; ++i) {
        res += buf[i];
    }
    return res;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement