Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <emmintrin.h>
/*
 * Bucket boundaries shared by func, funcX and func4.
 * NEAR bounds the central "zero" bucket; T1 < T2 < T3 are the magnitudes
 * at which the bucket index steps further away from zero.
 */
#define NEAR 10
#define T1 100
#define T2 1000
#define T3 10000
- int func(int Di) {
- if (Di <= -T3) return -4;
- if (Di <= -T2) return -3;
- if (Di <= -T1) return -2;
- if (Di < -NEAR) return -1;
- if (Di <= NEAR) return 0;
- if (Di < T1) return 1;
- if (Di < T2) return 2;
- if (Di < T3) return 3;
- return 4;
- }
- int funcX(int Di) {
- int cmp_m3 = -(Di > -T3);
- int cmp_m2 = -(Di > -T2);
- int cmp_m1 = -(Di > -T1);
- int cmp_p0 = -(Di > NEAR);
- int reduce_true = cmp_m3 + cmp_m2 + cmp_m1 + cmp_p0;
- int cmp_m0 = -(Di < -NEAR);
- int cmp_p1 = -(Di < T1);
- int cmp_p2 = -(Di < T2);
- int cmp_p3 = -(Di < T3);
- int reduce_false = cmp_p3 + cmp_p2 + cmp_p1 + cmp_m0;
- return reduce_false - reduce_true;
- }
- __m128i func4(__m128i D) {
- __m128i cmp_m3 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T3));
- __m128i cmp_m2 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T2));
- __m128i cmp_m1 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T1));
- __m128i cmp_p0 = _mm_cmpgt_epi32(D, _mm_set1_epi32(NEAR));
- __m128i reduce_true = _mm_add_epi32(_mm_add_epi32(cmp_m3, cmp_m2), _mm_add_epi32(cmp_m1, cmp_p0));
- __m128i cmp_m0 = _mm_cmplt_epi32(D, _mm_set1_epi32(-NEAR));
- __m128i cmp_p1 = _mm_cmplt_epi32(D, _mm_set1_epi32(T1));
- __m128i cmp_p2 = _mm_cmplt_epi32(D, _mm_set1_epi32(T2));
- __m128i cmp_p3 = _mm_cmplt_epi32(D, _mm_set1_epi32(T3));
- __m128i reduce_false = _mm_add_epi32(_mm_add_epi32(cmp_p3, cmp_p2), _mm_add_epi32(cmp_p1, cmp_m0));
- return _mm_sub_epi32(reduce_false, reduce_true);
- }
/* Number of 128-bit vectors in the data set (16384 vectors = 65536 ints). */
const int COUNT = 16<<10;

/*
 * Input data, addressable over the same storage either as individual ints
 * (data_i) or as packed 4 x 32-bit vectors (data_r).
 *
 * NOTE(review): code that writes one member and reads the other depends on
 * the compiler permitting type punning through unions -- confirm for the
 * target toolchain.
 */
static union {
    int data_i[4 * COUNT];
    __m128i data_r[COUNT];
};

/*
 * Scratch result, addressable either as four ints (res_i) or as one vector
 * (res_r); the same punning caveat applies.
 */
static union {
    int res_i[4];
    __m128i res_r;
};
- int main() {
- for (int i = 0; i < 4*COUNT; i++)
- data_i[i] = rand() % 32768 - 16768;
- for (int i = 0; i < COUNT; i++) {
- int corr[4];
- for (int j = 0; j < 4; j++)
- corr[j] = func(data_i[4*i+j]);
- res_r = func4(data_r[i]);
- for (int j = 0; j < 4; j++)
- if (corr[j] != res_i[j])
- printf("Wrong!");
- for (int j = 0; j < 4; j++)
- res_i[j] = funcX(data_i[4*i+j]);
- for (int j = 0; j < 4; j++)
- if (corr[j] != res_i[j])
- printf("Wrong!");
- }
- {
- int start = clock();
- int sum = 0;
- for (int q = 0; q < 10000; q++) {
- for (int i = 0; i < 4*COUNT; i++)
- sum += func(data_i[i]);
- }
- printf("Time = %0.3lf (%d)\n", double(clock() - start) / CLOCKS_PER_SEC, sum);
- }
- {
- int start = clock();
- __m128i sum = _mm_setzero_si128();
- for (int q = 0; q < 10000; q++) {
- for (int i = 0; i < COUNT; i++)
- sum = _mm_add_epi32(sum, func4(data_r[i]));
- }
- res_r = sum;
- int rsum = res_i[0] + res_i[1] + res_i[2] + res_i[3];
- printf("Time = %0.3lf (%d)\n", double(clock() - start) / CLOCKS_PER_SEC, rsum);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement