Advertisement
stgatilov

Khai Nguyen Question

Jul 31st, 2015
654
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <emmintrin.h>
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4. #include <time.h>
  5.  
  6. #define NEAR 10
  7. #define T1 100
  8. #define T2 1000
  9. #define T3 10000
  10.  
  11. int func(int Di) {
  12.   if (Di <= -T3) return  -4;
  13.   if (Di <= -T2) return  -3;
  14.   if (Di <= -T1) return  -2;
  15.   if (Di < -NEAR)  return  -1;
  16.   if (Di <= NEAR) return   0;
  17.   if (Di < T1)   return   1;
  18.   if (Di < T2)   return   2;
  19.   if (Di < T3)   return   3;
  20.   return  4;
  21. }
  22.  
  23. int funcX(int Di) {
  24.   int cmp_m3 = -(Di > -T3);
  25.   int cmp_m2 = -(Di > -T2);
  26.   int cmp_m1 = -(Di > -T1);
  27.   int cmp_p0 = -(Di > NEAR);
  28.   int reduce_true = cmp_m3 + cmp_m2 + cmp_m1 + cmp_p0;
  29.   int cmp_m0 = -(Di < -NEAR);
  30.   int cmp_p1 = -(Di < T1);
  31.   int cmp_p2 = -(Di < T2);
  32.   int cmp_p3 = -(Di < T3);
  33.   int reduce_false = cmp_p3 + cmp_p2 + cmp_p1 + cmp_m0;
  34.   return reduce_false - reduce_true;
  35. }
  36.  
  37. __m128i func4(__m128i D) {
  38.   __m128i cmp_m3 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T3));
  39.   __m128i cmp_m2 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T2));
  40.   __m128i cmp_m1 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T1));
  41.   __m128i cmp_p0 = _mm_cmpgt_epi32(D, _mm_set1_epi32(NEAR));
  42.   __m128i reduce_true = _mm_add_epi32(_mm_add_epi32(cmp_m3, cmp_m2), _mm_add_epi32(cmp_m1, cmp_p0));
  43.   __m128i cmp_m0 = _mm_cmplt_epi32(D, _mm_set1_epi32(-NEAR));
  44.   __m128i cmp_p1 = _mm_cmplt_epi32(D, _mm_set1_epi32(T1));
  45.   __m128i cmp_p2 = _mm_cmplt_epi32(D, _mm_set1_epi32(T2));
  46.   __m128i cmp_p3 = _mm_cmplt_epi32(D, _mm_set1_epi32(T3));
  47.   __m128i reduce_false = _mm_add_epi32(_mm_add_epi32(cmp_p3, cmp_p2), _mm_add_epi32(cmp_p1, cmp_m0));
  48.   return _mm_sub_epi32(reduce_false, reduce_true);
  49. }
  50.  
  /* Number of 4-lane vectors in the data set (16384 vectors, 65536 ints). */
  const int COUNT = 16 * 1024;

  /*
   * Input samples. The same storage is exposed as packed SSE vectors
   * (data_r) and as individual ints (data_i), four ints per vector.
   */
  static union {
    __m128i data_r[COUNT];
    int data_i[4 * COUNT];
  };

  /*
   * Scratch result. One vector (res_r) that can also be read or written
   * lane by lane through res_i.
   */
  static union {
    __m128i res_r;
    int res_i[4];
  };
  62.  
  63. int main() {
  64.   for (int i = 0; i < 4*COUNT; i++)
  65.     data_i[i] = rand() % 32768 - 16768;
  66.  
  67.   for (int i = 0; i < COUNT; i++) {
  68.     int corr[4];
  69.     for (int j = 0; j < 4; j++)
  70.       corr[j] = func(data_i[4*i+j]);
  71.  
  72.     res_r = func4(data_r[i]);
  73.     for (int j = 0; j < 4; j++)
  74.       if (corr[j] != res_i[j])
  75.         printf("Wrong!");
  76.  
  77.     for (int j = 0; j < 4; j++)
  78.       res_i[j] = funcX(data_i[4*i+j]);
  79.     for (int j = 0; j < 4; j++)
  80.       if (corr[j] != res_i[j])
  81.         printf("Wrong!");
  82.   }
  83.  
  84.   {
  85.     int start = clock();
  86.     int sum = 0;
  87.     for (int q = 0; q < 10000; q++) {
  88.       for (int i = 0; i < 4*COUNT; i++)
  89.         sum += func(data_i[i]);
  90.     }
  91.     printf("Time = %0.3lf   (%d)\n", double(clock() - start) / CLOCKS_PER_SEC, sum);
  92.   }
  93.  
  94.   {
  95.     int start = clock();
  96.     __m128i sum = _mm_setzero_si128();
  97.     for (int q = 0; q < 10000; q++) {
  98.       for (int i = 0; i < COUNT; i++)
  99.         sum = _mm_add_epi32(sum, func4(data_r[i]));
  100.     }
  101.     res_r = sum;
  102.     int rsum = res_i[0] + res_i[1] + res_i[2] + res_i[3];
  103.     printf("Time = %0.3lf   (%d)\n", double(clock() - start) / CLOCKS_PER_SEC, rsum);
  104.   }
  105.  
  106.   return 0;
  107. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement