// View difference between Paste ID: W1Vrk8dx and TxdHeQZY
// SHOW: | | - or go back to the newest paste.
1
#include <emmintrin.h>
2
#include <stdlib.h>
3
#include <stdio.h>
4
#include <time.h>
5
6
// Band boundaries used by func(), funcX() and func4() to map a signed
// difference onto a level in [-4, 4]. The bands are mirrored around zero.
#define NEAR 10    // -NEAR <= Di <= NEAR maps to level 0
#define T1 100     // boundary between levels 1 and 2 (and -1 / -2 at -T1)
#define T2 1000    // boundary between levels 2 and 3 (and -2 / -3 at -T2)
#define T3 10000   // boundary between levels 3 and 4 (and -3 / -4 at -T3)
10
11
int func(int Di) {
12
  if (Di <= -T3) return  -4;
13
  if (Di <= -T2) return  -3;
14
  if (Di <= -T1) return  -2;
15
  if (Di < -NEAR)  return  -1;
16
  if (Di <= NEAR) return   0;
17
  if (Di < T1)   return   1;
18
  if (Di < T2)   return   2;
19
  if (Di < T3)   return   3;
20
  return  4;
21
}
22
23
int funcX(int Di) {
24
  int cmp_m3 = -(Di > -T3);
25
  int cmp_m2 = -(Di > -T2);
26
  int cmp_m1 = -(Di > -T1);
27
  int cmp_p0 = -(Di > NEAR);
28
  int reduce_true = cmp_m3 + cmp_m2 + cmp_m1 + cmp_p0;
29
  int cmp_m0 = -(Di < -NEAR);
30
  int cmp_p1 = -(Di < T1);
31
  int cmp_p2 = -(Di < T2);
32
  int cmp_p3 = -(Di < T3);
33
  int reduce_false = cmp_p3 + cmp_p2 + cmp_p1 + cmp_m0;
34
  return reduce_false - reduce_true;
35
}
36
37
__m128i func4(__m128i D) {
38
  __m128i cmp_m3 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T3));
39
  __m128i cmp_m2 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T2));
40
  __m128i cmp_m1 = _mm_cmpgt_epi32(D, _mm_set1_epi32(-T1));
41
  __m128i cmp_p0 = _mm_cmpgt_epi32(D, _mm_set1_epi32(NEAR));
42
  __m128i reduce_true = _mm_add_epi32(_mm_add_epi32(cmp_m3, cmp_m2), _mm_add_epi32(cmp_m1, cmp_p0));
43
  __m128i cmp_m0 = _mm_cmplt_epi32(D, _mm_set1_epi32(-NEAR));
44
  __m128i cmp_p1 = _mm_cmplt_epi32(D, _mm_set1_epi32(T1));
45
  __m128i cmp_p2 = _mm_cmplt_epi32(D, _mm_set1_epi32(T2));
46
  __m128i cmp_p3 = _mm_cmplt_epi32(D, _mm_set1_epi32(T3));
47
  __m128i reduce_false = _mm_add_epi32(_mm_add_epi32(cmp_p3, cmp_p2), _mm_add_epi32(cmp_p1, cmp_m0));
48
  return _mm_sub_epi32(reduce_false, reduce_true);
49
}
50
// [diff residue] line marked "51-" in the paste diff: data_i[i] = rand() - 16768;
52
53
static union {
54
  int data_i[4 * COUNT];
55
  __m128i data_r[COUNT];
56
};
57
58
static union {
59
  int res_i[4];
60
  __m128i res_r;
61
};
62
63
int main() {
64
  for (int i = 0; i < 4*COUNT; i++)
65
    data_i[i] = rand() % 32768 - 16768;
66
67
  for (int i = 0; i < COUNT; i++) {
68
    int corr[4];
69
    for (int j = 0; j < 4; j++)
70
      corr[j] = func(data_i[4*i+j]);
71
72
    res_r = func4(data_r[i]);
73
    for (int j = 0; j < 4; j++)
74
      if (corr[j] != res_i[j])
75
        printf("Wrong!");
76
77
    for (int j = 0; j < 4; j++)
78
      res_i[j] = funcX(data_i[4*i+j]);
79
    for (int j = 0; j < 4; j++)
80
      if (corr[j] != res_i[j])
81
        printf("Wrong!");
82
  }
83
84
  {
85
    int start = clock();
86
    int sum = 0;
87
    for (int q = 0; q < 10000; q++) {
88
      for (int i = 0; i < 4*COUNT; i++)
89
        sum += func(data_i[i]);
90
    }
91
    printf("Time = %0.3lf   (%d)\n", double(clock() - start) / CLOCKS_PER_SEC, sum);
92
  }
93
94
  {
95
    int start = clock();
96
    __m128i sum = _mm_setzero_si128();
97
    for (int q = 0; q < 10000; q++) {
98
      for (int i = 0; i < COUNT; i++)
99
        sum = _mm_add_epi32(sum, func4(data_r[i]));
100
    }
101
    res_r = sum;
102
    int rsum = res_i[0] + res_i[1] + res_i[2] + res_i[3];
103
    printf("Time = %0.3lf   (%d)\n", double(clock() - start) / CLOCKS_PER_SEC, rsum);
104
  }
105
106
  return 0; 
107
}