• API
• FAQ
• Tools
• Trends
• Archive
daily pastebin goal
67%
SHARE
TWEET

# fastPow with union, with Benchmark + warmup

martinus Jan 25th, 2012 1,325 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. #include <iostream>
2. #include <cmath>
3. #include <conio.h>
4. #include <windows.h>
5.
6.
7. // uses union, no -fno-alias... required
8. inline double fastPow(const double a, const double b) {
9.   union {
10.     double d;
11.     int x[2];
12.   } u = { a };
13.   u.x[1] = (int)(b * (u.x[1] - 1072632447) + 1072632447);
14.   u.x[0] = 0;
15.   return u.d;
16. }
17.
18. inline double fastStructPow(const double a, const double b) {
19.   union {
20.     double d;
21.     struct {
22.       int a;
23.       int b;
24.     } s;
25.   } u = { a };
26.   u.s.b = (int)(b * (u.s.b - 1072632447) + 1072632447);
27.   u.s.a = 0;
28.   return u.d;
29. }
30.
31.
32. // old version
33. inline double oldFastPow(const double a, const double b)
34. {
35.   int tmp = *(1 + (int*)&a);
36.   int tmp2 = (int)(b * (tmp - 1072632447) + 1072632447);
37.   double p = 0.0;
38.   *(1 + (int * )&p) = tmp2;
39.   return p;
40. }
41.
42. // should be much more precise with large b
43. inline double fastPrecisePow(double a, const double b) {
44.   // calculate approximation with just the fraction of the exponent
45.   int exp = (int) b;
46.   union {
47.     double d;
48.     int x[2];
49.   } u = { a };
50.   u.x[1] = (int)((b - exp) * (u.x[1] - 1072632447) + 1072632447);
51.   u.x[0] = 0;
52.
53.   // exponentiation by squaring with the exponent's integer part
54.   // double r = u.d makes everything much slower, not sure why
55.   double r = 1.0;
56.   while (exp) {
57.     if (exp & 1) {
58.       r *= a;
59.     }
60.     a *= a;
61.     exp >>= 1;
62.   }
63.
64.   return r * u.d;
65. }
66.
67.
68.
69. int main() {
70.   double x_max = 4.5423;
71.   double y_max = 32.3211;
72.
73.   size_t times = 300000000;
74.   double dx = x_max / times;
75.   double dy = y_max / times;
76.
77.   // warmup
78.   LARGE_INTEGER frequency;
79.   QueryPerformanceFrequency(&frequency);
80.   LARGE_INTEGER start;
81.   LARGE_INTEGER end;
82.   QueryPerformanceCounter(&start);
83.   double sum = 0;
84.   double x = 0.1;
85.   double y = 0.1;
86.   double a = 1.0 / times;
87.   for (size_t i=0; i<times; ++i) {
88.     x += dx;
89.     y += dy;
90.     sum += x * y + x + y / (i - y);
91.   }
92.   QueryPerformanceCounter(&end);
93.   std::cout << sum << " warmup" << std::endl << std::endl;
94.
95.
96.   // original pow ///////////////////////////////////
97.   QueryPerformanceCounter(&start);
98.   sum = 0;
99.   x = 0;
100.   y = 0;
101.   for (size_t i=0; i<times; ++i) {
102.     x += dx;
103.     y += dy;
104.     sum += pow(x, y);
105.   }
106.   QueryPerformanceCounter(&end);
108.   std::cout << sum << " pow() " << t_pow << " sec" << std::endl;
109.
110.
111.   // fastPow ////////////////////////////////
112.   QueryPerformanceCounter(&start);
113.   sum = 0;
114.   x = 0;
115.   y = 0;
116.   for (size_t i=0; i<times; ++i) {
117.     x += dx;
118.     y += dy;
119.     sum += fastPow(x, y);
120.   }
121.   QueryPerformanceCounter(&end);
123.   std::cout << sum << " fastPow() " << t_fastPow << " sec, speedup=" << (t_pow / t_fastPow) << std::endl;
124.
125.   // oldFastPow ////////////////////////////////
126.   QueryPerformanceCounter(&start);
127.   sum = 0;
128.   x = 0;
129.   y = 0;
130.   for (size_t i=0; i<times; ++i) {
131.     x += dx;
132.     y += dy;
133.     sum += oldFastPow(x, y);
134.   }
135.   QueryPerformanceCounter(&end);
137.   std::cout << sum << " oldFastPow() " << t_oldFastPow << " sec, speedup=" << (t_pow / t_oldFastPow) << std::endl;
138.
139.   // fastStructPow ////////////////////////////////
140.   QueryPerformanceCounter(&start);
141.   sum = 0;
142.   x = 0;
143.   y = 0;
144.   for (size_t i=0; i<times; ++i) {
145.     x += dx;
146.     y += dy;
147.     sum += fastStructPow(x, y);
148.   }
149.   QueryPerformanceCounter(&end);
151.   std::cout << sum << " fastStructPow() " << t_fastStructPow << " sec, speedup=" << (t_pow / t_fastStructPow) << std::endl;
152.
153.
154.   // fastPrecisePow ////////////////////////////////
155.   QueryPerformanceCounter(&start);
156.   sum = 0;
157.   x = 0;
158.   y = 0;
159.   for (size_t i=0; i<times; ++i) {
160.     x += dx;
161.     y += dy;
162.     sum += fastPrecisePow(x, y);
163.   }
164.   QueryPerformanceCounter(&end);