Advertisement
martinus

fastPow with union, with Benchmark + warmup

Jan 25th, 2012
3,963
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.18 KB | None | 0 0
  1. #include <iostream>
  2. #include <cmath>
  3. #include <conio.h>
  4. #include <windows.h>
  5.  
  6.  
  7. // uses union, no -fno-alias... required
  8. inline double fastPow(const double a, const double b) {
  9.   union {
  10.     double d;
  11.     int x[2];
  12.   } u = { a };
  13.   u.x[1] = (int)(b * (u.x[1] - 1072632447) + 1072632447);
  14.   u.x[0] = 0;
  15.   return u.d;
  16. }
  17.  
  18. inline double fastStructPow(const double a, const double b) {
  19.   union {
  20.     double d;
  21.     struct {
  22.       int a;
  23.       int b;
  24.     } s;
  25.   } u = { a };
  26.   u.s.b = (int)(b * (u.s.b - 1072632447) + 1072632447);
  27.   u.s.a = 0;
  28.   return u.d;
  29. }
  30.  
  31.  
  32. // old version
  33. inline double oldFastPow(const double a, const double b)
  34. {
  35.   int tmp = *(1 + (int*)&a);
  36.   int tmp2 = (int)(b * (tmp - 1072632447) + 1072632447);
  37.   double p = 0.0;
  38.   *(1 + (int * )&p) = tmp2;
  39.   return p;
  40. }
  41.  
  42. // should be much more precise with large b
  43. inline double fastPrecisePow(double a, const double b) {
  44.   // calculate approximation with just the fraction of the exponent
  45.   int exp = (int) b;
  46.   union {
  47.     double d;
  48.     int x[2];
  49.   } u = { a };
  50.   u.x[1] = (int)((b - exp) * (u.x[1] - 1072632447) + 1072632447);
  51.   u.x[0] = 0;
  52.  
  53.   // exponentiation by squaring with the exponent's integer part
  54.   // double r = u.d makes everything much slower, not sure why
  55.   double r = 1.0;
  56.   while (exp) {
  57.     if (exp & 1) {
  58.       r *= a;
  59.     }
  60.     a *= a;
  61.     exp >>= 1;
  62.   }
  63.  
  64.   return r * u.d;
  65. }
  66.  
  67.  
  68.  
  69. int main() {
  70.   double x_max = 4.5423;
  71.   double y_max = 32.3211;
  72.  
  73.   size_t times = 300000000;
  74.   double dx = x_max / times;
  75.   double dy = y_max / times;
  76.  
  77.   // warmup
  78.   LARGE_INTEGER frequency;
  79.   QueryPerformanceFrequency(&frequency);
  80.   LARGE_INTEGER start;
  81.   LARGE_INTEGER end;
  82.   QueryPerformanceCounter(&start);
  83.   double sum = 0;
  84.   double x = 0.1;
  85.   double y = 0.1;
  86.   double a = 1.0 / times;
  87.   for (size_t i=0; i<times; ++i) {
  88.     x += dx;
  89.     y += dy;
  90.     sum += x * y + x + y / (i - y);
  91.   }
  92.   QueryPerformanceCounter(&end);
  93.   std::cout << sum << " warmup" << std::endl << std::endl;
  94.  
  95.  
  96.   // original pow ///////////////////////////////////
  97.   QueryPerformanceCounter(&start);
  98.   sum = 0;
  99.   x = 0;
  100.   y = 0;
  101.   for (size_t i=0; i<times; ++i) {
  102.     x += dx;
  103.     y += dy;
  104.     sum += pow(x, y);
  105.   }
  106.   QueryPerformanceCounter(&end);
  107.   double t_pow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
  108.   std::cout << sum << " pow() " << t_pow << " sec" << std::endl;
  109.  
  110.  
  111.   // fastPow ////////////////////////////////
  112.   QueryPerformanceCounter(&start);
  113.   sum = 0;
  114.   x = 0;
  115.   y = 0;
  116.   for (size_t i=0; i<times; ++i) {
  117.     x += dx;
  118.     y += dy;
  119.     sum += fastPow(x, y);
  120.   }
  121.   QueryPerformanceCounter(&end);
  122.   double t_fastPow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
  123.   std::cout << sum << " fastPow() " << t_fastPow << " sec, speedup=" << (t_pow / t_fastPow) << std::endl;
  124.  
  125.   // oldFastPow ////////////////////////////////
  126.   QueryPerformanceCounter(&start);
  127.   sum = 0;
  128.   x = 0;
  129.   y = 0;
  130.   for (size_t i=0; i<times; ++i) {
  131.     x += dx;
  132.     y += dy;
  133.     sum += oldFastPow(x, y);
  134.   }
  135.   QueryPerformanceCounter(&end);
  136.   double t_oldFastPow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
  137.   std::cout << sum << " oldFastPow() " << t_oldFastPow << " sec, speedup=" << (t_pow / t_oldFastPow) << std::endl;
  138.  
  139.   // fastStructPow ////////////////////////////////
  140.   QueryPerformanceCounter(&start);
  141.   sum = 0;
  142.   x = 0;
  143.   y = 0;
  144.   for (size_t i=0; i<times; ++i) {
  145.     x += dx;
  146.     y += dy;
  147.     sum += fastStructPow(x, y);
  148.   }
  149.   QueryPerformanceCounter(&end);
  150.   double t_fastStructPow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
  151.   std::cout << sum << " fastStructPow() " << t_fastStructPow << " sec, speedup=" << (t_pow / t_fastStructPow) << std::endl;
  152.  
  153.  
  154.   // fastPrecisePow ////////////////////////////////
  155.   QueryPerformanceCounter(&start);
  156.   sum = 0;
  157.   x = 0;
  158.   y = 0;
  159.   for (size_t i=0; i<times; ++i) {
  160.     x += dx;
  161.     y += dy;
  162.     sum += fastPrecisePow(x, y);
  163.   }
  164.   QueryPerformanceCounter(&end);
  165.   double t_fastPrecisePow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
  166.   std::cout << sum << " fastPrecisePow() " << t_fastPrecisePow << " sec, speedup=" << (t_pow / t_fastPrecisePow) << std::endl;
  167. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement