Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <cmath>
- #include <conio.h>
- #include <windows.h>
- // uses union, no -fno-alias... required
- inline double fastPow(const double a, const double b) {
- union {
- double d;
- int x[2];
- } u = { a };
- u.x[1] = (int)(b * (u.x[1] - 1072632447) + 1072632447);
- u.x[0] = 0;
- return u.d;
- }
- inline double fastStructPow(const double a, const double b) {
- union {
- double d;
- struct {
- int a;
- int b;
- } s;
- } u = { a };
- u.s.b = (int)(b * (u.s.b - 1072632447) + 1072632447);
- u.s.a = 0;
- return u.d;
- }
- // old version
- inline double oldFastPow(const double a, const double b)
- {
- int tmp = *(1 + (int*)&a);
- int tmp2 = (int)(b * (tmp - 1072632447) + 1072632447);
- double p = 0.0;
- *(1 + (int * )&p) = tmp2;
- return p;
- }
- // should be much more precise with large b
- inline double fastPrecisePow(double a, const double b) {
- // calculate approximation with just the fraction of the exponent
- int exp = (int) b;
- union {
- double d;
- int x[2];
- } u = { a };
- u.x[1] = (int)((b - exp) * (u.x[1] - 1072632447) + 1072632447);
- u.x[0] = 0;
- // exponentiation by squaring with the exponent's integer part
- // double r = u.d makes everything much slower, not sure why
- double r = 1.0;
- while (exp) {
- if (exp & 1) {
- r *= a;
- }
- a *= a;
- exp >>= 1;
- }
- return r * u.d;
- }
- int main() {
- double x_max = 4.5423;
- double y_max = 32.3211;
- size_t times = 300000000;
- double dx = x_max / times;
- double dy = y_max / times;
- // warmup
- LARGE_INTEGER frequency;
- QueryPerformanceFrequency(&frequency);
- LARGE_INTEGER start;
- LARGE_INTEGER end;
- QueryPerformanceCounter(&start);
- double sum = 0;
- double x = 0.1;
- double y = 0.1;
- double a = 1.0 / times;
- for (size_t i=0; i<times; ++i) {
- x += dx;
- y += dy;
- sum += x * y + x + y / (i - y);
- }
- QueryPerformanceCounter(&end);
- std::cout << sum << " warmup" << std::endl << std::endl;
- // original pow ///////////////////////////////////
- QueryPerformanceCounter(&start);
- sum = 0;
- x = 0;
- y = 0;
- for (size_t i=0; i<times; ++i) {
- x += dx;
- y += dy;
- sum += pow(x, y);
- }
- QueryPerformanceCounter(&end);
- double t_pow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
- std::cout << sum << " pow() " << t_pow << " sec" << std::endl;
- // fastPow ////////////////////////////////
- QueryPerformanceCounter(&start);
- sum = 0;
- x = 0;
- y = 0;
- for (size_t i=0; i<times; ++i) {
- x += dx;
- y += dy;
- sum += fastPow(x, y);
- }
- QueryPerformanceCounter(&end);
- double t_fastPow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
- std::cout << sum << " fastPow() " << t_fastPow << " sec, speedup=" << (t_pow / t_fastPow) << std::endl;
- // oldFastPow ////////////////////////////////
- QueryPerformanceCounter(&start);
- sum = 0;
- x = 0;
- y = 0;
- for (size_t i=0; i<times; ++i) {
- x += dx;
- y += dy;
- sum += oldFastPow(x, y);
- }
- QueryPerformanceCounter(&end);
- double t_oldFastPow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
- std::cout << sum << " oldFastPow() " << t_oldFastPow << " sec, speedup=" << (t_pow / t_oldFastPow) << std::endl;
- // fastStructPow ////////////////////////////////
- QueryPerformanceCounter(&start);
- sum = 0;
- x = 0;
- y = 0;
- for (size_t i=0; i<times; ++i) {
- x += dx;
- y += dy;
- sum += fastStructPow(x, y);
- }
- QueryPerformanceCounter(&end);
- double t_fastStructPow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
- std::cout << sum << " fastStructPow() " << t_fastStructPow << " sec, speedup=" << (t_pow / t_fastStructPow) << std::endl;
- // fastPrecisePow ////////////////////////////////
- QueryPerformanceCounter(&start);
- sum = 0;
- x = 0;
- y = 0;
- for (size_t i=0; i<times; ++i) {
- x += dx;
- y += dy;
- sum += fastPrecisePow(x, y);
- }
- QueryPerformanceCounter(&end);
- double t_fastPrecisePow = ((end.QuadPart - start.QuadPart) * 1.0 / frequency.QuadPart);
- std::cout << sum << " fastPrecisePow() " << t_fastPrecisePow << " sec, speedup=" << (t_pow / t_fastPrecisePow) << std::endl;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement