Advertisement
Guest User

benchmark quake "fast" inverse sqrt compared to _mm_rsqrt_ps

a guest
Feb 5th, 2016
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 2.31 KB | None | 0 0
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <windows.h>
#include <xmmintrin.h>
  5.  
// Number of iterations each benchmark loop executes.
#define N 12345678
  7.  
  8. double sec(){
  9.     LARGE_INTEGER t, frequency;
  10.     QueryPerformanceFrequency(&frequency);
  11.     QueryPerformanceCounter(&t);
  12.     return t.QuadPart / (double)frequency.QuadPart;
  13. }
  14.  
  15. float slow_imprecise_sqrt(float number){
  16.     long i;
  17.     float x2, y;
  18.     const float threehalfs = 1.5F;
  19.  
  20.     x2 = number * 0.5F;
  21.     y  = number;
  22.     i  = * ( long * ) &y;                       // evil floating point bit level hacking
  23.     i  = 0x5f3759df - ( i >> 1 );               // what the fuck?
  24.     y  = * ( float * ) &i;
  25.     y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration
  26.     //y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed
  27.  
  28.     return y;
  29. }
  30.  
// Accumulates the final value produced by every benchmark loop. main()
// prints it at the end so the compiler cannot optimize the loops away.
float anti_optimizer = 0.0f;
  33.  
  34. double naive_benchmark(float checksum){
  35.     double t = sec();
  36.  
  37.     size_t i;
  38.     for (i = 0; i < N; i++){
  39.         checksum = 1.0f/sqrtf(checksum);
  40.     }
  41.  
  42.     anti_optimizer += checksum;
  43.  
  44.     return sec() - t;
  45. }
  46.  
  47. double sse_benchmark(float checksum){
  48.     double t = sec();
  49.  
  50.     __m128 x = _mm_set1_ps(checksum);
  51.  
  52.     size_t i;
  53.     for (i = 0; i < N; i++){
  54.         x = _mm_rsqrt_ps(x);
  55.     }
  56.  
  57.     float *f = (float*)&x;
  58.  
  59.     anti_optimizer += f[0] + f[1] + f[2] + f[3];
  60.  
  61.     return sec() - t;
  62. }
  63.  
  64. double quake_benchmark(float checksum){
  65.     double t = sec();
  66.  
  67.     size_t i;
  68.     for (i = 0; i < N; i++){
  69.         checksum = slow_imprecise_sqrt(checksum);
  70.     }
  71.  
  72.     anti_optimizer += checksum;
  73.  
  74.     return sec() - t;
  75. }
  76.  
  77. void run_benchmark(double (*benchmark)(float), const char *name){
  78.     double fastest_benchmark = 1e20;
  79.  
  80.     // always 1 but the compiler does not know so it can't optimize
  81.     float checksum = rand() >= 0;
  82.  
  83.     // run a few times and print the fastest time
  84.     size_t i;
  85.     for (i = 0; i < 10; i++){
  86.         double dt = benchmark(checksum);
  87.         if (dt < fastest_benchmark) fastest_benchmark = dt;
  88.     }
  89.  
  90.     printf("%f milliseconds for %s benchmark\n", 1000.0*fastest_benchmark, name);
  91. }
  92.  
  93. int main(){
  94.     run_benchmark(naive_benchmark, "naive");
  95.     run_benchmark(sse_benchmark, "sse");
  96.     run_benchmark(quake_benchmark, "quake");
  97.  
  98.     printf("ignore: %f\n", anti_optimizer);
  99.  
  100.     return 0;
  101. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement