Advertisement
Guest User

Untitled

a guest
Dec 20th, 2012
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 7.27 KB | None | 0 0
  1. /*
  2.  * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
  3.  *
  4.  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5.  * with this source code for terms and conditions that govern your use of
  6.  * this software. Any use, reproduction, disclosure, or distribution of
  7.  * this software and related documentation outside the terms of the EULA
  8.  * is strictly prohibited.
  9.  *
  10.  */
  11.  
  12. #include "cuda_runtime.h"
  13. #include "device_launch_parameters.h"
  14.  
  15. // Type-specific implementation of rounded arithmetic operators.
  16. // Thin layer over the CUDA intrinsics.
  17.  
  18. #ifndef CUDA_INTERVAL_ROUNDED_ARITH_H
  19. #define CUDA_INTERVAL_ROUNDED_ARITH_H
  20.  
  21.  
  22. // Temporary workaround for CUDA 3.0/3.1 on SM 1.3 devices:
  23. // missing double-precision div/sqrt with directed rounding
  24. #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ <= 130
  25. #define NO_DOUBLE_DIV
  26. #endif
  27.  
  // Primary template: declares the rounded-arithmetic interface only.
  // No member is defined here, so the template is usable only through its
  // explicit specializations for a concrete T.
  template<typename T>
  struct rounded_arith
  {
      // Binary arithmetic; the "_down" / "_up" suffix names the rounding
      // direction the specializations are expected to implement.
      __device__ T add_down (const T& x, const T& y);
      __device__ T add_up   (const T& x, const T& y);
      __device__ T sub_down (const T& x, const T& y);
      __device__ T sub_up   (const T& x, const T& y);
      __device__ T mul_down (const T& x, const T& y);
      __device__ T mul_up   (const T& x, const T& y);
      __device__ T div_down (const T& x, const T& y);
      __device__ T div_up   (const T& x, const T& y);

      // Midpoint of x and y.
      __device__ T median   (const T& x, const T& y);

      // Unary operations: square root and conversion to an integral value.
      __device__ T sqrt_down(const T& x);
      __device__ T sqrt_up  (const T& x);
      __device__ T int_down (const T& x);
      __device__ T int_up   (const T& x);

      // Special values. nan() is the only member also callable from host code.
      __device__ T pos_inf();
      __device__ T neg_inf();
      __device__ __host__ T nan();

      // Smaller / larger of two values.
      __device__ T min(const T& x, const T& y);
      __device__ T max(const T& x, const T& y);
  };
  52.  
  // Specialization for float.
  // Thin wrappers over the single-precision CUDA intrinsics: the "_down"
  // members use the round-down (_rd) variants and the "_up" members the
  // round-up (_ru) variants. All members except nan() are device-only.
  template<>
  struct rounded_arith<float>
  {
      // x + y, rounded down.
      __device__ float add_down (const float& x, const float& y) {
          return __fadd_rd(x, y);
      }

      // x + y, rounded up.
      __device__ float add_up   (const float& x, const float& y) {
          return __fadd_ru(x, y);
      }

      // x - y, rounded down; computed as x + (-y), negation being exact.
      __device__ float sub_down (const float& x, const float& y) {
          return __fadd_rd(x, -y);
      }

      // x - y, rounded up; computed as x + (-y), negation being exact.
      __device__ float sub_up   (const float& x, const float& y) {
          return __fadd_ru(x, -y);
      }

      // x * y, rounded down.
      __device__ float mul_down (const float& x, const float& y) {
          return __fmul_rd(x, y);
      }

      // x * y, rounded up.
      __device__ float mul_up   (const float& x, const float& y) {
          return __fmul_ru(x, y);
      }

      // x / y, rounded down.
      __device__ float div_down (const float& x, const float& y) {
          return __fdiv_rd(x, y);
      }

      // x / y, rounded up.
      __device__ float div_up   (const float& x, const float& y) {
          return __fdiv_ru(x, y);
      }

      // Midpoint of x and y (default rounding).
      // The straightforward (x + y) * .5f is used whenever it is finite.
      // When both operands are finite but their sum overflows, the midpoint
      // is recomputed from the halved operands, which cannot overflow.
      __device__ float median   (const float& x, const float& y) {
          const float inf = __int_as_float(0x7f800000);
          const float mid = (x + y) * .5f;
          if (fabsf(mid) == inf && fabsf(x) < inf && fabsf(y) < inf) {
              // Only the intermediate sum overflowed; both operands are
              // large here, so halving them first is exact.
              return x * .5f + y * .5f;
          }
          return mid;
      }

      // sqrt(x), rounded down.
      __device__ float sqrt_down(const float& x) {
          return __fsqrt_rd(x);
      }

      // sqrt(x), rounded up.
      __device__ float sqrt_up  (const float& x) {
          return __fsqrt_ru(x);
      }

      // Largest integral value not greater than x.
      __device__ float int_down (const float& x) {
          return floorf(x);
      }

      // Smallest integral value not less than x.
      __device__ float int_up   (const float& x) {
          return ceilf(x);
      }

      // Negative infinity, obtained by negating +inf so that no unsigned
      // bit pattern has to be narrowed to the int parameter.
      __device__ float neg_inf() {
          return -__int_as_float(0x7f800000);
      }

      // Positive infinity (IEEE-754 single-precision bit pattern 0x7f800000).
      __device__ float pos_inf() {
          return __int_as_float(0x7f800000);
      }

      // Quiet NaN; callable from both host and device code.
      __device__ __host__ float nan() {
          return nanf("");
      }

      // Smaller of x and y, as returned by fminf.
      __device__ float min(float const & x, float const & y) {
          return fminf(x, y);
      }

      // Larger of x and y, as returned by fmaxf.
      __device__ float max(float const & x, float const & y) {
          return fmaxf(x, y);
      }
  };
  129.  
  // Software double-precision division a / b with the final steps rounded up.
  // rounded_arith<double>::div_up calls this when NO_DOUBLE_DIV is defined.
  //
  // Declared inline because it is defined in a header: this lets several
  // translation units include the file (notably with relocatable device code)
  // without duplicate-symbol errors.
  //
  // NOTE(review): a is not rescaled, so q0 = a * y0 may overflow for |a| close
  // to DBL_MAX, and the final ldexp may round again when the result is
  // subnormal -- confirm whether callers need those ranges.
  inline __device__ double my_div_ru(double a, double b)
  {
      // A zero divisor or a non-finite operand makes the iteration below
      // produce NaN (e.g. 1/0, 1/inf, inf/2). In all of these cases the IEEE
      // quotient is exactly +/-0, +/-inf or NaN, so plain division already
      // returns the correct result for any rounding direction.
      // !(fabs(v) < inf) is true for both infinities and NaN.
      const double inf = __longlong_as_double(0x7ff0000000000000ll);
      if (b == 0.0 || !(fabs(a) < inf) || !(fabs(b) < inf)) {
          return a / b;
      }

      // Split b into fraction and exponent; the exponent is re-applied to the
      // quotient at the end.
      int expo;
      b = frexp(b, &expo);
      double y0 = (double)(1.0f / __double2float_rn(b));    // y0 22 bits

      // Cubic iteration from Alex
      double e1 = __fma_rn(-b, y0, 1.0);
      double e2 = __fma_rn(e1, e1, e1);
      double y2 = __fma_rn(y0, e2, y0);        // y2 ~ 1/b 66 bits + rounding RN => faithful

      double e3 = __fma_rn(-b, y2, 1.0);       // M
      double y3 = __fma_rn(y2, e3, y2);        // y3 ~ 1/b correct except for 1/(2-eps)

      double q0 = __dmul_rn(a, y0);
      double r0 = __fma_rn(-q0, b, a);         // M
      double q1 = __fma_rn(r0, y2, q0);        // q1 ~ a/b faithful

      // Final residual and correction use the round-up FMA.
      double r1 = __fma_ru(-q1, b, a);         // M
      double q2 = __fma_ru(r1, y3, q1);        // q2 = a/b correct
      return ldexp(q2, -expo);
  }
  152.  
  // Software double-precision division a / b with the final steps rounded down.
  // rounded_arith<double>::div_down calls this when NO_DOUBLE_DIV is defined.
  //
  // Declared inline because it is defined in a header: this lets several
  // translation units include the file (notably with relocatable device code)
  // without duplicate-symbol errors.
  //
  // NOTE(review): a is not rescaled, so q0 = a * y0 may overflow for |a| close
  // to DBL_MAX, and the final ldexp may round again when the result is
  // subnormal -- confirm whether callers need those ranges.
  inline __device__ double my_div_rd(double a, double b)
  {
      // A zero divisor or a non-finite operand makes the iteration below
      // produce NaN (e.g. 1/0, 1/inf, inf/2). In all of these cases the IEEE
      // quotient is exactly +/-0, +/-inf or NaN, so plain division already
      // returns the correct result for any rounding direction.
      // !(fabs(v) < inf) is true for both infinities and NaN.
      const double inf = __longlong_as_double(0x7ff0000000000000ll);
      if (b == 0.0 || !(fabs(a) < inf) || !(fabs(b) < inf)) {
          return a / b;
      }

      // Split b into fraction and exponent; the exponent is re-applied to the
      // quotient at the end.
      int expo;
      b = frexp(b, &expo);
      double y0 = (double)(1.0f / __double2float_rn(b));    // y0 22 bits

      // Same reciprocal refinement as in my_div_ru.
      double e1 = __fma_rn(-b, y0, 1.0);
      double e2 = __fma_rn(e1, e1, e1);
      double y2 = __fma_rn(y0, e2, y0);        // y2 ~ 1/b 66 bits + rounding RN => faithful

      double e3 = __fma_rn(-b, y2, 1.0);       // M
      double y3 = __fma_rn(y2, e3, y2);        // y3 ~ 1/b correct except for 1/(2-eps)

      double q0 = __dmul_rn(a, y0);
      double r0 = __fma_rn(-q0, b, a);         // M
      double q1 = __fma_rn(r0, y2, q0);        // q1 ~ a/b faithful

      // Final residual and correction use the round-down FMA.
      double r1 = __fma_rd(-q1, b, a);         // M
      double q2 = __fma_rd(r1, y3, q1);
      return ldexp(q2, -expo);
  }
  174.  
  // Specialization for double.
  // Thin wrappers over the double-precision CUDA intrinsics: the "_down"
  // members use the round-down (_rd) variants and the "_up" members the
  // round-up (_ru) variants. All members except nan() are device-only.
  // When NO_DOUBLE_DIV is defined, division goes through the software
  // routines my_div_rd / my_div_ru and sqrt_down / sqrt_up are not provided.
  template<>
  struct rounded_arith<double>
  {
      // x + y, rounded down.
      __device__ double add_down (const double& x, const double& y) {
          return __dadd_rd(x, y);
      }

      // x + y, rounded up.
      __device__ double add_up   (const double& x, const double& y) {
          return __dadd_ru(x, y);
      }

      // x - y, rounded down; computed as x + (-y), negation being exact.
      __device__ double sub_down (const double& x, const double& y) {
          return __dadd_rd(x, -y);
      }

      // x - y, rounded up; computed as x + (-y), negation being exact.
      __device__ double sub_up   (const double& x, const double& y) {
          return __dadd_ru(x, -y);
      }

      // x * y, rounded down.
      __device__ double mul_down (const double& x, const double& y) {
          return __dmul_rd(x, y);
      }

      // x * y, rounded up.
      __device__ double mul_up   (const double& x, const double& y) {
          return __dmul_ru(x, y);
      }

      // x / y, rounded down; software fallback when NO_DOUBLE_DIV is defined.
      __device__ double div_down (const double& x, const double& y) {
  #ifndef NO_DOUBLE_DIV
          return __ddiv_rd(x, y);
  #else
          return my_div_rd(x, y);
  #endif
      }

      // x / y, rounded up; software fallback when NO_DOUBLE_DIV is defined.
      __device__ double div_up   (const double& x, const double& y) {
  #ifndef NO_DOUBLE_DIV
          return __ddiv_ru(x, y);
  #else
          return my_div_ru(x, y);
  #endif
      }

      // Midpoint of x and y (default rounding).
      // The straightforward (x + y) * .5 is used whenever it is finite.
      // When both operands are finite but their sum overflows, the midpoint
      // is recomputed from the halved operands, which cannot overflow.
      __device__ double median   (const double& x, const double& y) {
          const double inf = __longlong_as_double(0x7ff0000000000000ll);
          const double mid = (x + y) * .5;
          if (fabs(mid) == inf && fabs(x) < inf && fabs(y) < inf) {
              // Only the intermediate sum overflowed; both operands are
              // large here, so halving them first is exact.
              return x * .5 + y * .5;
          }
          return mid;
      }

      // NOTE(review): no square root is provided when NO_DOUBLE_DIV is defined,
      // so code calling sqrt_down / sqrt_up will not compile in that
      // configuration.
  #ifndef NO_DOUBLE_DIV
      // sqrt(x), rounded down.
      __device__ double sqrt_down(const double& x) {
          return __dsqrt_rd(x);
      }

      // sqrt(x), rounded up.
      __device__ double sqrt_up  (const double& x) {
          return __dsqrt_ru(x);
      }
  #endif
      // Largest integral value not greater than x.
      __device__ double int_down (const double& x) {
          return floor(x);
      }

      // Smallest integral value not less than x.
      __device__ double int_up   (const double& x) {
          return ceil(x);
      }

      // Negative infinity, obtained by negating +inf so that no unsigned
      // bit pattern has to be narrowed to the signed parameter.
      __device__ double neg_inf() {
          return -__longlong_as_double(0x7ff0000000000000ll);
      }

      // Positive infinity (IEEE-754 double-precision bit pattern).
      __device__ double pos_inf() {
          return __longlong_as_double(0x7ff0000000000000ull);
      }

      // Quiet NaN; callable from both host and device code. The global
      // qualifier selects the math-library ::nan instead of this member.
      __device__ __host__ double nan() {
          return ::nan("");
      }

      // Smaller of x and y, as returned by fmin.
      __device__ double min(double const & x, double const & y) {
          return fmin(x, y);
      }

      // Larger of x and y, as returned by fmax.
      __device__ double max(double const & x, double const & y) {
          return fmax(x, y);
      }
  };
  258.  
  259. #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement