Advertisement
Guest User

Untitled

a guest
Feb 18th, 2014
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 28.82 KB | None | 0 0
  1. /**
  2.  * Helper functions used for the diagnostic of the vdt routines.
  3.  * They are not optimised for speed.
  4.  * Authors: Danilo Piparo CERN
  5.  **/
  6.  
  7.  
  8. /*
  9.  * VDT is free software: you can redistribute it and/or modify
  10.  * it under the terms of the GNU Lesser Public License as published by
  11.  * the Free Software Foundation, either version 3 of the License, or
  12.  * (at your option) any later version.
  13.  *
  14.  * This program is distributed in the hope that it will be useful,
  15.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17.  * GNU Lesser Public License for more details.
  18.  *
  19.  * You should have received a copy of the GNU Lesser Public License
  20.  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21.  */
  22.  
  23. #ifndef VDTHELPER_H_
  24. #define VDTHELPER_H_
  25.  
  26. #include <bitset>
  27. #include <iostream>
  28. #include <sstream>
  29. #include <string>
  30. #include <functional>
  31. #include "inttypes.h"
  32. // #include "x86intrin.h"
  33. #include <cmath> //for log2
  34. #include "time.h"
  35. #include "sys/time.h"
  36.  
  37. #ifdef __APPLE__
  38. #include <CoreServices/CoreServices.h>
  39. #include <mach/mach.h>
  40. #include <mach/mach_time.h>
  41. #include <unistd.h>
  42. #endif
  43.  
  44.  
  45. namespace{
  46.  
  47. // Establish the size of the double and single precision and the bitsets
  48. constexpr double _tmp=0;
  49. constexpr uint32_t dp_size_in_bits = sizeof(_tmp)*8;
  50. using dp_bitset = std::bitset<dp_size_in_bits>;
  51.  
  52. }
  53.  
  54. namespace vdth{
  55.  
  56. //------------------------------------------------------------------------------
  57.  
  58. // Useful alias for some functions
  59. using dpdpfunction = std::function<double(double)>;
  60. using dpdpfunctionv = std::function<void (uint32_t, double*, double*)>;
  61. using spspfunction = std::function<float(float)>;
  62. using spspfunctionv = std::function<void(uint32_t, float*, float*)>;
  63.  
  64. using dpdp2function = std::function<double(double,double)>;
  65. using dpdp2functionv = std::function<void (uint32_t, double*, double*, double*)>;
  66. using spsp2function = std::function<float(float,float)>;
  67. using spsp2functionv = std::function<void(uint32_t, float*, float*, float*)>;
  68. //maybe for convenience
  69. template<class T> using genfpfunction = std::function<T(T)>;
  70. template<class T> using genfpfunctionv = std::function<void(uint32_t, T*, T*)>;
  71. template<class T> using genfp2function = std::function<T(T,T)>;
  72. template<class T> using genfp2functionv = std::function<void(uint32_t, T*, T*, T*)>;
  73. //------------------------------------------------------------------------------
  74. /// Useful union
  75.  
  76. union standard{
  77.     double dp;
  78.     float sp[2];
  79.     uint64_t li;
  80. };
  81.  
  82. //------------------------------------------------------------------------------
  83.  
  84. template<class T>
  85. uint32_t inline getSizeInbits(const T x){
  86.     return sizeof(x) * 8;
  87. }
  88.  
  89. //------------------------------------------------------------------------------
  90.  
  91. /// Convert a fp into a uint64_t not optimised for speed
  92. template<class T>
  93. inline uint64_t fp2uint64(const T x){
  94.     const uint32_t size = getSizeInbits(x);
  95.     standard conv;
  96.     conv.dp=0.;
  97.     if (size==dp_size_in_bits)
  98.         conv.dp=x;
  99.     else
  100.         conv.sp[0]=x;
  101.     return conv.li;
  102.  
  103. }
  104.  
  105. //------------------------------------------------------------------------------
  106. /// Convert a double into a bitset
  107. template<class T>
  108. inline const dp_bitset fp2bs( const T x ){
  109.     dp_bitset const bits (fp2uint64(x));
  110.     return bits;
  111. }
  112.  
  113. //------------------------------------------------------------------------------
  114. /// Print as a dp formatted bitset
  115. template<class T>
  116. const std::string getbsasstr(const T x){
  117.  
  118.     const uint32_t size = getSizeInbits(x);
  119.  
  120.     uint32_t offset = 0;
  121.     uint32_t exp_size = 11;
  122.     uint32_t mant_size = 52;
  123.     if (size!=dp_size_in_bits){
  124.         offset = 32;
  125.         exp_size = 8;
  126.         mant_size = 23;
  127.     }
  128.  
  129.     // Convert the bitstream to string
  130.     std::string bitset_as_string (fp2bs(x).to_string());
  131.  
  132.     std::ostringstream os;
  133.  
  134.     // sign
  135.     os  << bitset_as_string[offset] << " ";
  136.     // exponent
  137.     for (unsigned int i=offset+1;i<offset+1+exp_size;i++)
  138.         os <<  bitset_as_string[i];
  139.     os << " ";
  140.     //mantissa
  141.     for (unsigned int i=offset+1+exp_size;i<offset+1+exp_size+mant_size;i++)
  142.         os <<  bitset_as_string[i];
  143.  
  144.     return os.str();
  145. }
  146.  
  147.  
  148. //------------------------------------------------------------------------------
  149. /// Returns most significative different bit dp
  150. template <class T>
  151. uint16_t diffbit(const T a,const T b ){
  152.     /// make a xor
  153.     uint64_t ia = fp2uint64(a);
  154.     uint64_t ib = fp2uint64(b);
  155.     uint64_t c = ia>ib? ia-ib : ib -ia;
  156.     //uint64_t c = ia^ib;
  157.     /// return the log2+1
  158.     return log2(c)+1;
  159. }
  160.  
  161. //------------------------------------------------------------------------------
  162.  
  163. ///Check and print which instructions sets are enabled.
  164. void print_instructions_info(){
  165.  
  166.     std::ostringstream os;
  167.     os << "List of enabled instructions' sets:\n";
  168.  
  169.     os << " o SSE2 instructions set "
  170. #ifndef __SSE2__
  171.             << "not "
  172. #endif
  173.             << "enabled.\n"
  174.  
  175.             << " o SSE3 instructions set "
  176. #ifndef __SSE3__
  177.             << "not "
  178. #endif
  179.             << "enabled.\n"
  180.  
  181.             << " o SSE4.1 instructions set "
  182. #ifndef __SSE4_1__
  183.             << "not "
  184. #endif
  185.             << "enabled.\n"
  186.  
  187.             << " o AVX instructions set "
  188. #ifndef __AVX__
  189.             << "not "
  190. #endif
  191.             << "enabled.\n";
  192.     std::cout << os.str();
  193. }
  194.  
  195. //------------------------------------------------------------------------------
  196.  
  197. /// Print the different bit of two fp numbers
  198. template<class T>
  199. void print_different_bit(const T a, const T b, const bool show_identical=true){
  200.  
  201.     std::cout.precision(10);
  202.     std::cout << "Different bit between " << a << " and " << b
  203.             << " is " << diffbit(a,b) << std::endl;
  204.     if (show_identical)
  205.         std::cout << getbsasstr(a) << std::endl
  206.         << getbsasstr(b) << std::endl<< std::endl;
  207. }
  208.  
  209.  
  210. //------------------------------------------------------------------------------
  211.  
  212. /// Invoke two functions and print on screen their argument and different bits
  213. template<class T>
  214. void printFuncDiff(const std::string& func_name, std::function<T(T)> f1,std::function<T(T)> f2, const T x){
  215.     std::cout << "Function " << func_name << "(" << x << ")" <<  std::endl;
  216.     print_different_bit(f1(x),f2(x),true);
  217. }
  218.  
  219. /// Invoke two functions and print on screen their argument and different bits
  220. template<class T>
  221. void printFuncDiff(const std::string& func_name, std::function<T(T,T)> f1,std::function<T(T,T)> f2, const T x, const T y){
  222.     std::cout << "Function " << func_name << "(" << x << ", "<< y <<")" <<  std::endl;
  223.     print_different_bit(f1(x,y),f2(x,y),true);
  224. }
  225.  
  226. //------------------------------------------------------------------------------
  227.  
  228. /// Invoke two functions and print on screen their argument and different bits
  229. template<class T>
  230. void printFuncDiff(const std::string& func_name,
  231.         genfpfunctionv<T> f1,
  232.         genfpfunctionv<T> f2,
  233.         T* x_arr,
  234.         const uint32_t size){
  235.     std::cout << "Function " << func_name << std::endl;
  236.     T* res_1 = new T[size];
  237.     f1(size,x_arr,res_1);
  238.     T* res_2 = new T[size];
  239.     f2(size,x_arr,res_2);
  240.     for (uint32_t i=0;i<size;i++){
  241.         std::cout << "Calculated in " << x_arr[i] << std::endl;
  242.         print_different_bit(res_1[i],res_2[i],true);
  243.     };
  244.     delete [] res_1;
  245.     delete [] res_2;
  246. }
  247.  
  248. /// Invoke two functions and print on screen their argument and different bits
  249. template<class T>
  250. void printFuncDiff(const std::string& func_name,
  251.         genfp2functionv<T> f1,
  252.         genfp2functionv<T> f2,
  253.         T* x_arr,
  254.         T* y_arr,
  255.         const uint32_t size){
  256.     std::cout << "Function " << func_name << std::endl;
  257.     T* res_1 = new T[size];
  258.     f1(size,x_arr,y_arr,res_1);
  259.     T* res_2 = new T[size];
  260.     f2(size,x_arr,y_arr,res_2);
  261.     for (uint32_t i=0;i<size;i++){
  262.         std::cout << "Calculated in (" << x_arr[i] << ", " << y_arr[i] << ")" << std::endl;
  263.         print_different_bit(res_1[i],res_2[i],true);
  264.     };
  265.     delete [] res_1;
  266.     delete [] res_2;
  267. }
  268.  
  269. //------------------------------------------------------------------------------
  270. // Function tests
  271. /// Test a fp function with a double (double) signatures
  272. template<class T>
  273. void printFuncDiff(const std::string& name,
  274.         std::function<T(T)> fpfunction,
  275.         std::function<T(T)> fpfunction_ref,
  276.         T* fpvals,
  277.         const uint32_t size){
  278.  
  279.     for (uint32_t i=0;i<size;i++)
  280.         printFuncDiff ( name,
  281.                 (std::function<T(T)>) fpfunction,
  282.                 (std::function<T(T)>) fpfunction_ref,
  283.                 fpvals[i] );
  284.  
  285. }
  286.  
  287.  
  288. //------------------------------------------------------------------------------
  289. // Function tests
  290. /// Test a fp function with a double (double) signatures
  291. template<class T>
  292. void printFuncDiff(const std::string& name,
  293.         std::function<T(T,T)> fpfunction,
  294.         std::function<T(T,T)> fpfunction_ref,
  295.         T* fpvals1,
  296.         T* fpvals2,
  297.         const uint32_t size){
  298.  
  299.     for (uint32_t i=0;i<size;i++)
  300.         printFuncDiff ( name,
  301.                 (std::function<T(T,T)>) fpfunction,
  302.                 (std::function<T(T,T)>) fpfunction_ref,
  303.                 fpvals1[i],
  304.                 fpvals2[i]);
  305.  
  306. }
  307.  
  308. //------------------------------------------------------------------------------
  309. /// Get the clock cycles
  310. class timer{
  311. public:
  312.     timer(){}
  313.     ~timer(){}
  314.     void print(){
  315.         const uint64_t nsecs=get_elapsed_time();
  316.         std::cout << "Time elapsed: " << nsecs << " nanoseconds.\n";// ("
  317.         //<< m_get_elapsed_clocks(nsecs) << " clock)\n";
  318.     }
  319. #if defined (__APPLE__)
  320.     void inline start(){m_time1=mach_absolute_time();}
  321.     void inline stop(){m_time2=mach_absolute_time();}
  322.     uint64_t get_elapsed_time(){
  323.         static mach_timebase_info_data_t    sTimebaseInfo;
  324.         const uint64_t elapsed = m_time2 - m_time1;
  325.         // Convert to nanoseconds.
  326.         // Have to do some pointer fun because AbsoluteToNanoseconds
  327.         // works in terms of UnsignedWide, which is a structure rather
  328.         // than a proper 64-bit integer.
  329.  
  330.         if ( sTimebaseInfo.denom == 0 ) {
  331.             (void) mach_timebase_info(&sTimebaseInfo);
  332.         }
  333.  
  334.         // Do the maths. We hope that the multiplication doesn't
  335.         // overflow; the price you pay for working in fixed point.
  336.  
  337.         uint64_t elapsedNano = elapsed * sTimebaseInfo.numer / sTimebaseInfo.denom;
  338.  
  339.         return elapsedNano;
  340.     }
  341.  
  342. private:
  343.     uint64_t m_time1,m_time2;
  344.  
  345. #else
  346.     void inline start(){
  347.         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &m_time1);
  348.     }
  349.     void inline stop(){
  350.         clock_gettime(CLOCK_THREAD_CPUTIME_ID, &m_time2);
  351.     }
  352.  
  353.     /// Return time in nanoseconds
  354.  
  355.     uint64_t get_elapsed_time(){
  356.         timespec temp;
  357.         temp.tv_sec = m_time2.tv_sec-m_time1.tv_sec;
  358.         temp.tv_nsec = m_time2.tv_nsec-m_time1.tv_nsec;
  359.         uint64_t elapsed_time = temp.tv_nsec;
  360.         elapsed_time += 1e9*temp.tv_sec;
  361.         return elapsed_time;
  362.     }
  363.  
  364. private:
  365.     timespec m_time1,m_time2;
  366. #endif
  367.  
  368. };
  369.  
  370. //------------------------------------------------------------------------------
  371. // inline uint64_t getcpuclock() {
  372. //  return __rdtsc();
  373. // }
  374.  
  375.  
  376. }//end of namespace vdth
  377. #endif
  378.  
  379. /*
  380.  * vdtcore_common.h
  381.  * Common functions for the vdt routines.
  382.  * The basic idea is to exploit Pade polynomials.
  383.  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
  384.  * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos,
  385.  * tan, asin, acos and atan functions. The Cephes library can be found here:
  386.  * http://www.netlib.org/cephes/
  387.  *
  388.  *  Created on: Jun 23, 2012
  389.  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
  390.  */
  391.  
  392. /*
  393.  * VDT is free software: you can redistribute it and/or modify
  394.  * it under the terms of the GNU Lesser Public License as published by
  395.  * the Free Software Foundation, either version 3 of the License, or
  396.  * (at your option) any later version.
  397.  *
  398.  * This program is distributed in the hope that it will be useful,
  399.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  400.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  401.  * GNU Lesser Public License for more details.
  402.  *
  403.  * You should have received a copy of the GNU Lesser Public License
  404.  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  405.  */
  406.  
  407. #ifndef VDTCOMMON_H_
  408. #define VDTCOMMON_H_
  409.  
  410. #include "inttypes.h"
  411. #include <cmath>
  412.  
  413. namespace vdt{
  414.  
  415. namespace details{
  416.  
  417. // Constants
  418. const double TWOPI = 2.*M_PI;
  419. const double PI = M_PI;
  420. const double PIO2 = M_PI_2;
  421. const double PIO4 = M_PI_4;
  422. const double ONEOPIO4 = 4./M_PI;
  423.  
  424. const float TWOPIF = 2.*M_PI;
  425. const float PIF = M_PI;
  426. const float PIO2F = M_PI_2;
  427. const float PIO4F = M_PI_4;
  428. const float ONEOPIO4F = 4./M_PI;
  429.  
  430. const double MOREBITS = 6.123233995736765886130E-17;
  431.  
  432.  
  433. const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
  434.  
  435. //------------------------------------------------------------------------------
  436.  
  437. /// Used to switch between different type of interpretations of the data (64 bits)
  438. union ieee754{
  439.     ieee754 () {};
  440.     ieee754 (double thed) {d=thed;};
  441.     ieee754 (uint64_t thell) {ll=thell;};
  442.     ieee754 (float thef) {f[0]=thef;};
  443.     ieee754 (uint32_t thei) {i[0]=thei;};
  444.   double d;
  445.   float f[2];
  446.   uint32_t i[2];
  447.   uint64_t ll;
  448.   uint16_t s[4];
  449. };
  450.  
  451. //------------------------------------------------------------------------------
  452.  
  453. /// Converts an unsigned long long to a double
  454. inline double uint642dp(uint64_t ll) {
  455.   ieee754 tmp;
  456.   tmp.ll=ll;
  457.   return tmp.d;
  458. }
  459.  
  460. //------------------------------------------------------------------------------
  461.  
  462. /// Converts a double to an unsigned long long
  463. inline uint64_t dp2uint64(double x) {
  464.   ieee754 tmp;
  465.   tmp.d=x;
  466.   return tmp.ll;
  467. }
  468.  
  469. //------------------------------------------------------------------------------
  470. /// Makes an AND of a double and a unsigned long long
  471. inline double dpANDuint64(const double x, const uint64_t i ){
  472.   return uint642dp(dp2uint64(x) & i);
  473. }
  474. //------------------------------------------------------------------------------
  475. /// Makes an OR of a double and a unsigned long long
  476. inline double dpORuint64(const double x, const uint64_t i ){
  477.   return uint642dp(dp2uint64(x) | i);
  478. }
  479.  
  480. /// Makes a XOR of a double and a unsigned long long
  481. inline double dpXORuint64(const double x, const uint64_t i ){
  482.   return uint642dp(dp2uint64(x) ^ i);
  483. }
  484.  
  485. //------------------------------------------------------------------------------
  486. inline uint64_t getSignMask(const double x){
  487.   const uint64_t mask=0x8000000000000000ULL;
  488.   return dp2uint64(x) & mask;
  489. }
  490.  
  491. //------------------------------------------------------------------------------
  492. /// Converts an int to a float
  493. inline float uint322sp(int x) {
  494.     ieee754 tmp;
  495.     tmp.i[0]=x;
  496.     return tmp.f[0];
  497.   }
  498.  
  499. //------------------------------------------------------------------------------
  500. /// Converts a float to an int
  501. inline uint32_t sp2uint32(float x) {
  502.     ieee754 tmp;
  503.     tmp.f[0]=x;
  504.     return tmp.i[0];
  505.   }
  506.  
  507. //------------------------------------------------------------------------------
  508. /// Makes an AND of a float and a unsigned long
  509. inline float spANDuint32(const float x, const uint32_t i ){
  510.   return uint322sp(sp2uint32(x) & i);
  511. }
  512. //------------------------------------------------------------------------------
  513. /// Makes an OR of a float and a unsigned long
  514. inline float spORuint32(const float x, const uint32_t i ){
  515.   return uint322sp(sp2uint32(x) | i);
  516. }
  517.  
  518. //------------------------------------------------------------------------------
  519. /// Makes an OR of a float and a unsigned long
  520. inline float spXORuint32(const float x, const uint32_t i ){
  521.   return uint322sp(sp2uint32(x) ^ i);
  522. }
  523. //------------------------------------------------------------------------------
  524. /// Get the sign mask
  525. inline uint32_t getSignMask(const float x){
  526.   const uint32_t mask=0x80000000;
  527.   return sp2uint32(x) & mask;
  528. }
  529.  
  530. //------------------------------------------------------------------------------
  531. /// Like frexp but vectorising and the exponent is a double.
  532. inline double getMantExponent(const double x, double & fe){
  533.  
  534.   uint64_t n = dp2uint64(x);
  535.  
  536.   // Shift to the right up to the beginning of the exponent.
  537.   // Then with a mask, cut off the sign bit
  538.   uint64_t le = (n >> 52);
  539.  
  540.   // chop the head of the number: an int contains more than 11 bits (32)
  541.   int32_t e = le; // This is important since sums on uint64_t do not vectorise
  542.   fe = e-1023 ;
  543.  
  544.   // This puts to 11 zeroes the exponent
  545.   n &=0x800FFFFFFFFFFFFFULL;
  546.   // build a mask which is 0.5, i.e. an exponent equal to 1022
  547.   // which means *2, see the above +1.
  548.   const uint64_t p05 = 0x3FE0000000000000ULL; //dp2uint64(0.5);
  549.   n |= p05;
  550.  
  551.   return uint642dp(n);
  552. }
  553.  
  554. //------------------------------------------------------------------------------
  555. /// Like frexp but vectorising and the exponent is a float.
  556. inline float getMantExponentf(const float x, float & fe){
  557.  
  558.     uint32_t n = sp2uint32(x);
  559.     int32_t e = (n >> 23)-127;
  560.     fe = e;
  561.  
  562.     // fractional part
  563.     const uint32_t p05f = 0x3f000000; // //sp2uint32(0.5);
  564.     n &= 0x807fffff;// ~0x7f800000;
  565.     n |= p05f;
  566.  
  567.     return uint322sp(n);
  568.  
  569. }
  570.  
  571. //------------------------------------------------------------------------------
  572. /// Converts a fp to an int
  573. inline uint32_t fp2uint(float x) {
  574.     return sp2uint32(x);
  575.   }
  576. /// Converts a fp to an int
  577. inline uint64_t fp2uint(double x) {
  578.     return dp2uint64(x);
  579.   }
  580. /// Converts an int to fp
  581. inline float int2fp(uint32_t i) {
  582.     return uint322sp(i);
  583.   }
  584. /// Converts an int to fp
  585. inline double int2fp(uint64_t i) {
  586.     return uint642dp(i);
  587.   }
  588.  
  589. //------------------------------------------------------------------------------
  590. /**
  591.  * A vectorisable floor implementation, not only triggered by fast-math.
  592.  * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509
  593.  * compliant for argument -0.0
  594. **/
  595. inline double fpfloor(const double x){
  596.   // no problem since exp is defined between -708 and 708. Int is enough for it!
  597.   int32_t ret = int32_t (x);
  598.   ret-=(sp2uint32(x)>>31);  
  599.   return ret;
  600.  
  601. }
  602. //------------------------------------------------------------------------------
  603. /**
  604.  * A vectorisable floor implementation, not only triggered by fast-math.
  605.  * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509
  606.  * compliant for argument -0.0
  607. **/
  608. inline float fpfloor(const float x){
  609.   int32_t ret = int32_t (x);
  610.   ret-=(sp2uint32(x)>>31);  
  611.   return ret;
  612.  
  613. }
  614.  
  615. //------------------------------------------------------------------------------
  616.  
  617.  
  618.  
  619.  
  620. }
  621.  
  622. } // end of namespace vdt
  623.  
  624. #endif /* VDTCOMMON_H_ */
  625.  
  626.  
  627. /*
  628.  * sincos_common.h
  629.  * The basic idea is to exploit Pade polynomials.
  630.  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
  631.  * moshier@na-net.ornl.gov) as well as actual code.
  632.  * The Cephes library can be found here:  http://www.netlib.org/cephes/
  633.  *
  634.  *  Created on: Jun 23, 2012
  635.  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
  636.  */
  637.  
  638. /*
  639.  * VDT is free software: you can redistribute it and/or modify
  640.  * it under the terms of the GNU Lesser Public License as published by
  641.  * the Free Software Foundation, either version 3 of the License, or
  642.  * (at your option) any later version.
  643.  *
  644.  * This program is distributed in the hope that it will be useful,
  645.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  646.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  647.  * GNU Lesser Public License for more details.
  648.  *
  649.  * You should have received a copy of the GNU Lesser Public License
  650.  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  651.  */
  652.  
  653. #include <cmath>
  654. #include <limits>
  655.  
  656. #ifndef SINCOS_COMMON_H_
  657. #define SINCOS_COMMON_H_
  658.  
  659. namespace vdt{
  660.  
  661. namespace details{
  662.  
  663. // double precision constants
  664.  
  665. const double DP1sc = 7.85398125648498535156E-1;
  666. const double DP2sc = 3.77489470793079817668E-8;
  667. const double DP3sc = 2.69515142907905952645E-15;
  668.  
  669. const double C1sin = 1.58962301576546568060E-10;
  670. const double C2sin =-2.50507477628578072866E-8;
  671. const double C3sin = 2.75573136213857245213E-6;
  672. const double C4sin =-1.98412698295895385996E-4;
  673. const double C5sin = 8.33333333332211858878E-3;
  674. const double C6sin =-1.66666666666666307295E-1;
  675.  
  676. const double C1cos =-1.13585365213876817300E-11;
  677. const double C2cos = 2.08757008419747316778E-9;
  678. const double C3cos =-2.75573141792967388112E-7;
  679. const double C4cos = 2.48015872888517045348E-5;
  680. const double C5cos =-1.38888888888730564116E-3;
  681. const double C6cos = 4.16666666666665929218E-2;
  682.  
  683. const double DP1 = 7.853981554508209228515625E-1;
  684. const double DP2 = 7.94662735614792836714E-9;
  685. const double DP3 = 3.06161699786838294307E-17;
  686.  
  687. // single precision constants
  688.  
  689. const float DP1F = 0.78515625;
  690. const float DP2F = 2.4187564849853515625e-4;
  691. const float DP3F = 3.77489497744594108e-8;
  692.  
  693. const float T24M1 = 16777215.;
  694.  
  695. //------------------------------------------------------------------------------
  696.  
  697. inline double get_sin_px(const double x){
  698.     double px=C1sin;
  699.     px *= x;
  700.     px += C2sin;
  701.     px *= x;
  702.     px += C3sin;
  703.     px *= x;
  704.     px += C4sin;
  705.     px *= x;
  706.     px += C5sin;
  707.     px *= x;
  708.     px += C6sin;
  709.     return px;
  710. }
  711.  
  712. //------------------------------------------------------------------------------
  713.  
  714. inline double get_cos_px(const double x){
  715.     double px=C1cos;
  716.     px *= x;
  717.     px += C2cos;
  718.     px *= x;
  719.     px += C3cos;
  720.     px *= x;
  721.     px += C4cos;
  722.     px *= x;
  723.     px += C5cos;
  724.     px *= x;
  725.     px += C6cos;
  726.     return px;
  727. }
  728.  
  729.  
  730. //------------------------------------------------------------------------------
  731. /// Reduce to 0 to 45
  732. inline double reduce2quadrant(double x, int32_t& quad) {
  733.  
  734.     x = fabs(x);
  735.     quad = int (ONEOPIO4 * x); // always positive, so (int) == std::floor
  736.     quad = (quad+1) & (~1);    
  737.     const double y = double (quad);
  738.     // Extended precision modular arithmetic
  739.     return ((x - y * DP1) - y * DP2) - y * DP3;
  740.   }
  741.  
  742. //------------------------------------------------------------------------------
  743. /// Sincos only for -45deg < x < 45deg
  744. inline void fast_sincos_m45_45( const double z, double & s, double &c ) {
  745.  
  746.     double zz = z * z;    
  747.     s = z  +  z * zz * get_sin_px(zz);                
  748.     c = 1.0 - zz * .5 + zz * zz * get_cos_px(zz);
  749.   }
  750.  
  751.  
  752. //------------------------------------------------------------------------------
  753.  
  754. } // End namespace details
  755.  
  756. /// Double precision sincos
  757. /*inline */ void __attribute__ ((noinline)) fast_sincos( const double xx, double & s, double &c ) {
  758.     // I have to use doubles to make it vectorise...
  759.  
  760.     int j;
  761.     double x = details::reduce2quadrant(xx,j);
  762.     const double signS = (j&4);
  763.  
  764.     j-=2;
  765.  
  766.     const double signC = (j&4);
  767.     const double poly = j&2;
  768.  
  769.     details::fast_sincos_m45_45(x,s,c);
  770.    
  771.     //swap
  772.     if( poly==0 ) {
  773.       const double tmp = c;
  774.       c=s;
  775.       s=tmp;
  776.     }
  777.  
  778.     if(signC == 0.)
  779.       c = -c;
  780.     if(signS != 0.)
  781.       s = -s;
  782.     if (xx < 0.)  
  783.       s = -s;
  784.  
  785.   }
  786.  
  787.  
  788. // Single precision functions
  789.  
  790. namespace details {
  791. //------------------------------------------------------------------------------
  792. /// Reduce to 0 to 45
  793. inline float reduce2quadrant(float x, int & quad) {
  794.     /* make argument positive */
  795.     x = fabs(x);
  796.  
  797.     quad = int (ONEOPIO4F * x); /* integer part of x/PIO4 */
  798.  
  799.     quad = (quad+1) & (~1);
  800.     const float y = float(quad);
  801.     // quad &=4;
  802.     // Extended precision modular arithmetic
  803.     return ((x - y * DP1F) - y * DP2F) - y * DP3F;
  804.   }
  805.  
  806.  
  807. //------------------------------------------------------------------------------
  808.  
  809.  
  810.  
  811. /// Sincos only for -45deg < x < 45deg
  812. inline void fast_sincosf_m45_45( const float x, float & s, float &c ) {
  813.  
  814.     float z = x * x;
  815.  
  816.     s = (((-1.9515295891E-4f * z
  817.        + 8.3321608736E-3f) * z
  818.       - 1.6666654611E-1f) * z * x)
  819.       + x;
  820.  
  821.     c = ((  2.443315711809948E-005f * z
  822.         - 1.388731625493765E-003f) * z
  823.      + 4.166664568298827E-002f) * z * z
  824.       - 0.5f * z + 1.0f;
  825.   }
  826.  
  827. //------------------------------------------------------------------------------
  828.  
  829. } // end details namespace
  830.  
  831. /// Single precision sincos
  832. inline void fast_sincosf( const float xx, float & s, float &c ) {
  833.    
  834.  
  835.     int j;
  836.     const float x = details::reduce2quadrant(xx,j);
  837.     int signS = (j&4);
  838.  
  839.     j-=2;
  840.  
  841.     const int signC = (j&4);
  842.     const int poly = j&2;
  843.  
  844.     float ls,lc;
  845.     details::fast_sincosf_m45_45(x,ls,lc);
  846.  
  847.     //swap
  848.     if( poly==0 ) {
  849.       const float tmp = lc;
  850.       lc=ls; ls=tmp;
  851.     }
  852.  
  853.     if(signC == 0) lc = -lc;
  854.     if(signS != 0) ls = -ls;
  855.     if (xx<0)  ls = -ls;
  856.     c=lc;
  857.     s=ls;
  858.   }
  859.  
  860.  
  861. } // end namespace vdt
  862.  
  863. #endif
  864.  
  865.  
  866. #include <iostream>
  867.  
  868. #include <cmath>
  869. #include <stdint.h>
  870.  
  871. const int samples = 2048; // Table size.
  872. const double pi = 3.1415926535897932;
  873. const double pi_half = pi / 2;
  874. const double inverse_pi_half = 1 / pi_half;
  875. const double two_pi = pi * 2;
  876. const double interval = pi_half / samples;
  877. const double inverse_interval = samples / pi_half;
  878.  
  879. static double table_sin[samples + 2];
  880. static double table_cos_interval[samples + 2];
  881. static double table_cos[samples + 2];
  882.  
  883. void PopulateTrigonometricTable(double* sin_table, double* cos_table_interval, double* cos_table_2, int samples) {
  884.   double* sin_buffer = sin_table;
  885.   double* cos_buffer = cos_table_interval;
  886.   const double pi_half = 3.1415926535897932 / 2;
  887.   double interval = pi_half / samples;
  888.   for (int i = 0; i < samples + 1; i++) {
  889.     double sample = std::sin(i * interval);
  890.     sin_buffer[i] = sample;
  891.     cos_table_2[samples - i] = sample;
  892.     cos_buffer[samples - i] = sample * interval;
  893.   }
  894.  
  895.   // Fill this to catch out of bound accesses when calculating Math.sin(pi/2).
  896.   sin_buffer[samples + 1] = std::sin(pi_half + interval);
  897.   cos_table_2[samples + 1] = std::cos(pi_half + interval);
  898.   cos_buffer[samples + 1] = std::cos(pi_half + interval) * interval;
  899. }
  900.  
  901. void InitTrigonometricFunctions() {
  902.   PopulateTrigonometricTable(table_sin, table_cos_interval, table_cos, samples);
  903. }
  904.  
  905. // This implements the following algorithm.
  906. // 1) Multiplication takes care of to-number conversion.
  907. // 2) Reduce x to the first quadrant [0, pi/2].
  908. // Conveniently enough, in case of +/-Infinity, we get NaN.
  909. // 3) Replace x by (pi/2-x) if x was in the 2nd or 4th quadrant.
  910. // 4) Do a table lookup for the closest samples to the left and right of x.
  911. // 5) Find the derivatives at those sampling points by table lookup:
  912. // dsin(x)/dx = cos(x) = sin(pi/2-x) for x in [0, pi/2].
  913. // 6) Use cubic spline interpolation to approximate sin(x).
  914. // 7) Negate the result if x was in the 3rd or 4th quadrant.
  915. // 8) Get rid of -0 by adding 0.
  916. double __attribute__ ((noinline)) MathSinImpl(double x) {
  917.   double multiple = std::floor(x * inverse_pi_half);
  918.   int multiple_and_one = ((int)multiple) & 1;
  919.   x = multiple_and_one * pi_half + (1 - (multiple_and_one << 1)) * (x - multiple * pi_half);
  920.   int multiple_and_two = ((int)multiple) & 2;
  921.   double double_index = x * inverse_interval;
  922.   int index = (int)double_index/* | 0*/;
  923.   double t1 = double_index - index;
  924.   double t2 = 1 - t1;
  925.   double y1 = table_sin[index];
  926.   double y2 = table_sin[index + 1];
  927.   double dy = y2 - y1;
  928.   return (t2 * y1 + t1 * y2 +
  929.     t1 * t2 * ((table_cos_interval[index] - dy) * t2 +
  930.            (dy - table_cos_interval[index + 1]) * t1)) *
  931.     (1 - multiple_and_two) + 0;
  932. };
  933.  
  934. void MathSinCosImpl(double x, double &sinx, double &cosx) {
  935.   sinx = MathSinImpl(x);
  936.   cosx = MathSinImpl(x + pi_half);
  937. }
  938.  
  939. void MathSinCosImpl2(double x, double &sinx, double &cosx) {
  940.   double multiple = std::floor(x * inverse_pi_half);
  941.   int multiple_and_one = ((int)multiple) & 1;
  942.   x = multiple_and_one * pi_half + (1 - (multiple_and_one << 1)) * (x - multiple * pi_half);
  943.   int multiple_and_two = ((int)multiple) & 2;
  944.   double double_index = x * inverse_interval;
  945.   int index = (int)double_index/* | 0*/;
  946.   double t1 = double_index - index;
  947.   double t2 = 1 - t1;
  948.   double y1 = table_sin[index];
  949.   double y2 = table_sin[index + 1];
  950.   double ycos1 = table_cos[index];
  951.   double ycos2 = table_cos[index + 1];
  952.   double dy = y2 - y1;
  953.   double dycos = ycos1 - ycos2;
  954.   sinx = (t2 * y1 + t1 * y2 + t1 * t2 * ((ycos1 * interval - dy) * t2 + (dy - ycos2 * interval) * t1)) *  (1 - multiple_and_two) + 0;
  955.   cosx = (t1 * ycos2 + t2 * ycos1 + t2 * t1 * ((y2 * interval - dycos) * t1 + (dycos - y1 * interval) * t2)) *  (1 - multiple_and_two) + 0;
  956. };
  957.  
  958. int main(int argc, char **argv) {
  959.   InitTrigonometricFunctions();
  960.   const int64_t n = (int64_t)1e+8;
  961.   double topval = 10000.0;
  962.   double topvalDiv2p32 = topval / std::numeric_limits<uint32_t>::max();
  963.   double step = topval / n;
  964.   double x = 0.0;
  965.   double maxerr = 0;
  966.   int maxbitserr = 0;
  967.   std::cout.precision(15);
  968.   int64_t sumerr = 0;
  969.   uint32_t r = 0;
  970.  
  971.   double a = 0;
  972.   for(int64_t i = 0; i < n; i++) {
  973.     r = 69069 * r + 1;
  974.     x = r * topvalDiv2p32;
  975.     // x += step;
  976.    
  977.     double sin_fast, cos_fast;
  978.    
  979.     sin_fast = MathSinImpl(x);
  980.     // MathSinCosImpl(x, sin_fast, cos_fast);
  981.     //vdt::fast_sincos(x, sin_fast, cos_fast);
  982.     //sin_fast = std::sin(x);
  983.    
  984.     a += sin_fast;
  985. #if 0
  986.     double stdsin = std::sin(x);
  987.    
  988.     //double err = std::abs(1.0 - sin_fast / stdsin);
  989.     int diffbits = vdth::diffbit<double>(sin_fast, stdsin);
  990.     sumerr += diffbits;
  991.     if(diffbits > maxbitserr) {
  992.       std::cout << diffbits  << " (x=" << x << ")" << std::endl;
  993.       maxbitserr = diffbits;
  994.       //maxerr =  err;
  995.     }
  996. #endif    
  997.   }
  998.   std::cout << a << std::endl;
  999.   std::cout << ((double)sumerr / n) << std::endl;
  1000.     //std::cout << "Hello, world!" << std::endl;
  1001.     return 0;
  1002. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement