Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Helper functions used for the diagnostic of the vdt routines.
- * They are not optimised for speed.
- * Authors: Danilo Piparo CERN
- **/
- /*
- * VDT is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- *
- * You should have received a copy of the GNU Lesser Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #ifndef VDTHELPER_H_
- #define VDTHELPER_H_
- #include <bitset>
- #include <iostream>
- #include <sstream>
- #include <string>
- #include <functional>
- #include "inttypes.h"
- // #include "x86intrin.h"
- #include <cmath> //for log2
- #include "time.h"
- #include "sys/time.h"
- #ifdef __APPLE__
- #include <CoreServices/CoreServices.h>
- #include <mach/mach.h>
- #include <mach/mach_time.h>
- #include <unistd.h>
- #endif
- namespace{
- // Establish the size of the double and single precision and the bitsets
- constexpr double _tmp=0;
- constexpr uint32_t dp_size_in_bits = sizeof(_tmp)*8;
- using dp_bitset = std::bitset<dp_size_in_bits>;
- }
- namespace vdth{
- //------------------------------------------------------------------------------
- // Useful alias for some functions
- using dpdpfunction = std::function<double(double)>;
- using dpdpfunctionv = std::function<void (uint32_t, double*, double*)>;
- using spspfunction = std::function<float(float)>;
- using spspfunctionv = std::function<void(uint32_t, float*, float*)>;
- using dpdp2function = std::function<double(double,double)>;
- using dpdp2functionv = std::function<void (uint32_t, double*, double*, double*)>;
- using spsp2function = std::function<float(float,float)>;
- using spsp2functionv = std::function<void(uint32_t, float*, float*, float*)>;
- //maybe for convenience
- template<class T> using genfpfunction = std::function<T(T)>;
- template<class T> using genfpfunctionv = std::function<void(uint32_t, T*, T*)>;
- template<class T> using genfp2function = std::function<T(T,T)>;
- template<class T> using genfp2functionv = std::function<void(uint32_t, T*, T*, T*)>;
- //------------------------------------------------------------------------------
- /// Useful union
- union standard{
- double dp;
- float sp[2];
- uint64_t li;
- };
- //------------------------------------------------------------------------------
- template<class T>
- uint32_t inline getSizeInbits(const T x){
- return sizeof(x) * 8;
- }
- //------------------------------------------------------------------------------
- /// Convert a fp into a uint64_t not optimised for speed
- template<class T>
- inline uint64_t fp2uint64(const T x){
- const uint32_t size = getSizeInbits(x);
- standard conv;
- conv.dp=0.;
- if (size==dp_size_in_bits)
- conv.dp=x;
- else
- conv.sp[0]=x;
- return conv.li;
- }
- //------------------------------------------------------------------------------
- /// Convert a double into a bitset
- template<class T>
- inline const dp_bitset fp2bs( const T x ){
- dp_bitset const bits (fp2uint64(x));
- return bits;
- }
- //------------------------------------------------------------------------------
- /// Print as a dp formatted bitset
- template<class T>
- const std::string getbsasstr(const T x){
- const uint32_t size = getSizeInbits(x);
- uint32_t offset = 0;
- uint32_t exp_size = 11;
- uint32_t mant_size = 52;
- if (size!=dp_size_in_bits){
- offset = 32;
- exp_size = 8;
- mant_size = 23;
- }
- // Convert the bitstream to string
- std::string bitset_as_string (fp2bs(x).to_string());
- std::ostringstream os;
- // sign
- os << bitset_as_string[offset] << " ";
- // exponent
- for (unsigned int i=offset+1;i<offset+1+exp_size;i++)
- os << bitset_as_string[i];
- os << " ";
- //mantissa
- for (unsigned int i=offset+1+exp_size;i<offset+1+exp_size+mant_size;i++)
- os << bitset_as_string[i];
- return os.str();
- }
- //------------------------------------------------------------------------------
- /// Returns most significative different bit dp
- template <class T>
- uint16_t diffbit(const T a,const T b ){
- /// make a xor
- uint64_t ia = fp2uint64(a);
- uint64_t ib = fp2uint64(b);
- uint64_t c = ia>ib? ia-ib : ib -ia;
- //uint64_t c = ia^ib;
- /// return the log2+1
- return log2(c)+1;
- }
- //------------------------------------------------------------------------------
- ///Check and print which instructions sets are enabled.
- void print_instructions_info(){
- std::ostringstream os;
- os << "List of enabled instructions' sets:\n";
- os << " o SSE2 instructions set "
- #ifndef __SSE2__
- << "not "
- #endif
- << "enabled.\n"
- << " o SSE3 instructions set "
- #ifndef __SSE3__
- << "not "
- #endif
- << "enabled.\n"
- << " o SSE4.1 instructions set "
- #ifndef __SSE4_1__
- << "not "
- #endif
- << "enabled.\n"
- << " o AVX instructions set "
- #ifndef __AVX__
- << "not "
- #endif
- << "enabled.\n";
- std::cout << os.str();
- }
- //------------------------------------------------------------------------------
- /// Print the different bit of two fp numbers
- template<class T>
- void print_different_bit(const T a, const T b, const bool show_identical=true){
- std::cout.precision(10);
- std::cout << "Different bit between " << a << " and " << b
- << " is " << diffbit(a,b) << std::endl;
- if (show_identical)
- std::cout << getbsasstr(a) << std::endl
- << getbsasstr(b) << std::endl<< std::endl;
- }
- //------------------------------------------------------------------------------
- /// Invoke two functions and print on screen their argument and different bits
- template<class T>
- void printFuncDiff(const std::string& func_name, std::function<T(T)> f1,std::function<T(T)> f2, const T x){
- std::cout << "Function " << func_name << "(" << x << ")" << std::endl;
- print_different_bit(f1(x),f2(x),true);
- }
- /// Invoke two functions and print on screen their argument and different bits
- template<class T>
- void printFuncDiff(const std::string& func_name, std::function<T(T,T)> f1,std::function<T(T,T)> f2, const T x, const T y){
- std::cout << "Function " << func_name << "(" << x << ", "<< y <<")" << std::endl;
- print_different_bit(f1(x,y),f2(x,y),true);
- }
- //------------------------------------------------------------------------------
- /// Invoke two functions and print on screen their argument and different bits
- template<class T>
- void printFuncDiff(const std::string& func_name,
- genfpfunctionv<T> f1,
- genfpfunctionv<T> f2,
- T* x_arr,
- const uint32_t size){
- std::cout << "Function " << func_name << std::endl;
- T* res_1 = new T[size];
- f1(size,x_arr,res_1);
- T* res_2 = new T[size];
- f2(size,x_arr,res_2);
- for (uint32_t i=0;i<size;i++){
- std::cout << "Calculated in " << x_arr[i] << std::endl;
- print_different_bit(res_1[i],res_2[i],true);
- };
- delete [] res_1;
- delete [] res_2;
- }
- /// Invoke two functions and print on screen their argument and different bits
- template<class T>
- void printFuncDiff(const std::string& func_name,
- genfp2functionv<T> f1,
- genfp2functionv<T> f2,
- T* x_arr,
- T* y_arr,
- const uint32_t size){
- std::cout << "Function " << func_name << std::endl;
- T* res_1 = new T[size];
- f1(size,x_arr,y_arr,res_1);
- T* res_2 = new T[size];
- f2(size,x_arr,y_arr,res_2);
- for (uint32_t i=0;i<size;i++){
- std::cout << "Calculated in (" << x_arr[i] << ", " << y_arr[i] << ")" << std::endl;
- print_different_bit(res_1[i],res_2[i],true);
- };
- delete [] res_1;
- delete [] res_2;
- }
- //------------------------------------------------------------------------------
- // Function tests
- /// Test a fp function with a double (double) signatures
- template<class T>
- void printFuncDiff(const std::string& name,
- std::function<T(T)> fpfunction,
- std::function<T(T)> fpfunction_ref,
- T* fpvals,
- const uint32_t size){
- for (uint32_t i=0;i<size;i++)
- printFuncDiff ( name,
- (std::function<T(T)>) fpfunction,
- (std::function<T(T)>) fpfunction_ref,
- fpvals[i] );
- }
- //------------------------------------------------------------------------------
- // Function tests
- /// Test a fp function with a double (double) signatures
- template<class T>
- void printFuncDiff(const std::string& name,
- std::function<T(T,T)> fpfunction,
- std::function<T(T,T)> fpfunction_ref,
- T* fpvals1,
- T* fpvals2,
- const uint32_t size){
- for (uint32_t i=0;i<size;i++)
- printFuncDiff ( name,
- (std::function<T(T,T)>) fpfunction,
- (std::function<T(T,T)>) fpfunction_ref,
- fpvals1[i],
- fpvals2[i]);
- }
- //------------------------------------------------------------------------------
- /// Get the clock cycles
- class timer{
- public:
- timer(){}
- ~timer(){}
- void print(){
- const uint64_t nsecs=get_elapsed_time();
- std::cout << "Time elapsed: " << nsecs << " nanoseconds.\n";// ("
- //<< m_get_elapsed_clocks(nsecs) << " clock)\n";
- }
- #if defined (__APPLE__)
- void inline start(){m_time1=mach_absolute_time();}
- void inline stop(){m_time2=mach_absolute_time();}
- uint64_t get_elapsed_time(){
- static mach_timebase_info_data_t sTimebaseInfo;
- const uint64_t elapsed = m_time2 - m_time1;
- // Convert to nanoseconds.
- // Have to do some pointer fun because AbsoluteToNanoseconds
- // works in terms of UnsignedWide, which is a structure rather
- // than a proper 64-bit integer.
- if ( sTimebaseInfo.denom == 0 ) {
- (void) mach_timebase_info(&sTimebaseInfo);
- }
- // Do the maths. We hope that the multiplication doesn't
- // overflow; the price you pay for working in fixed point.
- uint64_t elapsedNano = elapsed * sTimebaseInfo.numer / sTimebaseInfo.denom;
- return elapsedNano;
- }
- private:
- uint64_t m_time1,m_time2;
- #else
- void inline start(){
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &m_time1);
- }
- void inline stop(){
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &m_time2);
- }
- /// Return time in nanoseconds
- uint64_t get_elapsed_time(){
- timespec temp;
- temp.tv_sec = m_time2.tv_sec-m_time1.tv_sec;
- temp.tv_nsec = m_time2.tv_nsec-m_time1.tv_nsec;
- uint64_t elapsed_time = temp.tv_nsec;
- elapsed_time += 1e9*temp.tv_sec;
- return elapsed_time;
- }
- private:
- timespec m_time1,m_time2;
- #endif
- };
- //------------------------------------------------------------------------------
- // inline uint64_t getcpuclock() {
- // return __rdtsc();
- // }
- }//end of namespace vdth
- #endif
- /*
- * vdtcore_common.h
- * Common functions for the vdt routines.
- * The basic idea is to exploit Pade polynomials.
- * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
- * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos,
- * tan, asin, acos and atan functions. The Cephes library can be found here:
- * http://www.netlib.org/cephes/
- *
- * Created on: Jun 23, 2012
- * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
- */
- /*
- * VDT is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- *
- * You should have received a copy of the GNU Lesser Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #ifndef VDTCOMMON_H_
- #define VDTCOMMON_H_
- #include "inttypes.h"
- #include <cmath>
- namespace vdt{
- namespace details{
- // Constants
- const double TWOPI = 2.*M_PI;
- const double PI = M_PI;
- const double PIO2 = M_PI_2;
- const double PIO4 = M_PI_4;
- const double ONEOPIO4 = 4./M_PI;
- const float TWOPIF = 2.*M_PI;
- const float PIF = M_PI;
- const float PIO2F = M_PI_2;
- const float PIO4F = M_PI_4;
- const float ONEOPIO4F = 4./M_PI;
- const double MOREBITS = 6.123233995736765886130E-17;
- const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
- //------------------------------------------------------------------------------
- /// Used to switch between different type of interpretations of the data (64 bits)
- union ieee754{
- ieee754 () {};
- ieee754 (double thed) {d=thed;};
- ieee754 (uint64_t thell) {ll=thell;};
- ieee754 (float thef) {f[0]=thef;};
- ieee754 (uint32_t thei) {i[0]=thei;};
- double d;
- float f[2];
- uint32_t i[2];
- uint64_t ll;
- uint16_t s[4];
- };
- //------------------------------------------------------------------------------
- /// Converts an unsigned long long to a double
- inline double uint642dp(uint64_t ll) {
- ieee754 tmp;
- tmp.ll=ll;
- return tmp.d;
- }
- //------------------------------------------------------------------------------
- /// Converts a double to an unsigned long long
- inline uint64_t dp2uint64(double x) {
- ieee754 tmp;
- tmp.d=x;
- return tmp.ll;
- }
- //------------------------------------------------------------------------------
- /// Makes an AND of a double and a unsigned long long
- inline double dpANDuint64(const double x, const uint64_t i ){
- return uint642dp(dp2uint64(x) & i);
- }
- //------------------------------------------------------------------------------
- /// Makes an OR of a double and a unsigned long long
- inline double dpORuint64(const double x, const uint64_t i ){
- return uint642dp(dp2uint64(x) | i);
- }
- /// Makes a XOR of a double and a unsigned long long
- inline double dpXORuint64(const double x, const uint64_t i ){
- return uint642dp(dp2uint64(x) ^ i);
- }
- //------------------------------------------------------------------------------
- inline uint64_t getSignMask(const double x){
- const uint64_t mask=0x8000000000000000ULL;
- return dp2uint64(x) & mask;
- }
- //------------------------------------------------------------------------------
- /// Converts an int to a float
- inline float uint322sp(int x) {
- ieee754 tmp;
- tmp.i[0]=x;
- return tmp.f[0];
- }
- //------------------------------------------------------------------------------
- /// Converts a float to an int
- inline uint32_t sp2uint32(float x) {
- ieee754 tmp;
- tmp.f[0]=x;
- return tmp.i[0];
- }
- //------------------------------------------------------------------------------
- /// Makes an AND of a float and a unsigned long
- inline float spANDuint32(const float x, const uint32_t i ){
- return uint322sp(sp2uint32(x) & i);
- }
- //------------------------------------------------------------------------------
- /// Makes an OR of a float and a unsigned long
- inline float spORuint32(const float x, const uint32_t i ){
- return uint322sp(sp2uint32(x) | i);
- }
- //------------------------------------------------------------------------------
- /// Makes an OR of a float and a unsigned long
- inline float spXORuint32(const float x, const uint32_t i ){
- return uint322sp(sp2uint32(x) ^ i);
- }
- //------------------------------------------------------------------------------
- /// Get the sign mask
- inline uint32_t getSignMask(const float x){
- const uint32_t mask=0x80000000;
- return sp2uint32(x) & mask;
- }
- //------------------------------------------------------------------------------
- /// Like frexp but vectorising and the exponent is a double.
- inline double getMantExponent(const double x, double & fe){
- uint64_t n = dp2uint64(x);
- // Shift to the right up to the beginning of the exponent.
- // Then with a mask, cut off the sign bit
- uint64_t le = (n >> 52);
- // chop the head of the number: an int contains more than 11 bits (32)
- int32_t e = le; // This is important since sums on uint64_t do not vectorise
- fe = e-1023 ;
- // This puts to 11 zeroes the exponent
- n &=0x800FFFFFFFFFFFFFULL;
- // build a mask which is 0.5, i.e. an exponent equal to 1022
- // which means *2, see the above +1.
- const uint64_t p05 = 0x3FE0000000000000ULL; //dp2uint64(0.5);
- n |= p05;
- return uint642dp(n);
- }
- //------------------------------------------------------------------------------
- /// Like frexp but vectorising and the exponent is a float.
- inline float getMantExponentf(const float x, float & fe){
- uint32_t n = sp2uint32(x);
- int32_t e = (n >> 23)-127;
- fe = e;
- // fractional part
- const uint32_t p05f = 0x3f000000; // //sp2uint32(0.5);
- n &= 0x807fffff;// ~0x7f800000;
- n |= p05f;
- return uint322sp(n);
- }
- //------------------------------------------------------------------------------
- /// Converts a fp to an int
- inline uint32_t fp2uint(float x) {
- return sp2uint32(x);
- }
- /// Converts a fp to an int
- inline uint64_t fp2uint(double x) {
- return dp2uint64(x);
- }
- /// Converts an int to fp
- inline float int2fp(uint32_t i) {
- return uint322sp(i);
- }
- /// Converts an int to fp
- inline double int2fp(uint64_t i) {
- return uint642dp(i);
- }
- //------------------------------------------------------------------------------
- /**
- * A vectorisable floor implementation, not only triggered by fast-math.
- * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509
- * compliant for argument -0.0
- **/
- inline double fpfloor(const double x){
- // no problem since exp is defined between -708 and 708. Int is enough for it!
- int32_t ret = int32_t (x);
- ret-=(sp2uint32(x)>>31);
- return ret;
- }
- //------------------------------------------------------------------------------
- /**
- * A vectorisable floor implementation, not only triggered by fast-math.
- * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509
- * compliant for argument -0.0
- **/
- inline float fpfloor(const float x){
- int32_t ret = int32_t (x);
- ret-=(sp2uint32(x)>>31);
- return ret;
- }
- //------------------------------------------------------------------------------
- }
- } // end of namespace vdt
- #endif /* VDTCOMMON_H_ */
- /*
- * sincos_common.h
- * The basic idea is to exploit Pade polynomials.
- * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
- * moshier@na-net.ornl.gov) as well as actual code.
- * The Cephes library can be found here: http://www.netlib.org/cephes/
- *
- * Created on: Jun 23, 2012
- * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
- */
- /*
- * VDT is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- *
- * You should have received a copy of the GNU Lesser Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #include <cmath>
- #include <limits>
- #ifndef SINCOS_COMMON_H_
- #define SINCOS_COMMON_H_
- namespace vdt{
- namespace details{
- // double precision constants
- const double DP1sc = 7.85398125648498535156E-1;
- const double DP2sc = 3.77489470793079817668E-8;
- const double DP3sc = 2.69515142907905952645E-15;
- const double C1sin = 1.58962301576546568060E-10;
- const double C2sin =-2.50507477628578072866E-8;
- const double C3sin = 2.75573136213857245213E-6;
- const double C4sin =-1.98412698295895385996E-4;
- const double C5sin = 8.33333333332211858878E-3;
- const double C6sin =-1.66666666666666307295E-1;
- const double C1cos =-1.13585365213876817300E-11;
- const double C2cos = 2.08757008419747316778E-9;
- const double C3cos =-2.75573141792967388112E-7;
- const double C4cos = 2.48015872888517045348E-5;
- const double C5cos =-1.38888888888730564116E-3;
- const double C6cos = 4.16666666666665929218E-2;
- const double DP1 = 7.853981554508209228515625E-1;
- const double DP2 = 7.94662735614792836714E-9;
- const double DP3 = 3.06161699786838294307E-17;
- // single precision constants
- const float DP1F = 0.78515625;
- const float DP2F = 2.4187564849853515625e-4;
- const float DP3F = 3.77489497744594108e-8;
- const float T24M1 = 16777215.;
- //------------------------------------------------------------------------------
- inline double get_sin_px(const double x){
- double px=C1sin;
- px *= x;
- px += C2sin;
- px *= x;
- px += C3sin;
- px *= x;
- px += C4sin;
- px *= x;
- px += C5sin;
- px *= x;
- px += C6sin;
- return px;
- }
- //------------------------------------------------------------------------------
- inline double get_cos_px(const double x){
- double px=C1cos;
- px *= x;
- px += C2cos;
- px *= x;
- px += C3cos;
- px *= x;
- px += C4cos;
- px *= x;
- px += C5cos;
- px *= x;
- px += C6cos;
- return px;
- }
- //------------------------------------------------------------------------------
- /// Reduce to 0 to 45
- inline double reduce2quadrant(double x, int32_t& quad) {
- x = fabs(x);
- quad = int (ONEOPIO4 * x); // always positive, so (int) == std::floor
- quad = (quad+1) & (~1);
- const double y = double (quad);
- // Extended precision modular arithmetic
- return ((x - y * DP1) - y * DP2) - y * DP3;
- }
- //------------------------------------------------------------------------------
- /// Sincos only for -45deg < x < 45deg
- inline void fast_sincos_m45_45( const double z, double & s, double &c ) {
- double zz = z * z;
- s = z + z * zz * get_sin_px(zz);
- c = 1.0 - zz * .5 + zz * zz * get_cos_px(zz);
- }
- //------------------------------------------------------------------------------
- } // End namespace details
- /// Double precision sincos
- /*inline */ void __attribute__ ((noinline)) fast_sincos( const double xx, double & s, double &c ) {
- // I have to use doubles to make it vectorise...
- int j;
- double x = details::reduce2quadrant(xx,j);
- const double signS = (j&4);
- j-=2;
- const double signC = (j&4);
- const double poly = j&2;
- details::fast_sincos_m45_45(x,s,c);
- //swap
- if( poly==0 ) {
- const double tmp = c;
- c=s;
- s=tmp;
- }
- if(signC == 0.)
- c = -c;
- if(signS != 0.)
- s = -s;
- if (xx < 0.)
- s = -s;
- }
- // Single precision functions
- namespace details {
- //------------------------------------------------------------------------------
- /// Reduce to 0 to 45
- inline float reduce2quadrant(float x, int & quad) {
- /* make argument positive */
- x = fabs(x);
- quad = int (ONEOPIO4F * x); /* integer part of x/PIO4 */
- quad = (quad+1) & (~1);
- const float y = float(quad);
- // quad &=4;
- // Extended precision modular arithmetic
- return ((x - y * DP1F) - y * DP2F) - y * DP3F;
- }
- //------------------------------------------------------------------------------
- /// Sincos only for -45deg < x < 45deg
- inline void fast_sincosf_m45_45( const float x, float & s, float &c ) {
- float z = x * x;
- s = (((-1.9515295891E-4f * z
- + 8.3321608736E-3f) * z
- - 1.6666654611E-1f) * z * x)
- + x;
- c = (( 2.443315711809948E-005f * z
- - 1.388731625493765E-003f) * z
- + 4.166664568298827E-002f) * z * z
- - 0.5f * z + 1.0f;
- }
- //------------------------------------------------------------------------------
- } // end details namespace
- /// Single precision sincos
- inline void fast_sincosf( const float xx, float & s, float &c ) {
- int j;
- const float x = details::reduce2quadrant(xx,j);
- int signS = (j&4);
- j-=2;
- const int signC = (j&4);
- const int poly = j&2;
- float ls,lc;
- details::fast_sincosf_m45_45(x,ls,lc);
- //swap
- if( poly==0 ) {
- const float tmp = lc;
- lc=ls; ls=tmp;
- }
- if(signC == 0) lc = -lc;
- if(signS != 0) ls = -ls;
- if (xx<0) ls = -ls;
- c=lc;
- s=ls;
- }
- } // end namespace vdt
- #endif
- #include <iostream>
- #include <cmath>
- #include <stdint.h>
- const int samples = 2048; // Table size.
- const double pi = 3.1415926535897932;
- const double pi_half = pi / 2;
- const double inverse_pi_half = 1 / pi_half;
- const double two_pi = pi * 2;
- const double interval = pi_half / samples;
- const double inverse_interval = samples / pi_half;
- static double table_sin[samples + 2];
- static double table_cos_interval[samples + 2];
- static double table_cos[samples + 2];
- void PopulateTrigonometricTable(double* sin_table, double* cos_table_interval, double* cos_table_2, int samples) {
- double* sin_buffer = sin_table;
- double* cos_buffer = cos_table_interval;
- const double pi_half = 3.1415926535897932 / 2;
- double interval = pi_half / samples;
- for (int i = 0; i < samples + 1; i++) {
- double sample = std::sin(i * interval);
- sin_buffer[i] = sample;
- cos_table_2[samples - i] = sample;
- cos_buffer[samples - i] = sample * interval;
- }
- // Fill this to catch out of bound accesses when calculating Math.sin(pi/2).
- sin_buffer[samples + 1] = std::sin(pi_half + interval);
- cos_table_2[samples + 1] = std::cos(pi_half + interval);
- cos_buffer[samples + 1] = std::cos(pi_half + interval) * interval;
- }
- void InitTrigonometricFunctions() {
- PopulateTrigonometricTable(table_sin, table_cos_interval, table_cos, samples);
- }
- // This implements the following algorithm.
- // 1) Multiplication takes care of to-number conversion.
- // 2) Reduce x to the first quadrant [0, pi/2].
- // Conveniently enough, in case of +/-Infinity, we get NaN.
- // 3) Replace x by (pi/2-x) if x was in the 2nd or 4th quadrant.
- // 4) Do a table lookup for the closest samples to the left and right of x.
- // 5) Find the derivatives at those sampling points by table lookup:
- // dsin(x)/dx = cos(x) = sin(pi/2-x) for x in [0, pi/2].
- // 6) Use cubic spline interpolation to approximate sin(x).
- // 7) Negate the result if x was in the 3rd or 4th quadrant.
- // 8) Get rid of -0 by adding 0.
- double __attribute__ ((noinline)) MathSinImpl(double x) {
- double multiple = std::floor(x * inverse_pi_half);
- int multiple_and_one = ((int)multiple) & 1;
- x = multiple_and_one * pi_half + (1 - (multiple_and_one << 1)) * (x - multiple * pi_half);
- int multiple_and_two = ((int)multiple) & 2;
- double double_index = x * inverse_interval;
- int index = (int)double_index/* | 0*/;
- double t1 = double_index - index;
- double t2 = 1 - t1;
- double y1 = table_sin[index];
- double y2 = table_sin[index + 1];
- double dy = y2 - y1;
- return (t2 * y1 + t1 * y2 +
- t1 * t2 * ((table_cos_interval[index] - dy) * t2 +
- (dy - table_cos_interval[index + 1]) * t1)) *
- (1 - multiple_and_two) + 0;
- };
- void MathSinCosImpl(double x, double &sinx, double &cosx) {
- sinx = MathSinImpl(x);
- cosx = MathSinImpl(x + pi_half);
- }
- void MathSinCosImpl2(double x, double &sinx, double &cosx) {
- double multiple = std::floor(x * inverse_pi_half);
- int multiple_and_one = ((int)multiple) & 1;
- x = multiple_and_one * pi_half + (1 - (multiple_and_one << 1)) * (x - multiple * pi_half);
- int multiple_and_two = ((int)multiple) & 2;
- double double_index = x * inverse_interval;
- int index = (int)double_index/* | 0*/;
- double t1 = double_index - index;
- double t2 = 1 - t1;
- double y1 = table_sin[index];
- double y2 = table_sin[index + 1];
- double ycos1 = table_cos[index];
- double ycos2 = table_cos[index + 1];
- double dy = y2 - y1;
- double dycos = ycos1 - ycos2;
- sinx = (t2 * y1 + t1 * y2 + t1 * t2 * ((ycos1 * interval - dy) * t2 + (dy - ycos2 * interval) * t1)) * (1 - multiple_and_two) + 0;
- cosx = (t1 * ycos2 + t2 * ycos1 + t2 * t1 * ((y2 * interval - dycos) * t1 + (dycos - y1 * interval) * t2)) * (1 - multiple_and_two) + 0;
- };
- int main(int argc, char **argv) {
- InitTrigonometricFunctions();
- const int64_t n = (int64_t)1e+8;
- double topval = 10000.0;
- double topvalDiv2p32 = topval / std::numeric_limits<uint32_t>::max();
- double step = topval / n;
- double x = 0.0;
- double maxerr = 0;
- int maxbitserr = 0;
- std::cout.precision(15);
- int64_t sumerr = 0;
- uint32_t r = 0;
- double a = 0;
- for(int64_t i = 0; i < n; i++) {
- r = 69069 * r + 1;
- x = r * topvalDiv2p32;
- // x += step;
- double sin_fast, cos_fast;
- sin_fast = MathSinImpl(x);
- // MathSinCosImpl(x, sin_fast, cos_fast);
- //vdt::fast_sincos(x, sin_fast, cos_fast);
- //sin_fast = std::sin(x);
- a += sin_fast;
- #if 0
- double stdsin = std::sin(x);
- //double err = std::abs(1.0 - sin_fast / stdsin);
- int diffbits = vdth::diffbit<double>(sin_fast, stdsin);
- sumerr += diffbits;
- if(diffbits > maxbitserr) {
- std::cout << diffbits << " (x=" << x << ")" << std::endl;
- maxbitserr = diffbits;
- //maxerr = err;
- }
- #endif
- }
- std::cout << a << std::endl;
- std::cout << ((double)sumerr / n) << std::endl;
- //std::cout << "Hello, world!" << std::endl;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement