Sine approximations

//g++ -g -Os -O3 ./testing.cpp -o ./testing && ./testing
#include <stdlib.h>
#include <cstdint>
#include <type_traits>
#include <chrono>
#include <iostream>
#include <math.h>

// Maps an integer type T to an unsigned integer type with twice as many bits,
// selected by sizeof(T): 1 -> uint16_t, 2 -> uint32_t, 4 -> uint64_t,
// 8 -> __uint128_t (compiler extension). Any other size yields void.
template <typename T>
struct twiceWide {
    using type = typename std::conditional<(sizeof(T) == 1), uint16_t,
                   typename std::conditional<(sizeof(T) == 2), uint32_t,
                   typename std::conditional<(sizeof(T) == 4), uint64_t,
                   typename std::conditional<(sizeof(T) == 8), __uint128_t,
                                                void>::type>::type>::type>::type;
};

//https://www.desmos.com/calculator/cydsdmvy2t
// Fast sine approximation built from two parabolas, using integer arithmetic only.
// T is expected to be an unsigned integer type of 1, 2, 4 or 8 bytes.
// num: phase, where the full range of T (0 .. max) covers one period.
// Returns the approximated sine scaled to the full range of T:
//   phase 0 -> mid-scale, 1/4 period -> max, 1/2 period -> mid-scale - 1, 3/4 period -> 0.
template <typename T>
T ukaelSine(T num) {
    using T2 = typename twiceWide<T>::type; //required for squaring
    // Do the squaring in a type that is never narrower than unsigned int.
    // When T is uint8_t, T2 is uint16_t and `T2 * T2` would be promoted to
    // signed int, where the product can overflow (undefined behaviour).
    // In an unsigned type the product simply wraps, which is what is needed.
    using Wide = typename std::common_type<T2, unsigned int>::type;

    constexpr unsigned bits = sizeof(T) * 8;
    constexpr T allOnes = static_cast<T>(~static_cast<T>(0));

    // Top bit set means the phase is in the second half of the period.
    const bool secondHalf = (num >> (bits - 1)) != 0;

    // Dropping the top bit by shifting left gives two periods of a saw wave.
    const T saw = static_cast<T>(num << 1);

    // Centre the saw around zero: 2*saw - max. Negative values wrap in Wide,
    // but their square is still correct modulo 2^(bits of Wide).
    const Wide centred = (static_cast<Wide>(saw) << 1) - static_cast<Wide>(allOnes);

    // Square and scale back so the parabola spans half of T's range.
    const T parabola = static_cast<T>((centred * centred) >> (bits + 1));

    // First half of the period is the inverted parabola (upper half of the range),
    // second half is the parabola itself (lower half of the range).
    return secondHalf ? parabola : static_cast<T>(~parabola);
}

// Benchmark configuration used by main().
// SAMPLES: number of elements in the result buffer that main() allocates for each repeat.
#define SAMPLES 16777216 //ram is your limit
// REPEATS: how many times main() allocates, fills and frees that buffer per sine implementation.
#define REPEATS 16        //or you can split the testing


// TEST_VALUE: the expression that is scaled by 1.23456789 and fed to the sine function under test.
// It is expanded inside main()'s sample loop, so `i` refers to that loop's uint32_t index.
// Enable exactly one of the alternatives below.
#define TEST_VALUE ( (i*i)   )  //squared
//#define TEST_VALUE ( rand() ) //random
//#define TEST_VALUE ( i         )  //linear
//#define TEST_VALUE ( 1         )

// STORE_DOUBLE: 0 makes main() store results as uint32_t; any other value stores them as double.
#define STORE_DOUBLE 0  // [1] can have up to 50% impact in favor of sin()

//sin() over ukaelSine() time //-O3 -Os
//higher is better in favor of ukaelSine
//lower  is better in favor of sin()
//STORE_DOUBLE==0
    //sin(i*i)      = 28 times slower       //best case for ukaelSine. Especially when storing uint8_t *value[], sin() is 42 times slower
    //sin(rand())   = 11 times slower
    //sin(i)        = 6 times slower
    //sin(1)        = ~1.0 and ~3 times faster without -O3 -Os flags. Best case scenario for sin()

//STORE_DOUBLE==1
    //sin(i*i)      = 20 times slower
    //sin(rand())   = 10 times slower
    //sin(i)        =  4 times slower
    //sin(1)        = ~1.0                  //worst case for ukaelSine

int main(){

    //warmup, gets your cpu's legs running
    uint64_t warmup = 12385377835987337323UL;
    for(int i=0;i<SAMPLES*REPEATS/4;i++){
        warmup=(uint64_t)(1+3573489789832437712ULL*( ( atan2((double)warmup,warmup*M_PI) ) )); //the least efficient rng
    }
    std::cout << warmup << "\n"; //print that it's not optimized out by -O3 and -Os

    double testTime[2];
//  for(int k=1;k>-1;k--){ //reverse test, minimal impact
    for(int k=0;k<2;k++){ //0=ukael sine   1=math.h sine

        std::chrono::steady_clock::time_point timest = std::chrono::steady_clock::now(); //first timer

            for(int j=0;j<REPEATS;j++){

#if STORE_DOUBLE==0
                typedef uint32_t test_t;
#else
                typedef double test_t;
#endif

                test_t *value;
                value = (test_t*) malloc(SAMPLES*sizeof(test_t));

                for(uint32_t i=1;i<SAMPLES;i+=1){ //iterate through SAMPLES
                    value[i] = k ?
                        sin      ( (double)( (double)TEST_VALUE*1.23456789) ) //k==1
                    :
                        ukaelSine( (uint32_t)( (double)TEST_VALUE*1.23456789) ) //k==0
                    ;
                }
                if(j==REPEATS-1){std::cout << "last value " << value[SAMPLES-1] << "\n";}//print last value before free
                free(value);    //Making sure that no chaching happens
            }

        k==0 ? std::cout << "ukael sine " : std::cout << "math.h sine ";
        testTime[k]=(double)std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now()-timest).count()/1000000.0;
        std::cout << testTime[k] << "\n";
    }
    std::cout << "time ratio: sin() / ukaelSine() " << testTime[1]/testTime[0] << "\n";
    return 0;
}


/*
//alternative Sine but poor precision as this doesn't require twiceWide
template <typename T>
T ukaelCSine(T num) {
    bool secondHalf = (num >> ((sizeof(T)<<3) - 1));
    bool evenQuarter = (num >> ((sizeof(T)<<3) - 2))==0 || (num >> ((sizeof(T)<<3) - 2))==2;
    num = evenQuarter ? ~num : num; //invert even quarters
    num <<= 2;  //2 period saw
    num >>= sizeof(T)<<2;   //square root. Crunchy precision
    num*=num;   //square
    num >>= 1;  //divide by 2
    num = secondHalf ? num : ~num;  //invert 2nd half
    return num;
}


// print example, paste in desmos to view. Converted to range 0 to 1
#include <stdio.h> //printf
int main(){
    uint32_t samples = ((uint32_t)~0);
    uint32_t inc = ((uint32_t)~0)/255;

    for(uint32_t i=0;i<samples-inc-1;i+=inc){
        printf("(%f,%f)",(double)i/samples,(double)(ukaelSine(i))/samples );
        if(i<samples-2*inc){printf(",");}
    }

    return 0;
}
*/
What is this?

A sine wave approximation built from parabolas and computed with integer and bitwise operations.
I wanted a fast way to generate waveforms whose amplitude ranges from 0 to the maximum value of an unsigned integer type (e.g. UINT32_MAX for uint32_t).