// Untitled

//clang++ -S -mllvm --x86-asm-syntax=intel test.cpp

#include <stddef.h>

namespace
{
    typedef char int8_t;
    typedef short int16_t;
    typedef int int32_t;
    typedef unsigned char uint8_t;
    typedef unsigned short uint16_t;
    typedef unsigned int uint32_t;
#if defined(USE_STATIC_ASSERT)
    static_assert(sizeof( uint8_t) == 1, "wrong size" );
    static_assert(sizeof( uint16_t) == 2, "wrong size" );
    static_assert(sizeof( uint32_t) == 4, "wrong size" );
#endif

    // The two byte halves of a 16-bit register.
    // `low` is declared first, so it occupies the lower address of the pair.
    struct reg8_t
    {
        uint8_t low;
        uint8_t high;
    };

    // A 16-bit register that can also be addressed as two separate bytes.
    // NOTE(review): `part.low` aliases the low byte of `value` only on a
    // little-endian host, and reading a union member other than the one last
    // written is formally undefined in C++ - confirm the supported targets.
    union reg16_t
    {
        uint16_t value; // the register as one 16-bit quantity
        reg8_t part;    // the same storage viewed as low/high bytes
    };

    // Register file of the emulated CPU.
    // Only the first four registers are byte-addressable (reg16_t); the rest
    // are plain 16-bit values.
    struct registers_t
    {
        // general-purpose registers with low/high byte access
        reg16_t reg16_ax;
        reg16_t reg16_bx;
        reg16_t reg16_cx;
        reg16_t reg16_dx;

        // pointer/index registers and the instruction pointer
        uint16_t sp;
        uint16_t bp;
        uint16_t si;
        uint16_t di;
        uint16_t ip;

        // segment registers
        uint16_t es;
        uint16_t cs;
        uint16_t ss;
        uint16_t ds;
    };

    // Flat byte array backing the emulated RAM.
    // NOTE(review): 0x1FFFFF bytes is one byte short of 2 MiB (21 address
    // bits), which is more than the 20 bits mentioned below. The largest
    // address offset32() can form from two 16-bit values is
    // 0xFFFF * 16 + 0xFFFF = 0x10FFEF, which fits into this array.
    typedef uint8_t memory_t[0x1FFFFF]; //max 20bits addressable ram

    // Complete machine state: the register file followed by the RAM image.
    struct storage_t
    {
        registers_t registers;
        memory_t memory;
    };

    // The single machine instance; static storage duration, so it starts zeroed.
    static storage_t storage;

    // Address of the first byte of the emulated RAM.
    static const void* const MEMORY_START_ADDRESS = &storage.memory[0];

    static const uint32_t offset32( const int& p_segment, const int& p_offset )
    {
        return ( p_segment * 16 + p_offset );
    }

    // Location of the MCGA window inside the RAM image (segment 0xA000).
    // MCGA_OFFSET is computed by a function call, so these constants are
    // initialized at program start, in this order.
    // NOTE(review): MCGA_MEM_END is MCGA_MEM_BEGIN + 0xFFFF and in_mcga_ram()
    // tests `p < MCGA_MEM_END`, so the byte at offset 0xFFFF of the segment is
    // treated as outside the window. If the whole 64 KiB segment is meant the
    // size would be 0x10000 - confirm the intended range.
    static const size_t MCGA_OFFSET = offset32(0xA000,0);
    static const size_t MCGA_SIZE = 0xFFFF;
    static const void* const MCGA_MEM_BEGIN = &storage.memory[MCGA_OFFSET];
    static const void* const MCGA_MEM_END = &storage.memory[MCGA_OFFSET+MCGA_SIZE]; //exclusive upper bound used by in_mcga_ram()

    static const bool in_mcga_ram(const void* const p_ptr)
    {
        return (p_ptr >= MCGA_MEM_BEGIN && p_ptr < MCGA_MEM_END);
    }

    // Parity lookup table indexed by a byte value: the entry is 1 when the
    // byte contains an even number of set bits and 0 otherwise
    // (e.g. parity[0] == 1, parity[1] == 0, parity[3] == 1).
    static const uint8_t parity[0x100] =
    {
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
    };

    //VS2010 optimizer "bug":
    //global references to types smaller than int aren't resolved,
    //but global const pointers are resolved

    //registers and their byte components, available as global references
    //(each name is an alias for a field of storage.registers, so writing
    // through the alias updates the shared machine state)

    // 16-bit views of the byte-addressable registers
    uint16_t& ax = storage.registers.reg16_ax.value;
    uint16_t& bx = storage.registers.reg16_bx.value;
    uint16_t& cx = storage.registers.reg16_cx.value;
    uint16_t& dx = storage.registers.reg16_dx.value;

    // low/high byte views of the same registers
    uint8_t& al = storage.registers.reg16_ax.part.low;
    uint8_t& ah = storage.registers.reg16_ax.part.high;

    uint8_t& bl = storage.registers.reg16_bx.part.low;
    uint8_t& bh = storage.registers.reg16_bx.part.high;

    uint8_t& cl = storage.registers.reg16_cx.part.low;
    uint8_t& ch = storage.registers.reg16_cx.part.high;

    uint8_t& dl = storage.registers.reg16_dx.part.low;
    uint8_t& dh = storage.registers.reg16_dx.part.high;

    // pointer/index registers and the instruction pointer
    uint16_t& sp = storage.registers.sp;
    uint16_t& bp = storage.registers.bp;
    uint16_t& si = storage.registers.si;
    uint16_t& di = storage.registers.di;
    uint16_t& ip = storage.registers.ip;

    // segment registers
    uint16_t& es = storage.registers.es;
    uint16_t& cs = storage.registers.cs;
    uint16_t& ss = storage.registers.ss;
    uint16_t& ds = storage.registers.ds;

    // Segment used by get_memory_target() to address a memory operand, and a
    // flag that stops get_memory_target() from replacing it with ss.
    // NOTE(review): nothing in this file resets useseg to ds or ever sets
    // segoverride - presumably a prefix decoder / instruction loop elsewhere
    // is expected to do that; confirm.
    static uint16_t useseg = 0;
    static uint8_t segoverride = 0;

    // Status flags, one byte each, holding 0 or 1.
    static uint8_t cf = 0; // carry     - written by set_cf()
    static uint8_t pf = 0; // parity    - written by set_pf()
    static uint8_t af = 0; // auxiliary - written by set_af()
    static uint8_t zf = 0; // zero      - written by set_zf()
    static uint8_t sf = 0; // sign      - written by set_sf()
    static uint8_t of = 0; // overflow  - written by set_of()

    //static globals
    // Register lookup tables indexed by the 3-bit reg / rm field of a ModRM
    // byte; one table for 16-bit operands and one for 8-bit operands.
    static uint16_t* const modregrm_reg16[8]={&ax,&cx,&dx,&bx,&sp,&bp,&si,&di};
    static uint8_t* const modregrm_reg8[8]={&al,&cl,&dl,&bl,&ah,&ch,&dh,&bh};

    // Operand-size traits. The primary template is intentionally empty; only
    // the uint8_t and uint16_t specializations provide members, so any other
    // ValueType fails to compile at the point of use.
    template<typename ValueType>
    struct bitsize_T
    {
    };

    template<>
    struct bitsize_T<uint8_t>
    {
        static uint8_t* const P_reg(const int& p_nr)
        {
            return modregrm_reg8[p_nr];
        }
        static uint8_t& r_reg(const int& p_nr ){ return *P_reg(p_nr); }

        static const uint8_t SIGN_BITS = 0x80;
        static const uint16_t CARRY_BITS = 0xFF00;
        typedef uint16_t result_type;

        static const uint8_t& get_parity( const uint8_t& p_value )
        {
            return parity[p_value];
        }

        static const uint16_t sign_extend( const uint8_t& p_value )
        {
            return static_cast<int16_t>(static_cast<int8_t>(p_value));
        }
    };

    template<>
    struct bitsize_T<uint16_t>
    {
        static uint16_t* const P_reg(const int& p_nr)
        {
            return modregrm_reg16[p_nr];
        }
        static uint16_t& r_reg(const int& p_nr ){ return *P_reg(p_nr); }

        static const uint16_t SIGN_BITS = 0x8000;
        static const uint32_t CARRY_BITS = 0xFFFF0000;
        typedef uint32_t result_type;

        static const uint8_t& get_parity( const uint16_t& p_value )
        {
            return parity[p_value & 0xFF]; // & 0xFF needed - or better assert
        }

        static const uint32_t sign_extend( const uint16_t& p_value )
        {
            return static_cast<int32_t>(static_cast<int16_t>(p_value));
        }
    };

    template<typename ValueType>
    static ValueType* const P_mem( const int& p_offset )
    {
        return reinterpret_cast<ValueType*>(storage.memory + p_offset);
    }

    template<typename ValueType>
    static ValueType* const P_mem( const int& p_segment, const int& p_offset )
    {
        return P_mem<ValueType>(offset32(p_segment, p_offset));
    }

    template<typename ValueType>
    static ValueType& r_mem( const int& p_offset )
    {
        return *P_mem<ValueType>(p_offset);
    }

    template<typename ValueType>
    static ValueType& r_mem(const int& p_segment, const int& p_offset )
    {
        return *P_mem<ValueType>(p_segment,p_offset);
    }

    //maybe they should not write/read the real memory - just observe the access

    template<typename ValueType>
    static void write_mem( void* const p_ptr, const ValueType& p_value )
    {
        ValueType* const ptr = reinterpret_cast<ValueType*>( p_ptr );

        if( in_mcga_ram(p_ptr) )
        {
            //screen_update=true;
        }
    }

    template<typename ValueType>
    static const ValueType& read_mem( ValueType* const p_ptr)
    {
        return *p_ptr;
    }

    template<typename ValueType>
    static const ValueType& read_cs_ip()
    {
        const ValueType& value = r_mem<ValueType>(cs,ip);
        ip += sizeof(ValueType);
        return value;
    }

    //two parameter encoding - reg + reg/mem
    template<typename ValueType>
    static ValueType* const get_memory_target(const uint8_t& mode_value, const uint8_t& rm_value)
    {
        uint16_t disp = 0;

        switch (mode_value)
        {
        case 0:
            if (rm_value == 6)
            {
                disp = read_cs_ip<uint16_t>();
            }
            if (!segoverride && ((rm_value == 2) || (rm_value == 3)))
            {
                useseg = ss;
            }
            break;
        case 1:
            disp = bitsize_T<ValueType>::sign_extend(read_cs_ip<uint8_t>());
            if (!segoverride && ((rm_value == 2) || (rm_value == 3) || (rm_value == 6)))
            {
                useseg = ss;
            }
            break;
        case 2:
            disp = read_cs_ip<uint16_t>();
            if (!segoverride && ((rm_value == 2) || (rm_value == 3) || (rm_value == 6)) )
            {
                useseg = ss;
            }
            break;
        }

        //getea

        uint32_t offset = 0;
        switch (mode_value)
        {
        case 0:
            switch (rm_value)
            {
            case 0: offset = bx + si; break;
            case 1: offset = bx + di; break;
            case 2: offset = bp + si; break;
            case 3: offset = bp + di; break;
            case 4: offset = si; break;
            case 5: offset = di; break;
            case 6: offset = disp; break;
            case 7: offset = bx; break;
            }
            break;
        case 1:
        case 2:
            switch (rm_value)
            {
            case 0: offset = bx + si + disp; break;
            case 1: offset = bx + di + disp; break;
            case 2: offset = bp + si + disp; break;
            case 3: offset = bp + di + disp; break;
            case 4: offset = si + disp; break;
            case 5: offset = di + disp; break;
            case 6: offset = bp + disp; break;
            case 7: offset = bx + disp; break;
            }
            break;
        }

        return P_mem<ValueType>(useseg,offset);
    }

    template<typename ValueType>
    struct modregrm_T
    {
        const uint8_t mode_value;
        const uint8_t reg_value;
        const uint8_t rm_value;

        ValueType* const reg_target;
        ValueType* const rm_target;
        const bool is_mem_target;

        modregrm_T(const ValueType& p_address_byte):
            mode_value(p_address_byte >> 6),
            reg_value((p_address_byte >> 3) & 7),
            rm_value(p_address_byte & 7),
            reg_target(bitsize_T<ValueType>::P_reg(reg_value)),
            is_mem_target(mode_value < 3),
            rm_target(is_mem_target ? get_memory_target<ValueType>(mode_value,rm_value) : bitsize_T<ValueType>::P_reg(rm_value))
        {
        }

        ValueType& reg() const
        {
            return *reg_target;
        }

        const ValueType& rm() const
        {
            if(is_mem_target)
            {
                *rm_target = read_mem<ValueType>(rm_target);
            }
            return *rm_target;
        }

        void rm( const ValueType& p_value ) const
        {
            if(is_mem_target)
            {
                write_mem<ValueType>(rm_target,p_value); // memory observation
            }
            *rm_target = p_value;
        }
    };

    //--------------------------------------------------------------
    // base operations for 8 and 16bit
    //--------------------------------------------------------------

    template<typename ValueType>
    static void set_cf( const typename bitsize_T<ValueType>::result_type& p_dst )
    {
        cf = (p_dst & bitsize_T<ValueType>::CARRY_BITS) ? 1 : 0;
    }

    template<typename ValueType>
    static void set_of( const typename bitsize_T<ValueType>::result_type& p_dst, const ValueType& p_oper1, const ValueType& p_oper2 )
    {
        of = ((p_dst ^ p_oper1) & (p_oper1 ^ p_oper2) & bitsize_T<ValueType>::SIGN_BITS) ? 1 : 0;
    }

    template<typename ValueType>
    static void set_af( const typename bitsize_T<ValueType>::result_type& p_dst, const ValueType& p_oper1, const ValueType& p_oper2 )
    {
        af = ((p_oper1 ^ p_oper2 ^ p_dst) & 0x10) ? 1 : 0;
    }

    template<typename ValueType>
    static void set_zf( const ValueType& p_value )
    {
        zf = (!p_value) ? 1 : 0; //set or clear zero flag
    }

    template<typename ValueType>
    static void set_sf( const ValueType& p_value )
    {
        sf = (p_value & bitsize_T<ValueType>::SIGN_BITS) ? 1 : 0; //set or clear sign flag
    }

    template<typename ValueType>
    static void set_pf( const ValueType& p_value )
    {
        pf = bitsize_T<ValueType>::get_parity(p_value); //retrieve parity state from lookup table
    }

    template<typename ValueType>
    static void flag_szp(const ValueType& p_value)
    {
        set_zf<ValueType>(p_value);
        set_sf<ValueType>(p_value);
        set_pf<ValueType>(p_value);
    }

    template<typename ValueType>
    static void flag_sub_add(typename bitsize_T<ValueType>::result_type p_dst, const ValueType& p_oper1, const ValueType& p_oper2 )
    {
        flag_szp((ValueType)p_dst);

        set_cf<ValueType>(p_dst);
        set_of<ValueType>(p_dst, p_oper1, p_oper2);
        set_af<ValueType>(p_dst, p_oper1, p_oper2);
    }

    //v1 = destination operand, v2 = source operand (only difference to add is "-")
    template<typename ValueType>
    static void flag_sub(const ValueType& p_oper1, const ValueType& p_oper2 )
    {
        typedef typename bitsize_T<ValueType>::result_type result_type;

        const result_type dst = (result_type)p_oper1 - (result_type)p_oper2;

        flag_sub_add( dst, p_oper1, p_oper2 );
    }

    //v1 = destination operand, v2 = source operand (only difference to sub is "+")
    template<typename ValueType>
    static void flag_add(const ValueType& p_oper1, const ValueType& p_oper2 )
    {
        typedef typename bitsize_T<ValueType>::result_type result_type;
        const result_type dst = (result_type)p_oper1 + (result_type)p_oper2;
        flag_sub_add( dst, p_oper1, p_oper2 );
    }

    //unsing a static void run - or operator() const results in identical code

    template<typename ValueType>
    struct sub_T
    {
        void operator()( ValueType& p_oper1, const ValueType& p_oper2 ) const
        {
            flag_sub<ValueType>(p_oper1, p_oper2);
            const ValueType res = p_oper1 - p_oper2;
            p_oper1 = res;
        }
    };

    template<typename ValueType>
    struct add_T
    {
        void operator()( ValueType& p_oper1, const ValueType& p_oper2 ) const
        {
            flag_add<ValueType>(p_oper1, p_oper2);
            const ValueType res = p_oper1 + p_oper2;
            p_oper1 = res;
        }
    };

    //--------------------------------------------------------------
    // helper templates for interpreter-code
    //--------------------------------------------------------------

    //(reg&, const reg/mem&)
    //(reg/mem&, const reg&)
    //(reg&, const imm&)
    //(mem&, const imm&)

    //for operations that got moderegrm based (reg&, const reg/mem&) parameter set
    //template template - Operation needs to be class
    template <typename ValueType, template <typename> class Operation>
    static void reg_rm_T()
    {
        const modregrm_T<ValueType> params(read_cs_ip<ValueType>());
        Operation<ValueType>()(params.reg(), params.rm());
    }

    //for operations that got moderegrm based (reg/mem&, const reg&) parameter set
    //template template - Operation needs to be class
    template <typename ValueType, template <typename> class Operation>
    static void rm_reg_T()
    {
        const modregrm_T<ValueType> params(read_cs_ip<ValueType>());
        ValueType oper1 = params.rm();
        Operation<ValueType>()(oper1, params.reg());
        params.rm(oper1);
    };

    //for ref, imm parameter sets
    template<typename ValueType, template <typename> class Operation>
    static void ref_read_cs_ip(ValueType& p_ref)
    {
        Operation<ValueType>()(p_ref,read_cs_ip<ValueType>());
    }

    //al, imm8
    template<template <typename> class Operation>
    static void al_read_cs_ip()
    {
        ref_read_cs_ip<uint8_t,Operation>(al);
    }

    //ax, imm16
    template<template <typename> class Operation>
    static void ax_read_cs_ip()
    {
        ref_read_cs_ip<uint16_t,Operation>(ax);
    }

    //--------------------------------------------------------------
    //"user"-code
    //--------------------------------------------------------------

    // memory access

    const uint8_t& byte(const uint16_t& p_segment, const uint16_t& p_offset)
    {
        return r_mem<uint8_t>(p_segment, p_offset);
    }

    uint8_t* const byte_ptr(const uint16_t& p_segment, const uint16_t& p_offset)
    {
        return P_mem<uint8_t>(p_segment, p_offset);
    }

    const uint16_t& word(const uint16_t& p_segment, const uint16_t& p_offset)
    {
        return r_mem<uint16_t>(p_segment, p_offset);
    }

    uint16_t* const word_ptr(const uint16_t& p_segment, const uint16_t& p_offset)
    {
        return P_mem<uint16_t>(p_segment, p_offset);
    }

    // mnemonics

    void nop()
    {
    }

    void sub(uint8_t* p_oper1, const uint8_t& p_oper2)
    {
        sub_T<uint8_t>()(*p_oper1, p_oper2);
        write_mem<uint8_t>(p_oper1,*p_oper1); // memory observation
    }

    void sub(uint16_t* p_oper1, const uint16_t& p_oper2)
    {
        sub_T<uint16_t>()(*p_oper1, p_oper2);
        write_mem<uint16_t>(p_oper1,*p_oper1); // memory observation
    }

    void sub(uint16_t& p_oper1, const uint16_t& p_oper2)
    {
        sub_T<uint16_t>()(p_oper1, p_oper2);
    }

    void sub(uint8_t& p_oper1, const uint8_t& p_oper2)
    {
        sub_T<uint8_t>()(p_oper1, p_oper2);
    }

    void add(uint16_t& p_oper1, const uint16_t& p_oper2)
    {
        add_T<uint16_t>()(p_oper1, p_oper2);
    }

    void add(uint8_t& p_oper1, const uint8_t& p_oper2)
    {
        add_T<uint8_t>()(p_oper1, p_oper2);
    }

    bool jgl()
    {
        return (zf != 0 && cf == 1);
    }

    //random values from parameter pointers
    static void set_random_start_values(int argc, char** argv, const size_t& p_dummy)
    {
        uint32_t random1 = *reinterpret_cast<int32_t*>(argv) + p_dummy;
        uint32_t random2 = *reinterpret_cast<int32_t*>(&argc) + p_dummy;
        ah = random1 & 0xFF;
        al = (random1 >> 8) & 0xFF;
        dh = (random1 >> 16) & 0xFF;
        dl = (random1 >> 24) & 0xFF;
        ch = random2 & 0xFF;
        cl = (random2 >> 8) & 0xFF;
        bh = (random2 >> 16) & 0xFF;
        bl = (random2 >> 24) & 0xFF;
        si = ((random1+random2) >> 16) & 0xFFFF;
    }

    static uint32_t dummy = 0;

    //something to prevent optimizer from removing testcode
    static void calc_dummy(size_t p_dummy)
    {
        dummy *= ax + bx + dx + cx + si + p_dummy;
    }
}

#include <cstdio>

namespace blub
{
    int free_test(int argc, char** argv)
    {
        const size_t TESTS = 10000;
        for(size_t t = 0; t < TESTS; ++t)
        {

            for(size_t x = 0; x < 10000; ++x )
            {
                set_random_start_values(argc,argv,x);

                //sub ax,dx
                storage.memory[0]=0x29;
                storage.memory[1]=0xD0;
                //nop
                storage.memory[2]=0x90;
                //sub cl,dl
                storage.memory[3]=0x28;
                storage.memory[4]=0xD9;
                //sub dx,[bx]
                storage.memory[5]=0x2B;
                storage.memory[6]=0x17;
                //sub bl,[si]
                storage.memory[7]=0x2A;
                storage.memory[8]=0x1C;
                //sub bl,[si+1]
                storage.memory[9]=0x2A;
                storage.memory[10]=0x5C;
                storage.memory[11]=0x01;
                //sub [si+1],al
                storage.memory[12]=0x28;
                storage.memory[13]=0x44;
                storage.memory[14]=0x01;
                //add dx,ax
                storage.memory[15]=0x01;
                storage.memory[16]=0xC2;
                //add cl,bl
                storage.memory[17]=0x00;
                storage.memory[18]=0xD9;

                cs = 0;
                ip = 0;

                //interpreter run
                while( offset32(cs,ip) < 19 )
                {
                    const uint8_t opcode = read_cs_ip<uint8_t>();

                    switch( opcode )
                    {
                    case 0x00: { rm_reg_T<uint8_t,add_T>(); } break; // ADD Eb Gb
                    case 0x01: { rm_reg_T<uint16_t,add_T>(); } break; // ADD Ev Gv
                    case 0x04: { al_read_cs_ip<add_T>(); } break; // ADD al Ib
                    case 0x05: { ax_read_cs_ip<add_T>(); } break; // ADD ax Iv
                    case 0x28: { rm_reg_T<uint8_t,sub_T>(); } break; // SUB Eb Gb
                    case 0x29: { rm_reg_T<uint16_t,sub_T>(); } break; // SUB Ev Gv
                    case 0x2A: { reg_rm_T<uint8_t,sub_T>(); } break; // SUB Gb Eb
                    case 0x2B: { reg_rm_T<uint16_t,sub_T>(); } break; // SUB Gv Ev
                    case 0x90: break; // NOP
                    }

                    //the optimizer needs something dependend to not remove the testcode
                    calc_dummy(x);
                }


                //native run
                for(size_t i = 0; i<9; ++i)
                {
                    switch(i)
                    {
                    case 0: sub(ax,dx); break;
                    case 1: nop(); break;
                    case 2: sub(cl,dl); break;
                    case 3: sub(dx,word(ds,bx)); break;
                    case 4: sub(bl,byte(ds,si)); break;
                    case 5: sub(bl,byte(ds,si+1)); break;
                    case 6: sub(byte_ptr(ds,si+1),al); break;
                    case 7: add(dx,ax); break;
                    case 8: add(cl,bl); break;
                    }

                    //the optimizer needs something dependend to not remove the testcode
                    calc_dummy(x);
                }


            }
        }

        return dummy; // this is also for the optimizer
    }