Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import os
- from itertools import chain
def sgn_ext(value):
    """Sign-extend the low byte of *value* into a 16-bit word.

    The input is truncated to 16 bits; when bit 7 of the input is set the
    upper byte of the result is forced to 0xff.
    """
    word = value & 0xffff
    if value & 0x0080:
        word |= 0xff00
    return word
def to_2compl(value, is_word):
    """Map *value* onto its unsigned byte/word representation.

    When the sign bit of the selected width (16 bits if *is_word*, else 8)
    is set in *value*, the result is ``abs(abs(value) - 2**bits)``;
    otherwise *value* is returned unchanged.
    """
    bits = nr_bits(is_word)
    sign_bit = 1 << (bits - 1)
    if not (value & sign_bit):
        return value
    return abs(abs(value) - (1 << bits))
def from_2compl(value, is_word):
    """Interpret an unsigned byte/word as a signed two's-complement number.

    Values whose sign bit (bit 15 if *is_word*, else bit 7) is clear are
    returned as-is; otherwise the negative magnitude is returned.
    """
    bits = nr_bits(is_word)
    if not (value & (1 << (bits - 1))):
        return value
    width_mask = (1 << bits) - 1
    return -(~(value - 1) & width_mask)
def nr_bytes(word):
    """Return the operand size in bytes: 2 when *word* is truthy, else 1."""
    if word:
        return 2
    return 1
def nr_bits(word):
    """Return the operand size in bits (eight per byte of ``nr_bytes``)."""
    return 8 * nr_bytes(word)
def unpack_w(byte_stream):
    """Combine the first two items of *byte_stream* into a little-endian word.

    Item 0 is the low byte and item 1 the high byte.
    """
    low = byte_stream[0]
    high = byte_stream[1]
    return low + (high << 8)
def sub_2compl(a, b, is_word):
    """Subtract *b* from *a* by adding the ``to_2compl`` form of ``-b``.

    The sum is not truncated to the operand width here.
    """
    negated = to_2compl(-b, is_word)
    return a + negated
def hexword(value):
    """Format the low 16 bits of *value* as ``0x`` plus four hex digits."""
    return "0x{0:04x}".format(value & 0xffff)
def hexbyte(value):
    """Format the low 8 bits of *value* as ``0x`` plus two hex digits."""
    return "0x{0:02x}".format(value & 0x00ff)
def hexdump_from_mem(mem_bytes, separator=' '):
    """Render every item of *mem_bytes* with ``hexbyte`` and join them.

    *separator* is placed between consecutive rendered bytes.
    """
    rendered = [hexbyte(item) for item in mem_bytes]
    return separator.join(rendered)
class Ram():
    """Flat, byte-addressable memory backed by a list of integers."""

    def __init__(self, size):
        """Create *size* cells, all initialised to zero."""
        self._m = [0] * size

    def write(self, start, bytes):
        """Copy the items of *bytes* into memory beginning at *start*.

        The source is indexed relative to its own beginning, so the first
        item lands at address *start* whatever its value.  Writing past
        the end of memory raises ``IndexError``.
        """
        # The parameter name shadows the builtin but is kept so existing
        # callers (positional or keyword) continue to work.
        for offset, value in enumerate(bytes):
            self._m[start + offset] = value

    def write_b(self, start, byte):
        """Store a single cell at address *start*."""
        self._m[start] = byte

    def write_w(self, start, word):
        """Store *word* little-endian: low byte at *start*, high at +1."""
        self._m[start] = word & 0x00ff
        self._m[start + 1] = (word & 0xff00) >> 8

    def read(self, start, length):
        """Return a list copy of *length* cells beginning at *start*."""
        return self._m[start:start + length]

    def read_w(self, pos):
        """Return the little-endian word stored at *pos* and *pos* + 1."""
        return unpack_w([self.read_b(pos), self.read_b(pos + 1)])

    def read_b(self, pos):
        """Return the cell stored at *pos*."""
        return self._m[pos]

    def hexdump(self, start, length):
        """Return a hex dump string of *length* cells from *start*."""
        return hexdump_from_mem(self.read(start, length))
class WordStack():
    """Stack of 16-bit words kept in its own ``Ram`` instance.

    The stack pointer starts at 0 and decreases by two on every push.
    """

    def __init__(self, size):
        self._m = Ram(size)
        self._sp = 0

    def push(self, value):
        """Lower the stack pointer by two and store *value* there."""
        new_top = self._sp - 2
        self._sp = new_top
        self._m.write_w(new_top, value)

    def pop(self):
        """Raise the stack pointer by two and return the word it left."""
        old_top = self._sp
        self._sp = old_top + 2
        return self._m.read_w(old_top)

    def pos(self):
        """Return the current stack pointer."""
        return self._sp

    def set_pos(self, sp):
        """Overwrite the stack pointer with *sp*."""
        self._sp = sp

    def hexdump(self, length):
        """Return a hex dump of *length* bytes from the stack pointer."""
        top = self._sp
        return self._m.hexdump(top, length)
class BaseState(object):
    """Read-only view of the machine: RAM, stack, flags, IP and registers.

    Subclasses supply the mutating operations; ``write_ram`` is abstract
    here.
    """

    def __init__(self, ram=None, stack=None):
        """Initialise a zeroed state.

        ram   -- memory object; a fresh 256 KiB ``Ram`` when omitted.
        stack -- stack object; a fresh 64 KiB ``WordStack`` when omitted.

        The defaults are created per instance.  Building them in the
        signature would evaluate them once at definition time and make
        every default-constructed state share one memory and one stack.
        """
        self._ram = Ram(256 * 1024) if ram is None else ram
        self._ip = 0
        self._cf = 0
        self._sf = 0
        self._zf = 0
        self._stack = WordStack(64 * 1024) if stack is None else stack
        self._reg_val = [0] * 8

    def reg_value(self, reg_index):
        """Return the stored value of the register slot *reg_index*."""
        return self._reg_val[reg_index]

    def read_ram(self, start, is_word):
        """Read a word (if *is_word*) or a byte from RAM at *start*."""
        if is_word:
            return self._ram.read_w(start)
        else:
            return self._ram.read_b(start)

    def write_ram(self, start, value, is_word):
        """Abstract: implemented by mutable subclasses."""
        raise NotImplementedError

    def ram(self, start, length):
        """Return *length* RAM cells beginning at *start*."""
        return self._ram.read(start, length)

    def instr_bytes(self):
        """Return the six RAM bytes beginning at the instruction pointer."""
        return self._ram.read(self._ip, 6)

    def ip(self):
        return self._ip

    def cf(self):
        return self._cf

    def zf(self):
        return self._zf

    def sf(self):
        return self._sf

    def sp(self):
        """Return the value of register slot 4 (named SP in ``Register``)."""
        return self.reg_value(4)

    def ram_hexdump(self, address, length):
        """Return a hex dump of *length* RAM bytes from *address*."""
        return self._ram.hexdump(address, length)

    def __str__(self):
        """Render flags, IP, registers (word and byte view) and stack top."""
        def reg_values_str(is_word):
            return " ".join(
                "{0}:{2}{1}".format(
                    Register.name_from_index(index, is_word),
                    hexword(self._reg_val[index]) if is_word else
                    hexbyte(self._reg_val[index]),
                    '' if is_word else ' '
                )
                for index in range(8)
            )
        return ("CF:{0} ZF:{1} SF:{2} IP:{3}\n{4}\n{5}\n{6}"
                "".format(self.cf(), self.zf(), self.sf(), hexword(self.ip()),
                          reg_values_str(True), reg_values_str(False),
                          ("stack: {0} ..."
                           "".format(self._stack.hexdump(8))
                           )
                          )
                )
class HaltState(BaseState):
    """State of a halted machine.

    Any attribute not found through normal lookup resolves to a callable
    that accepts arbitrary arguments and does nothing, so mutating calls
    are silently ignored.
    """

    def halted(self):
        return True

    def _nop(self, *args, **kwargs):
        return None

    def __getattr__(self, name):
        # Only reached when regular attribute lookup fails.
        return self._nop
class State(BaseState):
    """Mutable machine state: adds writers for RAM, IP, flags, registers."""

    def halt(self):
        """Turn this object into a ``HaltState`` in place."""
        self.__class__ = HaltState

    def halted(self):
        return False

    def load_ram(self, start, mem):
        """Materialise *mem* as a list and write it to RAM at *start*."""
        self._ram.write(start, list(mem))

    def move_ip(self, offset):
        """Advance the instruction pointer by *offset* (may be negative)."""
        self._ip = self._ip + offset

    def set_ip(self, ip):
        self._ip = ip

    def set_cf(self, cf):
        """Store 1 in the carry flag when *cf* is truthy, else 0."""
        if cf:
            self._cf = 1
        else:
            self._cf = 0

    def write_ram(self, start, value, is_word):
        """Write *value*, truncated to the operand width, at *start*."""
        if is_word:
            self._ram.write_w(start, value & 0xffff)
            return
        self._ram.write_b(start, value & 0x00ff)

    def _zf_from_result(self, result, is_word):
        """Return 1 when *result* is zero within the operand width."""
        width_mask = (1 << nr_bits(is_word)) - 1
        if (result & width_mask) == 0:
            return 1
        return 0

    def _sf_from_result(self, res, is_word):
        """Return the top bit of *res* within the operand width."""
        top_bit = nr_bits(is_word) - 1
        return (res >> top_bit) & 1

    def _set_zf_sf(self, result, is_word):
        # Shared tail of the flag setters below.
        self._zf = self._zf_from_result(result, is_word)
        self._sf = self._sf_from_result(result, is_word)

    def set_flags_from_sub_operands(self, op1, op2, is_word):
        """Set CF/ZF/SF for ``op1 - op2``; CF is 1 when op2 > op1."""
        difference = sub_2compl(op1, op2, is_word)
        self._cf = int(op2 > op1)
        self._set_zf_sf(difference, is_word)

    def set_flags_from_add_result(self, result, is_word):
        """Set CF/ZF/SF from an untruncated sum.

        CF is 1 when *result* exceeds the largest value of the width.
        """
        largest = (1 << nr_bits(is_word)) - 1
        self._cf = int(result > largest)
        self._set_zf_sf(result, is_word)

    def set_flags_from_logical_result(self, result, is_word):
        # at least OK when only considering ZF, SF, CF
        self.set_flags_from_add_result(result, is_word)

    def set_flags_from_inc_dec_result(self, result, is_word):
        # at least OK when only considering ZF, SF, CF
        saved_cf = self.cf()
        self.set_flags_from_add_result(result, is_word)
        # inc/dec leave the carry flag as it was before the operation
        self._cf = saved_cf

    def set_reg_value(self, reg_index, value, is_word):
        """Store *value* in register slot *reg_index*.

        A word write replaces the low 16 bits; a byte write replaces only
        the low byte of the same slot.  Afterwards the stack object's
        pointer is re-synchronised with register slot 4.
        """
        if is_word:
            updated = value & 0xffff
        else:
            updated = (self._reg_val[reg_index] & 0xff00) + (value & 0xff)
        self._reg_val[reg_index] = updated
        self._stack.set_pos(self.sp())

    def push_stack(self, value):
        """Push *value* and copy the new stack position into slot 4."""
        self._stack.push(value)
        self.set_reg_value(4, self._stack.pos(), True)

    def pop_stack(self):
        """Pop a word, copy the new stack position into slot 4, return it."""
        popped = self._stack.pop()
        self.set_reg_value(4, self._stack.pos(), True)
        return popped
class Machine():
    """Ties together a state object, a disassembler and a text console."""

    class Console():
        """Copies a rows x cols character buffer onto a display object."""

        def __init__(self, rows, cols):
            self.rows = rows
            self.cols = cols

        def dbg_fill_display(self, display):
            """Fill every row with the digit ``row % 10`` (debug aid)."""
            mem = ''.join(str(y % 10) * self.cols for y in range(self.rows))
            self.update(mem, display)

        def update(self, screen_mem, display):
            """Send each cell of *screen_mem* to *display*, then refresh.

            *display* must provide ``set_char(x, y, value)`` and
            ``refresh()``.
            """
            for y in range(self.rows):
                row_start = y * self.cols
                for x in range(self.cols):
                    display.set_char(x, y, screen_mem[row_start + x])
            display.refresh()

    def __init__(self, state, disasm):
        self._console = Machine.Console(self.rows(), self.cols())
        self._disasm = disasm
        self._state = state

    def rows(self):
        return 25

    def cols(self):
        return 80

    def load(self, program):
        """Load *program* at address 0 and remember where it ends.

        *program* may be a byte string, ``bytes``, ``bytearray`` or a
        sequence of integers; ``bytearray`` normalises all of them to
        integer byte values (calling ``ord`` on each item only works for
        one-character strings).
        """
        self._state.load_ram(0, bytearray(program))
        self._ip = 0
        # next_clock() reports False once IP reaches this address.
        self._last_ip = len(program)

    def _screen_mem(self):
        """Return the rows*cols bytes of RAM starting at 0x8000."""
        return self._state.ram(0x8000, self.rows() * self.cols())

    def _exec_instr(self):
        """Decode the instruction at IP, advance IP past it, execute it."""
        instr = self._disasm.decode_instr(self._state.instr_bytes())
        self._state.move_ip(instr.length())
        instr.execute(self._state)

    def state(self):
        return self._state

    def next_clock(self, console_display):
        """Optionally redraw the console, then execute one instruction.

        Returns True while IP differs from the end-of-program address
        recorded by ``load``.
        """
        if console_display is not None:
            self._console.update(self._screen_mem(), console_display)
        self._exec_instr()
        return self._state.ip() != self._last_ip
class Register():
    """A register identified by its 3-bit encoding string.

    The same encoding names a word register or a byte register depending
    on the operand size.
    """

    _wname_from_bits = {
        '000': 'AX',
        '001': 'CX',
        '010': 'DX',
        '011': 'BX',
        '100': 'SP',
        '101': 'BP',
        '110': 'SI',
        '111': 'DI',
    }
    _bname_from_bits = {
        '000': 'AL',
        '001': 'CL',
        '010': 'DL',
        '011': 'BL',
        '100': 'AH',
        '101': 'CH',
        '110': 'DH',
        '111': 'BH',
    }

    @classmethod
    def index_from_name(cls, name):
        """Return the numeric encoding of the word or byte register *name*."""
        bits = Register.bits_from_name_dict()[name]
        return int(bits, 2)

    @classmethod
    def bits_from_name_dict(cls):
        """Return a new dict mapping every register name to its bit string."""
        lookup = {}
        for table in (cls._wname_from_bits, cls._bname_from_bits):
            for bits, reg_name in table.items():
                lookup[reg_name] = bits
        return lookup

    @classmethod
    def name_from_index(cls, index, is_word):
        """Return the word or byte register name for numeric *index*."""
        bits = format(index, '03b')
        if is_word:
            return cls._wname_from_bits[bits]
        return cls._bname_from_bits[bits]

    def __init__(self, bits):
        self._bits = bits

    def index(self):
        """Return the encoding as an integer."""
        return int(self._bits, 2)

    def name(self, is_word):
        """Return this register's word name or byte name."""
        if is_word:
            return self._wname_from_bits[self._bits]
        return self._bname_from_bits[self._bits]

    def __str__(self):
        return self.name(True)
class EffectiveAddress(object):
    """Memory operand made of up to two registers plus a displacement.

    On construction the instance re-types itself to the subclass that
    matches which registers are present.
    """

    def __init__(self, register1, register2, disp):
        self._reg1 = register1
        self._reg2 = register2
        self._disp = disp
        if register1 is None:
            variant = EffectiveAddressDispOnly
        elif register2 is None:
            variant = EffectiveAddressDispAndReg1
        else:
            variant = EffectiveAddressDispAndReg1AndReg2
        self.__class__ = variant

    def _str_add_disp(self):
        """Return ``' + <hex disp>'``, or an empty string for a zero disp."""
        if self._disp == 0:
            return ''
        return ' + {0}'.format(hex(self._disp))
class EffectiveAddressDispAndReg1AndReg2(EffectiveAddress):
    """Effective address of the form reg1 + reg2 + displacement."""

    def __str__(self):
        suffix = self._str_add_disp()
        return '[({0}) + ({1}){2}]'.format(self._reg1, self._reg2, suffix)

    def address(self, machine_state):
        """Return the sum of both register values and the displacement."""
        first = machine_state.reg_value(self._reg1.index())
        second = machine_state.reg_value(self._reg2.index())
        return first + second + self._disp
class EffectiveAddressDispAndReg1(EffectiveAddress):
    """Effective address of the form reg1 + displacement."""

    def __str__(self):
        suffix = self._str_add_disp()
        return '[({0}){1}]'.format(self._reg1, suffix)

    def address(self, machine_state):
        """Return the register value plus the displacement."""
        base = machine_state.reg_value(self._reg1.index())
        return base + self._disp
class EffectiveAddressDispOnly(EffectiveAddress):
    """Effective address given by the displacement alone."""

    def __str__(self):
        shown = hex(self._disp)
        return '[{0}]'.format(shown)

    def address(self, machine_state):
        """Return the displacement; *machine_state* is not consulted."""
        return self._disp
def sub_upd_flags(op1, op2, is_word, machine_state):
    """Update the flags for ``op1 - op2`` and return the difference.

    The flags are set through ``set_flags_from_sub_operands`` before the
    difference is computed with ``sub_2compl``.
    """
    machine_state.set_flags_from_sub_operands(op1, op2, is_word)
    difference = sub_2compl(op1, op2, is_word)
    return difference
class BaseOp(object):
    """Base class of decoded instructions.

    If the instance has a method called ``exec_<name>``, it is bound to
    ``self.execute`` on construction.
    """

    def __init__(self, instr_length, name):
        self._instr_length = instr_length
        self._name = name
        handler_name = "exec_{0}".format(name)
        if hasattr(self, handler_name):
            self.execute = getattr(self, handler_name)

    def length(self):
        """Return the encoded length of the instruction in bytes."""
        return self._instr_length

    def __str__(self):
        return "{0}".format(self._name)
class Op_Reg_Immediate(BaseOp):
    """Instruction whose operands are a register and an immediate value."""

    def __init__(self, instr_length, name, register, immediate, is_word):
        super(Op_Reg_Immediate, self).__init__(instr_length, name)
        self._im = immediate
        self._reg = register
        self._is_word = is_word

    def _current(self, machine_state):
        # Value currently held by the target register.
        return machine_state.reg_value(self._reg.index())

    def _store(self, machine_state, value):
        # Write *value* back to the target register at this op's width.
        machine_state.set_reg_value(self._reg.index(), value, self._is_word)

    def exec_cmp(self, machine_state):
        """Set the flags for register - immediate; the register is kept."""
        sub_upd_flags(self._current(machine_state),
                      self._im,
                      self._is_word,
                      machine_state)

    def exec_sub(self, machine_state):
        """Subtract the immediate from the register and update flags."""
        difference = sub_upd_flags(self._current(machine_state),
                                   self._im,
                                   self._is_word,
                                   machine_state)
        self._store(machine_state, difference)

    def exec_add(self, machine_state):
        """Add the immediate to the register and update flags."""
        total = self._current(machine_state) + self._im
        self._store(machine_state, total)
        machine_state.set_flags_from_add_result(total, self._is_word)

    def exec_mov(self, machine_state):
        """Load the immediate into the register."""
        self._store(machine_state, self._im)

    def exec_adc(self, machine_state):
        """Add the immediate and the carry flag to the register."""
        total = (self._current(machine_state) +
                 self._im +
                 machine_state.cf())
        self._store(machine_state, total)
        machine_state.set_flags_from_add_result(total, self._is_word)

    def exec_and(self, machine_state):
        """Bitwise-AND the register with the immediate and update flags."""
        masked = self._current(machine_state) & self._im
        self._store(machine_state, masked)
        machine_state.set_flags_from_logical_result(masked, self._is_word)

    def __str__(self):
        reg_name = self._reg.name(self._is_word)
        return "{0} {1}, {2}".format(self._name, reg_name, hex(self._im))
class Nop(BaseOp):
    """Instruction that prints as ``nop`` and whose execute does nothing."""

    def execute(self, machine_state):
        return None

    def __str__(self):
        return "nop"
class Op_Reg_Reg(BaseOp):
    """Instruction with two register operands; register1 receives results."""

    def __init__(self, instr_length, name, register1, register2, is_word):
        super(Op_Reg_Reg, self).__init__(instr_length, name)
        self._reg1 = register1
        self._reg2 = register2
        self._is_word = is_word
        # Exchanging a register with itself is presented as a nop.
        same_register = self._reg1.index() == self._reg2.index()
        if self._name == 'xchg' and same_register:
            self.__class__ = Nop

    def _values(self, machine_state):
        # Current values of (register1, register2).
        first = machine_state.reg_value(self._reg1.index())
        second = machine_state.reg_value(self._reg2.index())
        return first, second

    def _store_first(self, machine_state, value):
        machine_state.set_reg_value(self._reg1.index(), value, self._is_word)

    def _logical_reg_reg(self, machine_state, log_op):
        """Apply *log_op* to both values, store in register1, set flags."""
        first, second = self._values(machine_state)
        outcome = log_op(first, second)
        self._store_first(machine_state, outcome)
        machine_state.set_flags_from_logical_result(outcome, self._is_word)

    def exec_xor(self, machine_state):
        self._logical_reg_reg(machine_state, lambda a, b: a ^ b)

    def exec_or(self, machine_state):
        self._logical_reg_reg(machine_state, lambda a, b: a | b)

    def exec_and(self, machine_state):
        self._logical_reg_reg(machine_state, lambda a, b: a & b)

    def exec_cmp(self, machine_state):
        """Set the flags for register1 - register2; nothing is stored."""
        first, second = self._values(machine_state)
        sub_upd_flags(first, second, self._is_word, machine_state)

    def exec_add(self, machine_state):
        """Add register2 to register1 and update flags."""
        first, second = self._values(machine_state)
        total = first + second
        self._store_first(machine_state, total)
        machine_state.set_flags_from_add_result(total, self._is_word)

    def exec_sbb(self, machine_state):
        """Subtract register2 plus the carry flag from register1."""
        first, second = self._values(machine_state)
        subtrahend = second + machine_state.cf()
        difference = sub_upd_flags(first,
                                   subtrahend,
                                   self._is_word,
                                   machine_state)
        self._store_first(machine_state, difference)

    def exec_xchg(self, machine_state):
        """Swap the values of the two registers."""
        first, second = self._values(machine_state)
        self._store_first(machine_state, second)
        machine_state.set_reg_value(self._reg2.index(), first, self._is_word)

    def exec_mov(self, machine_state):
        """Copy register2 into register1."""
        source = machine_state.reg_value(self._reg2.index())
        self._store_first(machine_state, source)

    def __str__(self):
        first_name = self._reg1.name(self._is_word)
        second_name = self._reg2.name(self._is_word)
        return "{0} {1}, {2}".format(self._name, first_name, second_name)
class Op_Ea_Immediate(BaseOp):
    """Instruction with a memory operand and an immediate value."""

    def __init__(self, instr_length, name, ea, immediate, is_word):
        super(Op_Ea_Immediate, self).__init__(instr_length, name)
        self._ea = ea
        self._im = immediate
        self._word = is_word

    def exec_cmp(self, machine_state):
        """Set the flags for memory - immediate; memory is not written."""
        target = self._ea.address(machine_state)
        current = machine_state.read_ram(target, self._word)
        sub_upd_flags(current, self._im, self._word, machine_state)

    def exec_add(self, machine_state):
        """Add the immediate to the memory operand and update flags."""
        target = self._ea.address(machine_state)
        total = machine_state.read_ram(target, self._word) + self._im
        machine_state.write_ram(target, total, self._word)
        machine_state.set_flags_from_add_result(total, self._word)

    def exec_or(self, machine_state):
        """Bitwise-OR the memory operand with the immediate."""
        target = self._ea.address(machine_state)
        combined = machine_state.read_ram(target, self._word) | self._im
        machine_state.write_ram(target, combined, self._word)
        machine_state.set_flags_from_logical_result(combined, self._word)

    def exec_mov(self, machine_state):
        """Store the immediate at the effective address."""
        target = self._ea.address(machine_state)
        machine_state.write_ram(target, self._im, self._word)

    def __str__(self):
        size = 'word' if self._word else 'byte'
        return "{0} {3} {1}, {2}".format(self._name,
                                         self._ea,
                                         hex(self._im),
                                         size)
class Op_Ea_Reg(BaseOp):
    """Instruction whose destination is memory and whose source a register."""

    def __init__(self, instr_length, name, reg, ea, is_word):
        super(Op_Ea_Reg, self).__init__(instr_length, name)
        self._ea = ea
        self._reg = reg
        self._word = is_word

    def _source(self, machine_state):
        # Current value of the source register.
        return machine_state.reg_value(self._reg.index())

    def exec_mov(self, machine_state):
        """Store the register value at the effective address."""
        target = self._ea.address(machine_state)
        machine_state.write_ram(target, self._source(machine_state),
                                self._word)

    def exec_and(self, machine_state):
        """Bitwise-AND the memory operand with the register."""
        target = self._ea.address(machine_state)
        masked = (machine_state.read_ram(target, self._word) &
                  self._source(machine_state))
        machine_state.write_ram(target, masked, self._word)
        machine_state.set_flags_from_logical_result(masked, self._word)

    def exec_add(self, machine_state):
        """Add the register to the memory operand and update flags."""
        target = self._ea.address(machine_state)
        total = (machine_state.read_ram(target, self._word) +
                 self._source(machine_state))
        machine_state.write_ram(target, total, self._word)
        machine_state.set_flags_from_add_result(total, self._word)

    def exec_sub(self, machine_state):
        """Subtract the register from the memory operand, update flags."""
        target = self._ea.address(machine_state)
        current = machine_state.read_ram(target, self._word)
        difference = sub_upd_flags(current,
                                   self._source(machine_state),
                                   self._word,
                                   machine_state)
        machine_state.write_ram(target, difference, self._word)

    def __str__(self):
        size = 'word' if self._word else 'byte'
        return "{0} {3} {1}, {2}".format(self._name,
                                         self._ea,
                                         self._reg.name(self._word),
                                         size)
class Op_Reg_Ea(BaseOp):
    """Instruction whose destination is a register and source is memory."""

    def __init__(self, instr_length, name, reg, ea, is_word):
        super(Op_Reg_Ea, self).__init__(instr_length, name)
        self._ea = ea
        self._reg = reg
        self._word = is_word

    def exec_mov(self, machine_state):
        """Load the register from the effective address."""
        source_addr = self._ea.address(machine_state)
        loaded = machine_state.read_ram(source_addr, self._word)
        machine_state.set_reg_value(self._reg.index(), loaded, self._word)

    def __str__(self):
        size = 'word' if self._word else 'byte'
        return "{0} {3} {2}, {1}".format(self._name,
                                         self._ea,
                                         self._reg.name(self._word),
                                         size)
class Op_Reg(BaseOp):
    """Instruction with a single register operand."""

    def __init__(self, instr_length, name, reg, is_word):
        super(Op_Reg, self).__init__(instr_length, name)
        self._reg = reg
        self._word = is_word

    def _store_and_flag(self, machine_state, value):
        # Store *value*, then derive the inc/dec flags from the value the
        # register holds after the (truncating) store.
        slot = self._reg.index()
        machine_state.set_reg_value(slot, value, self._word)
        machine_state.set_flags_from_inc_dec_result(
            machine_state.reg_value(slot), self._word
        )

    def exec_dec(self, machine_state):
        """Decrement the register by one; the carry flag is preserved."""
        current = machine_state.reg_value(self._reg.index())
        self._store_and_flag(machine_state,
                             sub_2compl(current, 1, self._word))

    def exec_inc(self, machine_state):
        """Increment the register by one; the carry flag is preserved."""
        current = machine_state.reg_value(self._reg.index())
        self._store_and_flag(machine_state, current + 1)

    def exec_push(self, machine_state):
        """Push the register value onto the stack."""
        current = machine_state.reg_value(self._reg.index())
        machine_state.push_stack(current)

    def exec_pop(self, machine_state):
        """Pop the top of the stack into the register."""
        popped = machine_state.pop_stack()
        machine_state.set_reg_value(self._reg.index(), popped, self._word)

    def __str__(self):
        reg_name = self._reg.name(self._word)
        return "{0} {1}".format(self._name, reg_name)
class Op_Disp(BaseOp):
    """Jump/call instruction carrying a signed IP-relative displacement.

    A 3-byte instruction carries a word displacement, any other length a
    byte displacement.
    """

    def __init__(self, instr_length, name, address):
        super(Op_Disp, self).__init__(instr_length, name)
        self._is_word = (instr_length == 3)
        self._addr = from_2compl(address, self._is_word)

    def _jump_if(self, machine_state, condition):
        # Move IP by the displacement when *condition* is truthy.
        if condition:
            machine_state.move_ip(self._addr)

    def exec_jz(self, machine_state):
        self._jump_if(machine_state, machine_state.zf())

    def exec_jc(self, machine_state):
        self._jump_if(machine_state, machine_state.cf())

    def exec_jnz(self, machine_state):
        self._jump_if(machine_state, not machine_state.zf())

    def exec_jnbe(self, machine_state):
        """Jump when neither ZF nor CF is set."""
        self._jump_if(machine_state,
                      not machine_state.zf() and not machine_state.cf())

    def exec_jns(self, machine_state):
        self._jump_if(machine_state, not machine_state.sf())

    def exec_jbe(self, machine_state):
        """Jump when CF or ZF is set."""
        self._jump_if(machine_state,
                      machine_state.cf() or machine_state.zf())

    def exec_jmp(self, machine_state):
        self._jump_if(machine_state, True)

    def exec_call(self, machine_state):
        """Push the current IP as return address, then jump."""
        return_address = machine_state.ip()
        machine_state.push_stack(return_address)
        machine_state.move_ip(self._addr)

    def rel_addr(self):
        """Return the signed displacement."""
        return self._addr

    def __str__(self):
        return "{0} {1}".format(self._name, hex(self._addr))
class Op_NoArgs(BaseOp):
    """Instruction without operands (hlt, ret, stc)."""

    def __init__(self, instr_length, name):
        super(Op_NoArgs, self).__init__(instr_length, name)

    def exec_hlt(self, machine_state):
        """Halt the machine."""
        machine_state.halt()

    def exec_ret(self, machine_state):
        """Pop the return address from the stack into IP."""
        return_address = machine_state.pop_stack()
        machine_state.set_ip(return_address)

    def exec_stc(self, machine_state):
        """Set the carry flag."""
        machine_state.set_cf(1)
class Unknown_Op(BaseOp):
    """Placeholder for an undecodable byte: length 1, named 'UNKNOWN'.

    No ``exec_UNKNOWN`` method exists, so the instance gets no ``execute``
    attribute.
    """

    def __init__(self):
        length, label = 1, 'UNKNOWN'
        super(Unknown_Op, self).__init__(length, label)
class Unpacker():
    """Reads bytes and words out of a string of '0'/'1' characters."""

    def __init__(self, bitstr, byte_offset=0):
        """Keep *bitstr* from byte *byte_offset* (8 characters per byte)."""
        self._bstr = bitstr[byte_offset * 8:]

    def dispatch(self, mod, rm):
        """Return the displacement selected by the *mod* / *rm* fields.

        mod '00': 0, except rm '110' which reads a word.
        mod '01': a sign-extended byte.
        mod '10': a word.
        Any other mod raises ``KeyError``.

        Only the selected reader runs, so a one-byte displacement can be
        decoded even when no second byte follows it in the bit string.
        """
        if mod == '00':
            return self.unpack_w() if rm == '110' else 0
        readers = {
            '01': self.unpack_se_b,
            '10': self.unpack_w,
        }
        return readers[mod]()

    def unpack_w(self, start=0):
        """Return the little-endian word at byte offset *start*."""
        return unpack_w([self._peek(start), self._peek(start + 1)])

    def unpack_b(self, start=0):
        """Return the byte at byte offset *start*."""
        return self._peek(start)

    def unpack_se_b(self, start=0):
        """Return the byte at *start* sign-extended to a word."""
        return sgn_ext(self.unpack_b(start))

    def unpack(self, start, is_word, do_sgn_ext=False):
        """Read a word, a sign-extended byte or a plain byte at *start*.

        *is_word* takes precedence over *do_sgn_ext*.
        """
        if is_word:
            return self.unpack_w(start)
        elif do_sgn_ext:
            return self.unpack_se_b(start)
        else:
            return self.unpack_b(start)

    def _peek(self, offset):
        """Parse the 8 characters of byte *offset* as a binary number."""
        return int(self._bstr[offset * 8:(offset + 1) * 8], 2)
class DisAsm():
    """Decoder turning raw instruction bytes into executable op objects.

    Every supported encoding is described by a template of literal bits
    and named placeholders; the templates are compiled to regular
    expressions that are matched against the instruction rendered as a
    string of '0'/'1' characters.
    """

    def __init__(self):
        # Regex fragment substituted for each placeholder in a template.
        self._dispatch_from_re = dict(
            (
                re.compile(
                    p.format(
                        w="(?P<w>[01]{1})",
                        sw="(?P<sw>[01]{2})",
                        dw="(?P<dw>[01]{2})",
                        mod="(?P<mod>[01]{2})",
                        reg="(?P<reg>[01]{3})",
                        rm="(?P<rm>[01]{3})",
                        disp="(?P<disp>[01]{8})"
                    )
                ),
                f
            )
            for p, f in self.instruction_dict().items()
        )

    def instruction_dict(self):
        """Return a dict mapping bit-pattern templates to op factories.

        Each factory takes the regex match of its pattern and returns the
        decoded instruction object.
        """
        def is_word(w):
            return w == '1'

        def unpacker_from_bitstr(bitstr, start):
            # Unpacker over (up to) six bytes beginning at byte *start*.
            return Unpacker(bitstr[start * 8:(start + 6) * 8])

        def unpack_w(match, start):
            return unpacker_from_bitstr(match.string, start).unpack_w()

        def unpack_b(match, start):
            return unpacker_from_bitstr(match.string, start).unpack_b()

        def unpack(match, start, is_word):
            return unpacker_from_bitstr(match.string, start).unpack(0, is_word)

        def op_w_acc(name, match):
            # Accumulator (register 000) with an immediate operand.
            return Op_Reg_Immediate(
                1 + nr_bytes(is_word(match.group('w'))),
                name,
                Register('000'),
                unpack(match, 1, is_word(match.group('w'))),
                is_word(match.group('w'))
            )

        def create_ea(mod, rm, dispatch):
            # *dispatch* is the already decoded displacement value.
            def reg_by_name(name):
                return Register(Register.bits_from_name_dict()[name])

            def reg1_reg2():
                return {
                    '000': (reg_by_name('BX'), reg_by_name('SI')),
                    '001': (reg_by_name('BX'), reg_by_name('DI')),
                    '010': (reg_by_name('BP'), reg_by_name('SI')),
                    '011': (reg_by_name('BP'), reg_by_name('DI')),
                    '100': (reg_by_name('SI'), None),
                    '101': (reg_by_name('DI'), None),
                    '110': (reg_by_name('BP'), None),
                    '111': (reg_by_name('BX'), None),
                }[rm]

            if mod == '00' and rm == '110':
                return EffectiveAddress(None, None, dispatch)
            # Build the register table once instead of once per element.
            reg1, reg2 = reg1_reg2()
            return EffectiveAddress(reg1, reg2, dispatch)

        def _mnemonic_i_rm(instr_length, name, is_word, mod, i, rm, unpacker):
            if mod == '11':
                return Op_Reg_Immediate(instr_length,
                                        name,
                                        Register(rm),
                                        i,
                                        is_word
                                        )
            else:
                return Op_Ea_Immediate(
                    instr_length,
                    name,
                    create_ea(mod, rm, unpacker.dispatch(mod, rm)),
                    i,
                    is_word
                )

        def _sw_mod_rm__op_i_rm(name, sw, mod, rm, unpacker):
            # sw == '01' is the only combination with a two-byte immediate;
            # sw[0] == '1' requests sign extension of a one-byte immediate.
            return _mnemonic_i_rm(
                2 + _nr_ea_bytes(mod, rm) + nr_bytes(sw == '01'),
                name,
                sw[1] == '1',
                mod,
                unpacker.unpack(_data_byte_offset(mod, rm),
                                sw == '01', sw[0] == '1'),
                rm,
                unpacker
            )

        def op_sw_mod_rm_im(name, match):
            return _sw_mod_rm__op_i_rm(name,
                                       match.group('sw'),
                                       match.group('mod'),
                                       match.group('rm'),
                                       unpacker_from_bitstr(match.string, 2)
                                       )

        def op_w_mod_rm_im(name, match):
            return _sw_mod_rm__op_i_rm(name,
                                       '0' + match.group('w'),
                                       match.group('mod'),
                                       match.group('rm'),
                                       unpacker_from_bitstr(match.string, 2)
                                       )

        def _nr_ea_bytes(mod, rm):
            # Number of displacement bytes following the mod/rm byte.
            return {
                '00': 2 if rm == '110' else 0,
                '01': 1,
                '10': 2,
                '11': 0,
            }[mod]

        def _data_byte_offset(mod, rm):
            return _nr_ea_bytes(mod, rm)

        def op_sw_mod_rm(match, name):
            return op_sw_mod_rm_im(name, match)

        def _mnemonic_rm_r(instr_length, name, to_reg, is_word, mod, rm, r,
                           unpacker):
            if mod == '11':
                return Op_Reg_Reg(instr_length,
                                  name,
                                  Register(rm),
                                  Register(r),
                                  is_word
                                  )
            else:
                if to_reg:
                    return Op_Reg_Ea(instr_length,
                                     name,
                                     Register(r),
                                     create_ea(mod, rm,
                                               unpacker.dispatch(mod, rm)),
                                     is_word
                                     )
                else:
                    return Op_Ea_Reg(instr_length,
                                     name,
                                     Register(r),
                                     create_ea(mod, rm,
                                               unpacker.dispatch(mod, rm)),
                                     is_word
                                     )

        def op_dw_mod_reg_rm(name, match):
            return _mnemonic_rm_r(
                (2 +
                 _nr_ea_bytes(match.group('mod'), match.group('rm'))
                 ),
                name,
                to_reg=match.group('dw')[0] == '1',
                is_word=match.group('dw')[1] == '1',
                mod=match.group('mod'),
                rm=match.group('rm'),
                r=match.group('reg'),
                unpacker=unpacker_from_bitstr(match.string, 2)
            )

        def op_w_mod_rm(name, match):
            w = match.group('w')
            mod = match.group('mod')
            rm = match.group('rm')
            if mod == '11':
                return Op_Reg(2, name, Register(rm), w == '1')
            else:
                raise Exception("Not implemented")

        def op_disp_byte(name, match):
            return Op_Disp(2, name, unpack_b(match, 1))

        def op_disp_word(name, match):
            return Op_Disp(3, name, unpack_w(match, 1))

        def op_no_args(name, match):
            return Op_NoArgs(1, name)

        def op_reg(name, match):
            return Op_Reg(1, name, Register(match.group('reg')), is_word=True)

        def op_reg_acc(name, match):
            return Op_Reg_Reg(1,
                              name,
                              Register('000'),
                              Register(match.group('reg')),
                              is_word=True
                              )

        def op_w_mod_reg_rm(name, match):
            # The length includes the displacement bytes of a memory
            # operand, as in op_dw_mod_reg_rm; a fixed length of 2 would
            # leave IP inside the instruction for mod != '11'.
            return _mnemonic_rm_r(
                (2 +
                 _nr_ea_bytes(match.group('mod'), match.group('rm'))
                 ),
                name,
                to_reg=True,
                is_word=match.group('w') == '1',
                mod=match.group('mod'),
                rm=match.group('rm'),
                r=match.group('reg'),
                unpacker=unpacker_from_bitstr(match.string, 2)
            )

        def _w_reg_im(name, w, reg, unpacker):
            return _mnemonic_i_rm(1 + nr_bytes(is_word(w)),
                                  name, w == '1',
                                  '11',
                                  unpacker.unpack(0, is_word(w)),
                                  reg,
                                  unpacker
                                  )

        def op_w_reg_im(name, match):
            return _w_reg_im(name,
                             match.group('w'),
                             match.group('reg'),
                             unpacker_from_bitstr(match.string, 1)
                             )

        return {
            "100000{sw}{mod}111{rm}": lambda m: op_sw_mod_rm(m, "cmp"),
            "01110100{disp}": lambda m: op_disp_byte('jz', m),
            "01110101{disp}": lambda m: op_disp_byte('jnz', m),
            "11110100": lambda m: op_no_args("hlt", m),
            "1011{w}{reg}": lambda m: op_w_reg_im("mov", m),
            "01001{reg}": lambda m: op_reg("dec", m),
            # The {reg} group only requires three more bits to be present;
            # op_disp_word reads the displacement itself and ignores it.
            "11101000{reg}": lambda m: op_disp_word("call", m),
            "01000{reg}": lambda m: op_reg("inc", m),
            "001100{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("xor", m),
            "000010{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("or", m),
            "01110010{disp}": lambda m: op_disp_byte('jc', m),
            "001110{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("cmp", m),
            "01110110{disp}": lambda m: op_disp_byte('jbe', m),
            "000000{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("add", m),
            "100000{sw}{mod}010{rm}": lambda m: op_sw_mod_rm(m, "adc"),
            "01111001{disp}": lambda m: op_disp_byte('jns', m),
            "01010{reg}": lambda m: op_reg("push", m),
            # NOTE: the s bit is always 0 for this encoding according to
            # http://datasheets.chipdb.org/Intel/x86/808x/datashts/8086/231455-006.pdf
            # but apparently not in the codegolf program.
            "100000{sw}{mod}100{rm}": lambda m: op_sw_mod_rm(m, "and"),
            "01011{reg}": lambda m: op_reg("pop", m),
            "11111001": lambda m: op_no_args("stc", m),
            "000110{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("sbb", m),
            "11000011": lambda m: op_no_args("ret", m),
            "11101011": lambda m: op_disp_byte('jmp', m),
            "1111111{w}{mod}000{rm}": lambda m: op_w_mod_rm("inc", m),
            "0011110{w}": lambda m: op_w_acc("cmp", m),
            "1100011{w}{mod}000{rm}": lambda m: op_w_mod_rm_im("mov", m),
            "1111111{w}{mod}001{rm}": lambda m: op_w_mod_rm("dec", m),
            "100000{sw}{mod}000{rm}": lambda m: op_sw_mod_rm(m, "add"),
            "10010{reg}": lambda m: op_reg_acc("xchg", m),
            "100010{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("mov", m),
            "1000000{w}{mod}001{rm}": lambda m: op_w_mod_rm_im("or", m),
            "1000011{w}{mod}{reg}{rm}": lambda m: op_w_mod_reg_rm("xchg", m),
            "001000{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("and", m),
            "100000{sw}{mod}101{rm}": lambda m: op_sw_mod_rm_im("sub", m),
            "001010{dw}{mod}{reg}{rm}": lambda m: op_dw_mod_reg_rm("sub", m),
            "01110111{disp}": lambda m: op_disp_byte('jnbe', m),
            "0000010{w}": lambda m: op_w_acc("add", m),
        }

    def decode_instr(self, bytes):
        """Decode the instruction starting at the first item of *bytes*.

        At most six items are inspected.  Returns the op built by the
        first matching pattern, or ``Unknown_Op()`` when none matches.
        """
        # The parameter name shadows the builtin; kept for compatibility.
        # Render the bit string once and match each pattern only once.
        bitstr = ''.join(bin(b)[2:].rjust(8, '0') for b in bytes[:6])
        for pattern, dispatcher in self._dispatch_from_re.items():
            match = pattern.match(bitstr)
            if match:
                return dispatcher(match)
        return Unknown_Op()

    def instructions(self, program_bytes):
        """Yield the decoded instructions of *program_bytes* in order."""
        pc = 0
        while pc < len(program_bytes):
            instr = self.decode_instr(program_bytes[pc:pc + 6])
            pc += instr.length()
            yield instr
class RunToAnyBreakPoint(object):
    """Runner phase that clocks the machine until IP hits a breakpoint."""

    def __init__(self, breaks, state_after):
        self._breaks = breaks
        self._breaked = False
        self._next_state = state_after

    def next_state(self):
        """Return the follow-up phase once a breakpoint was hit, else self."""
        if self._breaked:
            return self._next_state
        return self

    def update(self, machine):
        """Execute one instruction unless IP is on a breakpoint.

        After a breakpoint has been reached further calls do nothing.
        """
        if self._breaked:
            return
        if machine.state().ip() in self._breaks:
            self._breaked = True
        else:
            machine.next_clock(None)
class InteractiveDebug(object):
    """Runner phase that single-steps the machine under user control."""

    def __init__(self):
        self._run_to_break = False
        self._breaks = []
        self._next_state = self

    def next_state(self):
        """Return the phase the runner should use for the next update."""
        return self._next_state

    def see_ram(self, machine_state, address):
        """Print a 12-byte hex dump of RAM beginning at *address*."""
        print("M {0}: {1} ...".format(
            hex(address),
            machine_state.ram_hexdump(address, 12)
        ))

    def _run_to_next_state(self):
        # With breakpoints defined, hand over to a run-to-breakpoint phase
        # that returns here afterwards; otherwise keep stepping.
        if len(self._breaks) != 0:
            self._next_state = RunToAnyBreakPoint(self._breaks, self)
        else:
            self._next_state = self

    def _parse_address(self, text):
        # Parse a hexadecimal address typed after a command letter; print
        # a message and return None instead of raising on malformed input.
        try:
            return int(text, 16)
        except ValueError:
            print("invalid hex address: {0}".format(text.strip()))
            return None

    def update(self, machine):
        """Show the state, execute one instruction and prompt the user.

        Commands: empty line steps, ``M <hex>`` dumps RAM, ``B <hex>``
        adds a breakpoint, ``R`` runs to the next breakpoint and ``Q``
        halts the machine so that the caller's run loop terminates.
        """
        # Drop a breakpoint runner left over from an earlier 'R' command.
        self._next_state = self
        print(machine.state())
        if machine.state().halted():
            return
        print(DisAsm().decode_instr(machine.state().instr_bytes()))
        machine.next_clock(None)
        print("[Enter]:step [R]:run [B 0xadr]:add break "
              "[M 0xadr]:see RAM [Q]:quit")
        try:
            read_line = raw_input
        except NameError:
            # Python 3 renamed raw_input to input.
            read_line = input
        while True:
            v = read_line()
            if len(v) == 0:
                return
            elif v[0] == 'M':
                address = self._parse_address(v[1:])
                if address is not None:
                    self.see_ram(machine.state(), address)
            elif v[0] == 'B':
                address = self._parse_address(v[1:])
                if address is not None:
                    self._breaks.append(address)
            elif v[0] == 'R':
                self._run_to_next_state()
                return
            elif v[0] == 'Q':
                # Returning alone would behave like a single step.
                machine.state().halt()
                return
class PrintStateAndRun(object):
    """Runner phase that prints state and instruction before each step."""

    def next_state(self):
        return self

    def update(self, machine):
        """Print a blank line, the state and the next instruction; step."""
        print("")
        print(machine.state())
        pending = DisAsm().decode_instr(machine.state().instr_bytes())
        print(pending)
        machine.next_clock(None)
class Runner(object):
    """Drives a chain of phases, each exposing next_state() and update()."""

    def __init__(self, initial):
        self._runner = initial

    def update(self, machine):
        """Switch to the current phase's successor and let it update."""
        following = self._runner.next_state()
        self._runner = following
        following.update(machine)
def main(program_path, debug, debug_step):
    """Load the program at *program_path* and run it.

    debug_step -- run under the interactive single-step debugger.
    debug      -- otherwise, print state and instruction before each step.
    With neither flag the program runs attached to the ``console`` module.
    """
    with open(program_path, 'rb') as program_file:
        machine = Machine(State(), DisAsm())
        machine.state().set_reg_value(Register.index_from_name('SP'),
                                      0x100, True)
        machine.load(program_file.read())

    if not (debug or debug_step):
        def run(console, machine):
            while machine.next_clock(console):
                pass
        import console
        console.run(run, machine)
        return

    # debug_step takes precedence over plain debug.
    phase = InteractiveDebug() if debug_step else PrintStateAndRun()
    runner = Runner(phase)
    while not machine.state().halted():
        runner.update(machine)
if __name__ == '__main__':
    import sys
    # One extra command-line argument enables debug output, two or more
    # enable interactive stepping.
    extra_args = len(sys.argv) - 1
    program = os.path.join(os.path.dirname(__file__), 'codegolf')
    main(program, debug=extra_args > 0, debug_step=extra_args > 1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement