Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from .vm import REGISTER_COUNT
- from.instructions import INSTRUCTION_SIZE, INSTRUCTION_SET, SYSCALL_SET
- # INSTRUCTION_SET = [
- # 'add', 'sub', 'mul', 'div', # arithmetic
- # 'and', 'or', 'xor', # logical
- # 'mov', # move
- # 'jmp', 'jne', 'je', # jumps
- # 'call', 'syscall', # call instructions
- # 'ret', # return instruction
- # ]
- # .data
- # label = value
- #
- # .code
- # add r1 label2 r3 # r1-r16 are registers;
- # # sp is an alias for r16, the stack pointer
- # add [128] r2 r3 # [128] - memory cell number 128
- # add 2 r2 r3 # 2 - an immediate (literal) value
- # :label
- # mov [128] [127]
- # -1 0 ... offset ...
- # <registers> <data segment> <code segment> <memory>
class Assembler:
    """Translate assembly source text into numeric instruction words.

    The source consists of a data part (``name = value`` lines) followed by
    a ``.code`` marker and the instructions.  Operands may be registers
    (``r<N>`` or ``sp``), memory cells (``[<N>]``), integer literals, data
    names, jump labels (declared as ``:name`` on their own line) or syscall
    names.  ``assemble`` runs a sequence of passes that rewrite every
    operand into a number.

    Registers are encoded as non-positive numbers (``r<N>`` becomes ``-N``);
    memory cells are shifted by ``memory_offset``, the combined size of the
    data values and the encoded instructions.
    """

    def __init__(self):
        # label name -> index of the instruction that follows the label
        self.labels = {}
        # data name -> position of its value in ``data_section`` (1-based,
        # see ``_process_data_section``)
        self.data = {}
        # values of the data segment, in declaration order
        self.data_section = []
        # number of data values plus encoded code words; set by ``assemble``
        self.memory_offset = 0

    def _check_name(self, name):
        """Raise ``ValueError`` if *name* is already a label or a data name.

        Labels and data names share one namespace because operands are
        resolved against both tables by plain string comparison.
        """
        if name in self.labels or name in self.data:
            raise ValueError('Identical label or data names --- ' + name)

    @staticmethod
    def _prepare_text(text):
        """Split the source into a list of token lists.

        :param text: either the whole source as one string or an iterable
            of source lines.
        :return: one list of words per non-empty line, with ``#`` comments
            removed, e.g. ``[['add', 'r1', 'label2', 'r3'], ...]``.
        """
        # A bare string must not be joined character by character.
        source = text if isinstance(text, str) else '\n'.join(text)
        prepared = []
        for raw_line in source.split('\n'):
            without_comment = raw_line.split('#')[0]
            # split() without arguments also copes with tabs, repeated
            # blanks and a trailing '\r'.
            words = without_comment.split()
            if words:
                prepared.append(words)
        return prepared

    def _process_data_section(self, data_lines):
        """Register every ``name = value`` declaration of the data part.

        :param data_lines: token lists of the data part; blanks around
            ``=`` are optional.
        :raises ValueError: on a malformed declaration, a non-integer value
            or a name that is already in use.
        """
        for line in data_lines:
            # Re-join with '=' so that 'x = 5', 'x= 5' and 'x=5' all end up
            # as ['x', '5'] after dropping the empty pieces.
            fields = [part for part in '='.join(line).split('=') if part]
            if len(fields) < 2:
                raise ValueError(
                    'Malformed data declaration --- ' + ' '.join(line))
            name = fields[0]
            value = int(fields[1])
            self._check_name(name)
            self.data_section.append(value)
            # Position is taken after appending, so the first value gets 1.
            # NOTE(review): confirm the VM expects data positions to start
            # at 1 rather than 0.
            self.data[name] = len(self.data_section)

    def _process_raw_values(self, code_lines):
        """Move integer literals into the data section.

        Every operand that parses as an integer is replaced in place by a
        generated data name (``raw_value<k>``); equal literals share one
        entry.

        :param code_lines: token lists of the code part (modified in place).
        :return: the same ``code_lines`` list.
        :raises ValueError: if a generated name is already in use.
        """
        raw_values = {}
        for line in code_lines:
            for i in range(1, len(line)):
                # Only the conversion is guarded, so that a name clash
                # reported below is not mistaken for "not a literal".
                try:
                    value = int(line[i])
                except ValueError:
                    continue  # not a raw value, leave for later passes
                if value not in raw_values:
                    value_label = 'raw_value' + str(len(raw_values))
                    self._check_name(value_label)
                    raw_values[value] = value_label
                    self.data_section.append(value)
                    # same position convention as declared data values
                    self.data[value_label] = len(self.data_section)
                line[i] = raw_values[value]
        return code_lines

    def _process_commands(self, code_lines):
        """Replace each mnemonic (first token) by its numeric id.

        ``INSTRUCTION_SET[mnemonic]`` is unpacked as ``(valency, id)``; only
        the id is used here.  A failed lookup for an unknown mnemonic
        propagates to the caller.
        """
        for line in code_lines:
            _valency, opcode = INSTRUCTION_SET[line[0]]
            line[0] = opcode
        return code_lines

    def _process_syscalls(self, code_lines):
        """Replace operands that name a syscall by ``SYSCALL_SET[name]``."""
        for line in code_lines:
            for i in range(1, len(line)):
                if line[i] in SYSCALL_SET:
                    line[i] = SYSCALL_SET[line[i]]
        return code_lines

    def _prepare_labels(self, code_lines):
        """Collect ``:label`` lines and strip them from the code.

        A label maps to the index of the instruction that follows it.

        :param code_lines: token lists of the code part.
        :return: a new list holding only the instruction lines.
        :raises ValueError: if a label name is declared twice or clashes
            with a data name.
        """
        cleared_code = []
        for line in code_lines:
            if len(line) == 1 and line[0].startswith(':'):  # found label
                label = line[0][1:]
                # Compare the stored form (without ':'), otherwise a
                # duplicate can never be detected.
                self._check_name(label)
                self.labels[label] = len(cleared_code)
            else:
                cleared_code.append(line)
        return cleared_code

    @staticmethod
    def _check_is_register_name(name):
        """Tell whether *name* denotes a register.

        :return: ``(is_register, value)``.  ``sp`` yields
            ``-REGISTER_COUNT``; ``r<N>`` yields ``-N`` and counts as a
            register when ``0 <= N <= REGISTER_COUNT``.  ``value`` is
            ``None`` when *name* does not have the ``r<N>`` form.
        """
        # Operands already turned into numbers by earlier passes are not
        # strings and can never be register names.
        if not isinstance(name, str) or not name:
            return False, None
        # special case - stack pointer
        if name == 'sp':
            return True, -REGISTER_COUNT
        if name[0] != 'r':
            return False, None
        try:
            value = -int(name[1:])
        except ValueError:
            return False, None  # not a register
        # NOTE(review): 'r0' is accepted and encodes as 0 -- confirm that a
        # register 0 is intended.
        return 0 >= value >= -REGISTER_COUNT, value

    @staticmethod
    def _check_is_memory_name(name):
        """Tell whether *name* has the ``[<N>]`` memory-cell form.

        :return: ``(True, N)`` for a bracketed operand, else
            ``(False, None)``.
        :raises ValueError: if the text between the brackets is not an
            integer.
        """
        if not isinstance(name, str) or not name:
            return False, None
        if name[0] == '[' and name[-1] == ']':
            return True, int(name[1:-1])
        return False, None

    def _process_memory_registers(self, code_lines):
        """Encode register and memory operands as numbers, in place.

        Registers become their (non-positive) register value; ``[N]``
        becomes ``N + self.memory_offset``.  Other operands are untouched.
        """
        for line in code_lines:
            for i in range(1, len(line)):
                is_register, register_offset = self._check_is_register_name(line[i])
                is_memory, memory_offset = self._check_is_memory_name(line[i])
                if is_register:
                    line[i] = register_offset
                elif is_memory:
                    line[i] = memory_offset + self.memory_offset
        return code_lines

    def _process_data_labels(self, code_lines):
        """Rewrite data-name operands as ``[offset]`` memory operands.

        The offset is pre-compensated by ``-self.memory_offset`` so that
        ``_process_memory_registers`` later yields the stored data position.
        """
        for line in code_lines:
            for i in range(1, len(line)):
                if line[i] in self.data:
                    offset = self.data[line[i]] - self.memory_offset
                    line[i] = '[' + str(offset) + ']'
        return code_lines

    def _process_jumps(self, code_lines):
        """Rewrite label operands as ``[offset]`` memory operands.

        The offset is expressed relative to the end of the code, in units
        of ``INSTRUCTION_SIZE``.
        """
        code_length = len(code_lines)
        for line in code_lines:
            for i in range(1, len(line)):
                if line[i] in self.labels:
                    # TODO: reject labels used by non-jump instructions.
                    # ``assemble`` runs this pass after the mnemonic has
                    # been replaced by its id, so such a check has to look
                    # at ids, not at 'jmp'/'jne'/'je'.
                    # NOTE(review): the '+ 1' is kept from the original
                    # formula -- confirm it matches the VM's jump semantics.
                    offset = (self.labels[line[i]] + 1 - code_length) * INSTRUCTION_SIZE
                    # create jump as offset in memory
                    line[i] = '[' + str(offset) + ']'
        return code_lines

    def _append_short_commands(self, code_lines):
        """Pad every instruction to ``INSTRUCTION_SIZE`` words.

        Zeros are inserted before the last operand, so the last operand
        always occupies the final slot.  An instruction without operands is
        padded with zeros after the opcode.

        :raises ValueError: if an instruction has more than
            ``INSTRUCTION_SIZE`` tokens.
        """
        for line_index, line in enumerate(code_lines):
            if len(line) > INSTRUCTION_SIZE:
                raise ValueError(
                    'Too many operands --- ' + ' '.join(map(str, line)))
            padding = [0] * (INSTRUCTION_SIZE - len(line))
            operands = line[1:]
            if operands:
                # opcode, leading operands, zeros, then the last operand
                padded = [line[0]] + operands[:-1] + padding + [operands[-1]]
            else:
                padded = [line[0]] + padding
            code_lines[line_index] = padded
        return code_lines

    def assemble(self, text, filename):
        """Assemble *text* and write the result to *filename*.

        :param text: the source, as one string or an iterable of lines.  The
            first non-empty line is skipped as the data-section header; a
            ``.code`` line separates data from code.
        :param filename: path of the output file.
        :raises ValueError: if there is no ``.code`` line, or on the errors
            reported by the individual passes.
        """
        # Start from a clean state so one instance can be used repeatedly.
        self.labels = {}
        self.data = {}
        self.data_section = []
        self.memory_offset = 0

        lines = self._prepare_text(text)
        code_pos = lines.index(['.code'])
        data_lines = lines[1:code_pos]
        code_lines = lines[code_pos + 1:]

        code_lines = self._prepare_labels(code_lines)
        self._process_data_section(data_lines)
        code_lines = self._process_raw_values(code_lines)
        # Both segment sizes are final only after literals were moved into
        # the data section and label lines were removed.
        self.memory_offset = len(self.data_section) + len(code_lines) * INSTRUCTION_SIZE

        code_lines = self._process_commands(code_lines)
        code_lines = self._process_syscalls(code_lines)
        code_lines = self._process_jumps(code_lines)
        code_lines = self._process_data_labels(code_lines)
        code_lines = self._process_memory_registers(code_lines)
        code_lines = self._append_short_commands(code_lines)

        # NOTE(review): ``_translate_to_bytes`` is not defined in this class
        # as shown; it has to be provided before this method can complete.
        data_bytes = self._translate_to_bytes(self.data_section)
        code_bytes = self._translate_to_bytes(code_lines)
        # NOTE(review): assumes the image is the data bytes followed by the
        # code bytes -- confirm against the VM loader.
        with open(filename, 'wb') as file:
            file.write(data_bytes)
            file.write(code_bytes)

    def disassemble(self, filename):
        """Placeholder; returns a fixed message and ignores *filename*."""
        text = 'Not implemented yet'
        return text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement