Advertisement
Guest User

Untitled

a guest
Jun 20th, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.44 KB | None | 0 0
  1. from .vm import REGISTER_COUNT
  2. from.instructions import INSTRUCTION_SIZE, INSTRUCTION_SET, SYSCALL_SET
  3.  
  4.  
  5. # INSTRUCTION_SET = [
  6. #     'add', 'sub', 'mul', 'div', # arithmetic
  7. #     'and', 'or', 'xor',         # logical
  8. #     'mov',                      # move
  9. #     'jmp', 'jne', 'je',         # jumps
  10. #     'call', 'syscall',          # call instructions
  11. #     'ret',                      # return instruction
  12. # ]
  13. # .data
  14. #   label = value
  15. #
  16. # .code
  17. #   add r1 label2 r3    # where r1-r16 - register
  18. #                       # moreover sp=r16 - stack pointer
  19. #  add [128] r2 r3      # [128] - the 128-th cell of memory
  20. #  add 2 r2 r3          # 2 - immediate (literal) value
  21. # :label
  22. #   mov [128] [127]
  23. #          -1  0 ...                        offset ...
  24. # <registers> <data segment> <code segment> <memory>
  25.  
  26. class Assembler:
  27.     def __init__(self):
  28.         self.labels = {}
  29.         self.data = {}
  30.         self.data_section = []
  31.         self.memory_offset = 0
  32.         pass
  33.  
  34.     def _check_name(self, name):
  35.         if name in self.labels or name in self.data or name:
  36.             assert 'Identical label or data names --- ' + name
  37.  
  38.     @staticmethod
  39.     def _prepare_text(text):
  40.         text = '\n'.join(text)
  41.         lines = text.split('\n')  # split for lines
  42.         lines = map(lambda x: x.split('#')[0], lines)  # remove comments
  43.         lines = map(lambda x: x.split(' '), lines)
  44.         # remove empty elements in line
  45.         lines = map(lambda line: list(filter(lambda word: len(word) > 0, line)), lines)
  46.         lines = filter(lambda x: len(x) > 0, lines)  # remove empty lines
  47.         # lines = [['add', 'r1', 'label2', 'r3'],
  48.         #          ['add', '[128]', 'r2', 'r3'],
  49.         #           ...,
  50.         #         ]
  51.         lines = list(lines)
  52.         return lines
  53.  
  54.     def _process_data_section(self, data_lines):
  55.         for line in data_lines:
  56.             # process line with possible spaces
  57.             tline = list(filter(lambda x: len(x) > 0, '='.join(line).split('=')))
  58.             if tline[0] in self.data:
  59.                 assert 'Identical data labels'
  60.             self.data_section.append(int(tline[1]))
  61.             self.data[tline[0]] = len(self.data_section)
  62.         return
  63.  
  64.     def _process_raw_values(self, code_lines):
  65.         raw_values = {}
  66.         for line in code_lines:
  67.             for i in range(1, len(line)):
  68.                 try:
  69.                     value = int(line[i])
  70.                     if value not in raw_values:
  71.                         value_label = 'raw_value' + str(len(raw_values))
  72.                         self._check_name(value_label)
  73.  
  74.                         raw_values[value] = value_label
  75.                         self.data_section.append(value)
  76.                         # get correct data section index for raw value
  77.                         self.data[value_label] = len(self.data_section)
  78.  
  79.                     line[i] = raw_values[value]
  80.                 except ValueError:
  81.                     pass  # not a raw value, ignore
  82.         return code_lines
  83.  
  84.     def _process_commands(self, code_lines):
  85.         for line in code_lines:
  86.             valency, id = INSTRUCTION_SET[line[0]]
  87.             line[0] = id
  88.         return code_lines
  89.  
  90.     def _process_syscalls(self, code_lines):
  91.         for line in code_lines:
  92.             for i in range(1, len(line)):
  93.                 if line[i] in SYSCALL_SET:
  94.                     line[i] = SYSCALL_SET[line[i]]
  95.         return code_lines
  96.  
  97.     def _prepare_labels(self, code_lines):
  98.         cleared_code = []
  99.         for line in code_lines:
  100.             if len(line) == 1 and line[0][0] == ':':  # found label
  101.                 if line[0] in self.labels:
  102.                     assert 'Identical labels'
  103.                 self.labels[line[0][1:]] = len(cleared_code)
  104.             else:
  105.                 cleared_code.append(line)
  106.         return cleared_code
  107.  
  108.     @staticmethod
  109.     def _check_is_register_name(name):
  110.         # special case - stack pointer
  111.         if name == 'sp':
  112.             return True, -REGISTER_COUNT
  113.  
  114.         is_register = False
  115.         value = None
  116.         if name[0] != 'r':
  117.             return is_register, value
  118.  
  119.         try:
  120.             value = -int(name[1:])
  121.             is_register = 0 >= value >= -REGISTER_COUNT
  122.         except ValueError:
  123.             pass  # not a register
  124.         return is_register, value
  125.  
  126.     @staticmethod
  127.     def _check_is_memory_name(name):
  128.         is_memory = name[0] == '[' and name[-1] == ']'
  129.         offset = None
  130.         if is_memory:
  131.             offset = int(name[1:-1])
  132.         return is_memory, offset
  133.  
  134.     def _process_memory_registers(self, code_lines):
  135.         for line in code_lines:
  136.             for i in range(1, len(line)):
  137.                 is_register, register_offset = self._check_is_register_name(line[i])
  138.                 is_memory, memory_offset = self._check_is_memory_name(line[i])
  139.                 if is_register:
  140.                     line[i] = register_offset
  141.                 elif is_memory:
  142.                     line[i] = memory_offset + self.memory_offset
  143.  
  144.         return code_lines
  145.  
  146.     def _process_data_labels(self, code_lines):
  147.         for line in code_lines:
  148.             for i in range(1, len(line)):
  149.                 if line[i] in self.data:
  150.                     offset = -self.memory_offset + self.data[line[i]]
  151.                     line[i] = '[' + str(offset) + ']'
  152.         return code_lines
  153.  
  154.     def _process_jumps(self, code_lines):
  155.         for line_index in range(len(code_lines)):
  156.             line = code_lines[line_index]
  157.             for i in range(1, len(line)):
  158.                 if line[i] in self.labels:
  159.                     # TODO
  160.                     if line[0] not in ['jne', 'jmp', 'je']:
  161.                         assert 'Strange usage of jump\'s label'
  162.  
  163.                     offset = (-len(code_lines) + self.labels[line[i]] + 1) * INSTRUCTION_SIZE
  164.                     # create jump as offset in memory
  165.                     line[i] = '[' + str(offset) + ']'
  166.         return code_lines
  167.  
  168.     def _append_short_commands(self, code_lines):
  169.         for line_index in range(len(code_lines)):
  170.             line = code_lines[line_index]
  171.             # without command id and result
  172.             args = line[1:][:-1]
  173.             code_lines[line_index] = line[0] + args + [0] * (INSTRUCTION_SIZE - len(line)) + line[-1]
  174.         return code_lines
  175.  
  176.     def assemble(self, text, filename):
  177.         lines = self._prepare_text(text)
  178.  
  179.         code_pos = lines.index(['.code'])
  180.         data_lines = lines[1:code_pos]
  181.         code_lines = lines[code_pos + 1:]
  182.  
  183.         code_lines = self._prepare_labels(code_lines)
  184.  
  185.         self._process_data_section(data_lines)
  186.         code_lines = self._process_raw_values(code_lines)
  187.         self.memory_offset = len(self.data_section) + len(code_lines) * INSTRUCTION_SIZE
  188.  
  189.         code_lines = self._process_commands(code_lines)
  190.         code_lines = self._process_syscalls(code_lines)
  191.  
  192.         code_lines = self._process_jumps(code_lines)
  193.         code_lines = self._process_data_labels(code_lines)
  194.  
  195.         code_lines = self._process_memory_registers(code_lines)
  196.  
  197.         code_lines = self._append_short_commands(code_lines)
  198.  
  199.         data_bytes = self._translate_to_bytes(self.data_section)
  200.         code_bytes = self._translate_to_bytes(code_lines)
  201.  
  202.  
  203.         with open(filename, 'w') as file:
  204.             file.write(self.data_section)
  205.         pass
  206.  
  207.     def disassemble(self, filename):
  208.         text = 'Not implemented yet'
  209.         return text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement