Advertisement
Guest User

Untitled

a guest
Aug 28th, 2016
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.43 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. """HACK assembler.
  4.  
  5. Translates HACK Assembly language to HACK machine language.
  6. """
  7.  
  8. import argparse
  9. import functools
  10. import re
  11. import os
  12.  
  13.  
  14. ARCH_BITS = 16
  15.  
  16. RE_A_TYPE_INSTR = re.compile(r"^@.*")
  17. RE_C_TYPE_INSTR = re.compile(r"^.*=.*|.*;.*")
  18. RE_COMMENT = re.compile(r"^\s*//.*")
  19. RE_WHITESPACE = re.compile(r"^\s*$")
  20. RE_LABEL_DECL = re.compile(r"^\(.*\)")
  21.  
  22. A_TYPE_INSTR = "A TYPE INSTRUCTION"
  23. C_TYPE_INSTR = "C TYPE INSTRUCTION"
  24. COMMENT = "COMMENTARY"
  25. WHITESPACE = "WHITESPACE"
  26. LABEL_DECL = "LABEL DECLARATION"
  27.  
  28. TOKENS = {
  29. RE_A_TYPE_INSTR: A_TYPE_INSTR,
  30. RE_C_TYPE_INSTR: C_TYPE_INSTR,
  31. RE_COMMENT: COMMENT,
  32. RE_WHITESPACE: WHITESPACE,
  33. RE_LABEL_DECL: LABEL_DECL,
  34. }
  35.  
  36. BACK_TOKENS = {
  37. A_TYPE_INSTR: RE_A_TYPE_INSTR,
  38. C_TYPE_INSTR: RE_C_TYPE_INSTR,
  39. COMMENT: RE_COMMENT,
  40. WHITESPACE: RE_WHITESPACE,
  41. LABEL_DECL: RE_LABEL_DECL,
  42. }
  43.  
  44. def create_builtins_symbol_table():
  45. """Builtin symbol table."""
  46.  
  47. BUILTINS_SYMBOL_TABLE = {
  48. "SCREEN": a_absolute_address(16384),
  49. "KBD": a_absolute_address(24576),
  50. "SP": a_absolute_address(0),
  51. "LCL": a_absolute_address(1),
  52. "ARG": a_absolute_address(2),
  53. "THIS": a_absolute_address(3),
  54. "THAT": a_absolute_address(4),
  55. }
  56.  
  57. # First 16 RAM register aliases: R0..R15
  58. for i in range(16):
  59. BUILTINS_SYMBOL_TABLE["R" + str(i)] = a_absolute_address(i)
  60. return BUILTINS_SYMBOL_TABLE
  61.  
  62.  
  63. def is_type(token, source):
  64. """Check token class.
  65.  
  66. :param str token: token class
  67. :param str source: line of code
  68. """
  69. return BACK_TOKENS[token].match(source)
  70.  
  71.  
  72. def a_absolute_address(symbol):
  73. return bin(int(symbol))[2:].zfill(ARCH_BITS)
  74.  
  75.  
  76. def ralloc(symbol_table, start_address=16):
  77. free_address = start_address
  78. allocated_addresses = symbol_table.values()
  79. while True:
  80. if free_address in allocated_addresses:
  81. free_address += 1
  82. else:
  83. return free_address
  84.  
  85.  
  86. def parse_a_instuction(instruction, symbol_table):
  87. symbol = instruction[1:]
  88. # it's already address
  89. if symbol.isdigit():
  90. return a_absolute_address(symbol)
  91. else:
  92. # built-in or created variable
  93. if symbol in symbol_table:
  94. return symbol_table[symbol]
  95. else:
  96. # Dirty awwwww...
  97. # labels must be uppercased
  98. # creating label for future reassigning
  99. if symbol.isupper():
  100. symbol_table[symbol] = None
  101. return symbol
  102.  
  103. # new variable or label
  104. address = ralloc(symbol_table)
  105. symbol_table[symbol] = a_absolute_address(address)
  106. print("ALLOC: 16 bytes allocated, address = " + str(address))
  107. return a_absolute_address(address)
  108.  
  109.  
  110. def parse_c_instruction(instruction):
  111.  
  112. instruction_template = "{opcode}{reserved}{comp}{dest}{jmp}"
  113.  
  114. JMP_MAP = {
  115. "": "000",
  116. "JGT": "001",
  117. "JEQ": "010",
  118. "JGE": "011",
  119. "JLT": "100",
  120. "JNE": "101",
  121. "JLE": "110",
  122. "JMP": "111",
  123. }
  124.  
  125. DEST_MAP = {
  126. "": "000",
  127. "M": "001",
  128. "D": "010",
  129. "MD": "011",
  130. "A": "100",
  131. "AM": "101",
  132. "AD": "110",
  133. "AMD": "111",
  134. }
  135.  
  136. COMP_MAP = {
  137. "0": "0101010",
  138. "1": "0111111",
  139. "-1": "0111010",
  140. "D": "0001100",
  141. "A": "0110000",
  142. "!D": "0001101",
  143. "!A": "0110001",
  144. "-D": "0001111",
  145. "-A": "0110011",
  146. "D+1": "0011111",
  147. "A+1": "0110111",
  148. "D-1": "0001110",
  149. "A-1": "0110010",
  150. "D+A": "0000010",
  151. "D-A": "0010011",
  152. "A-D": "0000111",
  153. "D&A": "0000000",
  154. "D|A": "0010101",
  155. "M": "1110000",
  156. "!M": "1110001",
  157. "-M": "1110011",
  158. "M+1": "1110111",
  159. "M-1": "1110010",
  160. "D+M": "1000010",
  161. "D-M": "1010011",
  162. "M-D": "1000111",
  163. "D&M": "1000000",
  164. "D|M": "1010101",
  165. }
  166.  
  167. def parse_jmp(instruction):
  168.  
  169. if ";" in instruction:
  170. left, jmp = instruction.split(";")
  171. else:
  172. left, jmp = instruction, ""
  173. return left, JMP_MAP[jmp.strip()]
  174.  
  175. def parse_dest_comp(instruction):
  176.  
  177. if "=" in instruction:
  178. dest, comp = instruction.split("=")
  179. else:
  180. # Unconditional jump: 0; JMP
  181. dest, comp = ("", "0")
  182. return DEST_MAP[dest.strip()], COMP_MAP[comp.strip().replace(" ", "")]
  183.  
  184. left, jmp = parse_jmp(instruction)
  185. dest, comp = parse_dest_comp(left)
  186.  
  187. values = {
  188. "opcode": 1,
  189. "reserved": 11,
  190. "comp": comp,
  191. "dest": dest,
  192. "jmp": jmp
  193. }
  194.  
  195. return instruction_template.format(**values)
  196.  
  197.  
  198. def process_label_declaration(instruction, lineno, symbol_table):
  199. label = instruction[1:-1]
  200.  
  201. if label in symbol_table:
  202. current = symbol_table[label]
  203. if current is None:
  204. current = a_absolute_address(lineno)
  205. symbol_table[label] = current
  206. return label, current
  207.  
  208.  
  209. def coroutine(func):
  210. @functools.wraps(func)
  211. def _coroutine(*args, **kwargs):
  212. gen = func(*args, **kwargs)
  213. next(gen)
  214. return gen
  215. return _coroutine
  216.  
  217.  
  218. @coroutine
  219. def reader(source):
  220. for line in source:
  221. yield line.strip()
  222.  
  223.  
  224. @coroutine
  225. def assembler(source):
  226. """Produce translated code."""
  227.  
  228. lineno = 0
  229. symbol_table = create_builtins_symbol_table()
  230. translated = []
  231.  
  232. for line in source:
  233. if is_type(COMMENT, line) or is_type(WHITESPACE, line):
  234. # ignore comments and empty lines
  235. continue
  236. elif is_type(A_TYPE_INSTR, line):
  237. result = parse_a_instuction(line, symbol_table)
  238. elif is_type(C_TYPE_INSTR, line):
  239. result = parse_c_instruction(line)
  240. elif is_type(LABEL_DECL, line):
  241. label, new_address = process_label_declaration(line, lineno, symbol_table)
  242. for item in range(len(translated)):
  243. if translated[item] == label:
  244. translated[item] = new_address
  245. # no need to increase lineno counter and produce output
  246. continue
  247. else:
  248. result = line
  249.  
  250. translated.append(result)
  251. lineno += 1
  252.  
  253. yield
  254. for s in translated:
  255. yield s
  256.  
  257.  
  258. def parse_args():
  259. """Parse and return incoming arguments.
  260.  
  261. :return: parsed arguments
  262. :rtype: argparse.Namespace
  263. """
  264. parser = argparse.ArgumentParser(description="HACK assembler")
  265. parser.add_argument("-o", "--output", help="output file")
  266. parser.add_argument("filename", help="input file")
  267.  
  268. return parser.parse_args()
  269.  
  270.  
  271. def prepare_filenames(infile, outfile=None, out_ext="hack"):
  272. """Choose out file name if not provided.
  273.  
  274. :param str infile: input file name
  275. :param str outfile: output file name
  276. :param str out_ext: output file extension
  277.  
  278. :return: processed file names
  279. :rtype: tuple(str, str)
  280. """
  281. name, ext = os.path.splitext(infile)
  282. outfile = ".".join([name, out_ext]) if outfile is None else outfile
  283. return infile, outfile
  284.  
  285.  
  286. def main():
  287. """Main entry point."""
  288. args = parse_args()
  289. infile, outfile = prepare_filenames(args.filename, args.output)
  290. machine_code = assembler(reader(open(infile)))
  291.  
  292. #with open(infile) as source:
  293. # assembler.send(source.readline().strip())
  294. with open(outfile, "w") as target:
  295. for line in machine_code:
  296. target.write(line + "\n")
  297.  
  298.  
  299. if __name__ == "__main__":
  300. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement