RainbowUnicornMan

TPTASM.py (v0.6)

Aug 2nd, 2013
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2.  
  3. """tptasm.py
  4.  
  5. TPTASM is an assembly language for The Powder Toy's "8-bit processor v1.0" (see http://powdertoy.co.uk/Discussions/Thread/View.html?Thread=17358). Assembly languages make low-level programming a lot easier by getting rid of the bit manipulation and replacing it with friendlier words and numbers that actually show what the code does.
  6.  
  7. Here is an overview of its syntax:
  8.  
  9. * Comments start with semicolons (";") e.g. "; This is a comment"
  10. * RAM addresses are represented with square brackets e.g.
  11.  "[6]" = RAM SLOT 6.
  12. * To set A, B, C, RAM addresses or STDOUT to something
  13.  use "SET" e.g. "SET A, [6]" (sets A to the value at RAM
  14.  SLOT 6) or "SET [15], 5" (sets RAM SLOT 15 to 5) or "SET
  15.  OUT, [54]" (outputs RAM SLOT 54 to STDOUT)
  16. * To utilise the ALU use "ADD", "AND" etc. e.g. "ADD [4]"
  17.  (adds A + B and puts the result in RAM SLOT 4) or
  18.  "OR C" (puts A OR B in C)
  19. * To jump to a line use "JMP" e.g. "JMP 3" (jump to ROM SLOT 3)
  20. * To conditionally jump to a line use "IFE" e.g. "IFE 5" (if
  21.  A doesn't equal B, jump to line 5)
  22. * To quit use END by itself on a line.
  23. """
  24.  
  25. import re
  26. import sys
  27.  
# The regex for a TPTASM number: decimal, hexadecimal ("0x...") or binary
# ("0b..."). It is a plain pattern string wrapped in a non-capturing group so
# that it can be spliced into line_re below via str.format.
number_re = r"(?:[0-9]+|0[xX][0-9a-fA-F]+|0[bB][0-1]+)"

# The regex for a TPTASM name: a letter or underscore followed by letters,
# digits or underscores. line_re uses it for labels, for jump targets and
# (prefixed with "$") for variables.
name_re = r"(?:[A-Za-z_][A-Za-z0-9_]*)"

# The regex for a line of TPTASM code, compiled in verbose mode (re.X), so
# whitespace inside the pattern is ignored and "# ..." inside it is a comment.
#
# A line consists of an optional "label:", an optional instruction and an
# optional ";comment"; all three parts may be absent (blank line).
#
# Named groups:
#   label    - label name, without the colon
#   line     - the whole instruction; None when the line has no instruction
#   name     - the mnemonic (upper case only)
#   set1/set2, add1, sub1, and1, or1, ife1, ifg1, ifl1, jmp1, clr1
#            - the operand(s) of the corresponding mnemonic
#   comment  - the trailing comment, including the semicolon
#
# Each operand alternative begins with a lookbehind on the mnemonic that was
# just matched, so only that mnemonic's operand syntax is accepted.
# The "{number}" and "{name}" placeholders are filled in by str.format.
line_re = re.compile(r"""
   ^
   \s*
   (?:(?P<label>{name}):)? # label
   \s*
   (?P<thingy>
       (?P<line>
           (?P<name>SET|ADD|SUB|AND|OR|IFE|IFG|IFL|JMP|CLR|END)
           (   (?<=SET) \s+(?P<set1>A|B|C|OUT|\[{number}\]|\${name}|\[B\])\s*,\s*(?P<set2>A|B|C|\[{number}\]|\${name}|{number}|INP)
             | (?<=ADD) \s+(?P<add1>\[{number}\]|\${name}|C)
             | (?<=SUB) \s+(?P<sub1>\[{number}\]|\${name}|C)
             | (?<=AND) \s+(?P<and1>\[{number}\]|\${name}|C)
             | (?<= OR) \s+(?P<or1>\[{number}\]|\${name}|C)
             | (?<=IFE) \s+(?P<ife1>{number}|{name})
             | (?<=IFG) \s+(?P<ifg1>{number}|{name})
             | (?<=IFL) \s+(?P<ifl1>{number}|{name})
             | (?<=JMP) \s+(?P<jmp1>{number}|{name})
             | (?<=CLR) \s+(?P<clr1>\[(?:A|B|{number})\]|\${name})
             | (?<=END)
           )
       )
   )?
   \s*
   (?P<comment>;.*)? # comment
   $
   """.format(number=number_re, name=name_re), re.X)
  61.  
  62. def isaddr(s):
  63.     """Check if a string is a TPTASM memory address."""
  64.     return s.startswith('[') and isnum(s[1:-1])
  65.  
  66. def isvar(s):
  67.     """Check if a string is a TPTASM variable."""
  68.     return s.startswith("$")
  69.  
  70. def isnum(s):
  71.     """Check if a string is a valid Python integer."""
  72.     try:
  73.         int(s, 0)
  74.     except:
  75.         return False
  76.     return True
  77.  
  78. def num(s):
  79.     """Turn a number string or RAM address into a Python integer."""
  80.     if s.startswith('['):
  81.         return int(s[1:-1], 0)
  82.     return int(s, 0)
  83.  
  84. def binary(s, width):
  85.     """Make a number binary. Also restrict it to a certain width by adding
  86.    preceding zeros (when necessary)."""
  87.     # YAY double string formatting :)
  88.     return ("{:>0%s}" % width).format(bin(num(s))[2:])
  89.  
  90. def getvar(name, varmap):
  91.     """Get a variable's address from a varmap.
  92.    If the variable does not exist, create it and give it an address."""
  93.     if name not in varmap:
  94.         if len(varmap) == 0:
  95.             n = 1
  96.         else:
  97.             n = sorted(varmap.values())[-1] + 1
  98.         varmap[name] = n
  99.         #print("Added var {} [{}]".format(name, n))
  100.         if n > 100:
  101.             print("WARNING: RAM all used up!")
  102.     return "[" + str(varmap[name]) + "]"
  103.  
  104. def parse(text):
  105.     """Parse multiple lines of code and output the compiled program in
  106.    a list of binary strings."""
  107.     lines = text.upper().split('\n')
  108.     out = []
  109.     labelmap = {}
  110.     varmap = {}
  111.     instrno = -1
  112.     for lineno, line in enumerate(lines):
  113.         line2 = line_re.match(line)
  114.         if line2 is None:
  115.             raise BaseException("syntax error on line {}".format(lineno + 1))
  116.         if line2.groupdict()['label'] is not None:
  117.             labelmap[line2.groupdict()['label']] = instrno + 2
  118.         if line2.groupdict()['line'] is None:
  119.             continue
  120.         instrno += 1
  121.     instrno = -1
  122.     for lineno, line in enumerate(lines):
  123.         line2 = line_re.match(line)
  124.         gd = line2.groupdict()
  125.         if gd['line'] is None:
  126.             continue
  127.         instrno += 1
  128.         if gd['name'] == 'SET':
  129.             set1 = gd['set1']
  130.             set2 = gd['set2']
  131.             if isvar(set1):
  132.                 set1 = getvar(set1, varmap)
  133.             if isvar(set2):
  134.                 set2 = getvar(set2, varmap)
  135.             if set1 == 'A' and isnum(set2):
  136.                 out.append("{}00000000000000001".format(binary(set2, 8)))
  137.             elif set1 == 'B' and isnum(set2):
  138.                 out.append("{}00000000000000010".format(binary(set2, 8)))
  139.             elif set1 == 'C' and isnum(set2):
  140.                 out.append("{}00000000000000011".format(binary(set2, 8)))
  141.             elif isaddr(set1) and isnum(set2):
  142.                 out.append("{}0{}000000100".format(binary(set2, 8), binary(set1, 7)))
  143.             elif set1 == 'A' and isaddr(set2):
  144.                 out.append("111111110{}000000101".format(binary(set2, 7)))
  145.             elif set1 == 'B' and isaddr(set2):
  146.                 out.append("111111110{}000000110".format(binary(set2, 7)))
  147.             elif set1 == 'C' and isaddr(set2):
  148.                 out.append("111111110{}000000111".format(binary(set2, 7)))
  149.             elif set1 == 'A' and set2 == 'C':
  150.                 out.append("0000000000000000000001000")
  151.             elif set1 == 'B' and set2 == 'C':
  152.                 out.append("0000000000000000000001001")
  153.             elif isaddr(set1) and set2 == 'C':
  154.                 out.append("000000000{}000001010".format(binary(set1, 7)))
  155.             elif set1 == 'OUT' and isaddr(set2):
  156.                 out.append("111111110{}000001101".format(binary(set2, 7)))
  157.             elif set1 == '[B]' and set2 == 'A':
  158.                 out.append("0000000000000000000001110")
  159.             elif isaddr(set1) and set2 == "INP":
  160.                 out.append("000000000{}000010010".format(binary(set1, 7)))
  161.             else:
  162.                 raise Exception("unsupported SET operation on line {}".format(lineno + 1))
  163.         elif gd['name'] == 'ADD':
  164.             if isvar(gd['add1']):
  165.                 gd['add1'] = getvar(gd['add1'], varmap)
  166.             if gd['add1'] == 'C':  # register C
  167.                 out.append("0000000000000000000101011")
  168.             else:  # RAM value
  169.                 number = bin(int(gd['add1'][1:-1], 0))[2:]  # convert to binary
  170.                 out.append("000000000{:>07}000101100".format(number))
  171.         elif gd['name'] == 'AND':
  172.             if isvar(gd['and1']):
  173.                 gd['and1'] = getvar(gd['and1'], varmap)
  174.             if gd['and1'] == 'C':  # register C
  175.                 out.append("0000000000000000001001011")
  176.             else:  # RAM value
  177.                 number = bin(int(gd['and1'][1:-1], 0))[2:]  # convert to binary
  178.                 out.append("000000000{:>07}001001100".format(number))
  179.         elif gd['name'] == 'OR':
  180.             if isvar(gd['or1']):
  181.                 gd['or1'] = getvar(gd['or1'], varmap)
  182.             if gd['or1'] == 'C':  # register C
  183.                 out.append("0000000000000000001101011")
  184.             else:  # RAM value
  185.                 number = bin(int(gd['or1'][1:-1], 0))[2:]  # convert to binary
  186.                 out.append("000000000{:>07}001101100".format(number))
  187.         elif gd['name'] == 'IFE':
  188.             if isnum(gd['ife1']):  # line number
  189.                 number = bin(int(gd['ife1'], 0))[2:]  # convert to binary
  190.             else:  # label
  191.                 number = bin(labelmap[gd['ife1']])[2:]
  192.             out.append("000000000{:>07}010001011".format(number))
  193.         elif gd['name'] == 'JMP':
  194.             if isnum(gd['jmp1']):  # line number
  195.                 number = bin(int(gd['jmp1'], 0))[2:]  # convert to binary
  196.             else:  # label
  197.                 number = bin(labelmap[gd['jmp1']])[2:]
  198.             out.append("000000000{:>07}010101011".format(number))
  199.         elif gd['name'] == 'IFG':
  200.             if isnum(gd['ifg1']):  # line number
  201.                 number = bin(int(gd['ifg1'], 0))[2:]  # convert to binary
  202.             else:  # label
  203.                 number = bin(labelmap[gd['ifg1']])[2:]
  204.             out.append("000000000{:>07}011001011".format(number))  # made up
  205.         elif gd['name'] == 'IFL':
  206.             if isnum(gd['ifl1']):  # line number
  207.                 number = bin(int(gd['ifl1'], 0))[2:]  # convert to binary
  208.             else:  # label
  209.                 number = bin(labelmap[gd['ifl1']])[2:]
  210.             out.append("000000000{:>07}011101011".format(number))  # made up
  211.         elif gd['name'] == 'SUB':
  212.             if isvar(gd['sub1']):
  213.                 gd['sub1'] = getvar(gd['sub1'], varmap)
  214.             if gd['add1'] == 'C':  # register C
  215.                 out.append("0000000000000000100001011")  # made up
  216.             else:  # RAM value
  217.                 number = bin(int(gd['sub1'][1:-1], 0))[2:]  # convert to binary
  218.                 out.append("000000000{:>07}100001100".format(number))  # made up
  219.         elif gd['name'] == 'CLR':
  220.             if isvar(gd['clr1']):
  221.                 gd['clr1'] = getvar(gd['clr1'], varmap)
  222.             if gd['clr1'] == '[A]':
  223.                 out.append("0000000000000000000010000")
  224.             elif gd['clr1'] == '[B]':
  225.                 out.append("0000000000000000000010001")
  226.             else:  # RAM address
  227.                 out.append("000000000{}000001111".format(binary(gd['clr1'], 7)))
  228.         elif gd['name'] == 'END':
  229.             out.append("0000000010000000000000000")
  230.         else:
  231.             raise Exception("something strange happened! Please report this bug!")
  232.  
  233.     if not all([len(i) == 25 for i in out]):
  234.         print(out)
  235.         for i, v in enumerate(out):
  236.             if len(v) != 25:
  237.                 print(i, v)
  238.         raise Exception("uh-oh, something bad happened! Please report this bug!")
  239.  
  240.     return out
  241.  
  242. def print_(s):
  243.     print("[{}][{}][{}][{}][{}]".format(s[:8], s[8:9], s[9:16], s[16:20], s[20:]))
  244.  
  245. def main(argv):
  246.     f = sys.stdin
  247.     if len(argv) > 1:
  248.         f = open(argv[1])
  249.     if f == sys.stdin:
  250.         print(
  251. """TPTASM is an assembly language for The Powder Toy's "8-bit processor v1.0" (see http://powdertoy.co.uk/Discussions/Thread/View.html?Thread=17358). Assembly languages make low-level programming a lot easier by getting rid of the bit manipulation and replacing it with friendlier words and numbers that actually show what the code does.
  252.  
  253. Here is an overview of its syntax:
  254.  
  255. * Comments start with semicolons (";") e.g. "; This is a comment"
  256. * RAM addresses are represented with square brackets e.g.
  257.  "[6]" = RAM SLOT 6.
  258. * To set A, B, C, RAM addresses or STDOUT to something
  259.  use "SET" e.g. "SET A, [6]" (sets A to the value at RAM
  260.  SLOT 7) or "SET [15], 5" (sets RAM SLOT 15 to 5) or "SET
  261.  OUT, [54]" (outputs RAM SLOT 54 to STDOUT)
  262. * To utilise the ALU use "ADD", "AND" etc. e.g. "ADD [4]"
  263.  (adds A + B and puts the result in RAM SLOT 4) or
  264.  "OR C" (puts A OR B in C)
  265. * To make a label use colons e.g. "mainloop:"
  266. * To jump to a line use "JMP" e.g. "JMP 3" (jump to ROM SLOT 3)
  267.  or use labels e.g. "JMP mainloop" (jump to the label "mainloop")
  268. * To conditionally jump to a line use "IFE" e.g. "IFE 5" (if
  269.  A doesn't equal B, jump to line 5)
  270. * To quit use END by itself on a line.
  271. """)
  272.         print("Enter code (press {} to finish):".format(("Ctrl-D", "Ctrl-Z")[sys.platform == "win32"]))
  273.     program = f.read()
  274.     result = parse(program)
  275.     if f == sys.stdin:
  276.         print("Here is your program! Enjoy!")
  277.     for i in result:
  278.         yield i
  279.     if len(result) > 100:
  280.         print("WARNING: program too long! ({})".format(len(result)))
  281.     if f == sys.stdin:
  282.         sys.stdin.readline()
  283.  
  284. if __name__ == "__main__":
  285.     for i in main(sys.argv):
  286.         print_(i)
Advertisement
Add Comment
Please, Sign In to add comment