Advertisement
Guest User

Python Assembler Lexer

a guest
Mar 3rd, 2010
898
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.09 KB | None | 0 0
  1. import os
  2. import sys
  3. from Plex import *
  4. from mc import mnemonic2op, no_operands, MAXINT
  5. from string import ascii_letters
  6.  
  7. letter = Range("AZaz")
  8. bit = Str('0') | Str('1')
  9. digit = Range('09')
  10. hexdigit = digit | Range('afAF')
  11. integer = Opt(Str('-')) + Rep1(digit)
  12. hexint = Str('0x') + hexdigit + Opt(hexdigit)
  13. binint = Str('0b') + Rep1(bit)
  14. space = Any(" \t")
  15. ident = letter + Rep(letter | digit)
  16. label = Bol + ident
  17. filename = ident + Str('.') + Alt(Str('py'), Str('asm'))
  18. include = Bol + Str('#') + NoCase(Str('include')) + space
  19. comment = Str(';')
  20. star = Str('*') + Opt(Opt(space) + Alt(Str('+'), Str('-'))  + Opt(space) + Rep1(digit))
  21. reladdress = ident + Opt(space) + Alt(Str('+'), Str('-'))  + Opt(space) + Rep1(digit)
  22. number = integer | hexint | binint
  23. macrodef = label + NoCase(Str('MACRO'))
  24. eol = Str('\n') | Eof
  25. sq_string = (
  26.     Str("'") +
  27.     Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
  28.     Str("'"))  
  29. dq_string = (
  30.     Str('"') +
  31.     Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
  32.     Str('"'))
  33. astring = sq_string | dq_string
  34. plist = Alt(number, ident, astring) + Rep1(Opt(space) + Str(',') + Opt(space) + Alt(number, ident, astring))
  35.  
  36. class AsmSource(object):
  37.     def __init__(self):
  38.         self.lineno = 0
  39.         self._frommacro = False
  40.         self.macgen = 0
  41.         self._incsect = False
  42.         self.expanded = False
  43.         self.label = ''
  44.         self.mnemonic = ''
  45.         self.operand = None
  46.         self.lc = 0
  47.         self.opcode = None
  48.         self.operand_base = None
  49.         self.operand_offset = 0
  50.         self.operand_value = None
  51.         self.comment = ''
  52.         self.errors = []
  53.         self.raw = ''
  54.  
  55.     def getfrommacro(self):
  56.         return self._frommacro
  57.     def setfrommacro(self, value):
  58.         self._frommacro = value
  59.     frommacro = property(getfrommacro, setfrommacro)
  60.  
  61.     def getincsect(self):
  62.         """boolean indicating whether line is in the CSECT"""
  63.         return self._incsect
  64.     def setincsest(self, value):
  65.         self._incsect = value
  66.     incsect = property(getincsect, setincsest)
  67.  
  68.     def __str__(self):
  69.         s = []
  70.         for x in sorted(vars(self)):
  71.             s.append('%s: %s' % (x, getattr(self, x)))
  72.         return '\n'.join(s)
  73.  
  74.     def isa(self, *mnemonics):
  75.         for mnemonic in mnemonics:
  76.             if mnemonic.upper() == self.mnemonic.upper():
  77.                 return True
  78.         return False
  79.  
  80.     def isacomment(self):
  81.         # Empty line is assumed to be a comment?
  82.         return not(self.label or self.mnemonic or self.operand)
  83.  
  84.     def error(self, text, col=0):
  85.         self.errors.append( (text, col) )
  86.  
  87. class AsmScanner(Scanner):
  88.     def col(self):
  89.         return self.position()[2]
  90.  
  91.     def report(self, text):
  92.         self.srcline.error(text, self.col())
  93.  
  94.     def gotlabel(self, scanner, text):
  95.         self.srcline.label = text
  96.  
  97.     def gotident(self, scanner, text):
  98.         if not self.srcline.mnemonic:
  99.             # This ident token is the mnemonic
  100.             self.srcline.mnemonic = text
  101.             try:
  102.                 self.srcline.opcode = mnemonic2op[text.upper()]
  103.             except KeyError:
  104.                 pass # Maybe a macro call and the macro is not yet defined
  105.             if text == 'EQU' and not self.srcline.label:
  106.                 self.report('Unlabeled EQU')
  107.         elif not self.srcline.operand:
  108.             # This ident token is an operand
  109.             if self.srcline.mnemonic.upper() in self.macros or self.srcline.isa('MACRO'):
  110.                 self.gotplist(scanner, text)
  111.             else:
  112.                 self.srcline.operand = text
  113.         else:
  114.             self.report('Unexpected operand: "%s"' % (text))
  115.            
  116.     def gotnumber(self, scanner, text):
  117.         if self.srcline.operand:
  118.             self.report('Unexpected operand: "%s"' % (text))
  119.         else:
  120.             if not self.srcline.mnemonic:
  121.                 self.report('Missing mnemonic')
  122.             else:
  123.                 self.srcline.operand = text
  124.                 if self.srcline.operand.startswith('0x'):
  125.                     base = 16
  126.                 elif self.srcline.operand.startswith('0b'):
  127.                     base = 2
  128.                     text = text[2:]
  129.                 else:
  130.                     base = 10
  131.                 self.srcline.operand_value = int(text, base)
  132.                 if self.srcline.operand_value > MAXINT:
  133.                     self.report('Operand (%d) exceeds MAXINT (%d)' % (self.srcline.operand_value, MAXINT))
  134.                 if self.srcline.operand_value < 0:
  135.                     self.srcline.operand_value = 256 + self.srcline.operand_value # 2's complement
  136.  
  137.     def gotplist(self, scanner, text):
  138.         self.srcline.operand = text
  139.         text = text.replace(' ', '')
  140.         self.srcline.operand_value = text.split(',')
  141.  
  142.     def gotstar(self, scanner, text):
  143.         if not self.srcline.mnemonic:
  144.             # we have a mnemonic with a signed number: looks like a reladdress
  145.             parts = text.split()
  146.             if len(parts) == 2:
  147.                 self.gotident(scanner, parts[0])
  148.                 self.gotnumber(scanner, parts[1])
  149.                 return
  150.         self.srcline.operand = text
  151.         parts = text.split()
  152.         if len(parts) == 3:
  153.             self.srcline.operand_base = parts[0] # * or ident
  154.             self.srcline.operand_offset = int(parts[2]) if parts[1] == '+' else -int(parts[2])
  155.         else:
  156.             self.srcline.operand_base = text
  157.  
  158.     def gotstring(self, scanner, text):
  159.         self.srcline.operand = text
  160.         astring = eval(text)
  161.         self.srcline.operand_value = [ord(x) for x in list(astring)]
  162.         #self.srcline.operand_value = [ord(x) for x in list(text[1:-1])]
  163.         #self.srcline.operand_value.append(0)
  164.  
  165.     def startcomment(self, scanner, text):
  166.         self.comment = [text]
  167.         scanner.begin('comment')
  168.  
  169.     def endcomment(self, scanner, text):
  170.         self.srcline.comment = ''.join(self.comment)
  171.         scanner.begin('')
  172.  
  173.     def goteol(self, scanner, text):
  174.         if self.srcline.operand and self.srcline.mnemonic.upper() in no_operands:
  175.             self.report('Unexpected operand: "%s" (%s has no operands).' % (self.srcline.operand, self.srcline.mnemonic.upper()))
  176.         if self.srcline.isa('EQU'):
  177.             if self.srcline.operand is None:
  178.                 self.report('EQU without operand')
  179.             if not self.srcline.label:
  180.                 self.report('EQU without label')
  181.         elif self.srcline.isa('ORG') and not self.srcline.operand:
  182.             self.report('ORG without operand')
  183.         elif self.srcline.isa('DC') and self.srcline.operand is None:
  184.             self.report('DC without operand')
  185.         elif self.srcline.isa('MACRO'):
  186.             if not self.srcline.label:
  187.                 self.report('MACRO without label')
  188.             if not self.srcline.operand_value:
  189.                 self.srcline.operand_value = []
  190.         if isinstance(self.srcline.operand_value, list) and not self.srcline.isa('DC', 'MACRO') \
  191.                       and self.srcline.mnemonic.upper() not in self.macros:
  192.             if len(self.srcline.operand_value) != 1:
  193.                 self.report('String operand must be exactly one character')
  194.             else:
  195.                 self.srcline.operand_value = self.srcline.operand_value[0]
  196.         self.srcline.lineno = self.position()[1]
  197.         self.srclines.append(self.srcline)
  198.         self.srcline = AsmSource()
  199.  
  200.     def gotfilename(self, scanner, text):
  201.         self.srcline.filename = text
  202.  
  203.     def eof(self):
  204.         #print 'There are %d sourcelines' % len(self.srclines)
  205.         for oLine in self.srclines:
  206.             if oLine.errors:
  207.                 self.errors += len(oLine.errors)
  208.  
  209.     def __init__(self, afile):
  210.         self.errors = 0
  211.         self.filename = '<stream>'
  212.         self.comment = ''
  213.         self.srclines = []
  214.         self.srcline = AsmSource()
  215.         self.lexicon = Lexicon([
  216.             (label, self.gotlabel),
  217.             (ident, self.gotident),
  218.             (number, self.gotnumber),
  219.             (astring, self.gotstring),
  220.             (filename, self.gotfilename),
  221.             (star, self.gotstar),
  222.             (reladdress, self.gotstar),
  223.             (plist, self.gotplist),
  224.             (space, IGNORE),
  225.             (include, IGNORE),
  226.             (eol, self.goteol),
  227.             (comment, self.startcomment),
  228.             (State('comment', [
  229.                 (AnyBut('\n'), lambda s, t: self.comment.append(t)),
  230.                 ((Eol | Eof), self.endcomment),
  231.                 ])),
  232.             (State('Error', [
  233.                 (AnyBut('\n'), IGNORE),
  234.                 ((Eol|Eof), Begin(''))
  235.                 ])),
  236.             ])
  237.         Scanner.__init__(self, self.lexicon, afile, self.filename)
  238.  
  239. class Lexer(object):
  240.     def __init__(self, filename):
  241.         print >>sys.stdout, '%s: Lexing...' % (os.path.basename(filename))
  242.         self.errors = 0
  243.         import cStringIO
  244.         stream = cStringIO.StringIO()
  245.         infile = open(filename, 'U')
  246.         for line in infile:
  247.             stream.write(line)
  248.             if line.lower().startswith('#include'):
  249.                 incfilename = line.strip().split()[1]
  250.                 if incfilename.endswith('.asm'):
  251.                     self.addinclude(stream, incfilename)
  252.         stream.seek(0)
  253.         macrolabels = self.getmacrolabels(stream)
  254.         stream.seek(0)
  255.         scanner = AsmScanner(stream)
  256.         scanner.macros = macrolabels
  257.         while True:
  258.             try:
  259.                 if scanner.read()[0] is None:
  260.                     break
  261.             except Errors.UnrecognizedInput, e:
  262.                 scanner.srcline.error(str(e))
  263.                 scanner.begin('Error')
  264.         self.srclines = scanner.srclines
  265.         assert self.srclines
  266.         stream.seek(0)
  267.         lines = stream.readlines()
  268.         for n, line in enumerate(lines):
  269.             self.srclines[n].raw = lines[n].rstrip()
  270.         stream.close()
  271.         self.errors += scanner.errors
  272.  
  273.     def addinclude(self, stream, incfilename):
  274.         infile = open(incfilename)
  275.         for line in infile:
  276.             stream.write(line)
  277.             if line.lower().startswith('#include'):
  278.                 incfilename = line.strip().split()[1]
  279.                 if filename.endswith('.asm'):
  280.                     self.addinclude(stream, incfilename)
  281.  
  282.     def getmacrolabels(self, stream):
  283.         macrolabels = []
  284.         for line in stream:
  285.             if line[0] in ascii_letters:
  286.                 parts = line.split()
  287.                 if parts[1].lower() == 'macro':
  288.                     macrolabels.append(parts[0].upper())
  289.         return macrolabels
  290.  
  291. if __name__ == '__main__':
  292.     import StringIO
  293.     stream = StringIO.StringIO('START INI ;here we gooooo!')
  294.     stream = open('test.asm')
  295.  
  296.     stream = open('bits.asm')
  297.     scanner = AsmScanner(stream)
  298.     scanner.read()
  299.     for asmline in scanner.srclines:
  300.         print '%-8s %3s %-8s %s' % (asmline.label, asmline.mnemonic, asmline.operand, asmline.comment)
  301.         for text, col in asmline.errors:
  302.             print '%s: Line: %d Col: %d %s' % (scanner.filename, asmline.lineno, col, text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement