Advertisement
Fhernd

parser.py

Apr 27th, 2018
4,654
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.73 KB | None | 0 0
  1. # example.py
  2. #
  3. # An example of writing a simple recursive descent parser
  4.  
  5. import re
  6. import collections
  7.  
  8. # Token specification
  9. NUM    = r'(?P<NUM>\d+)'
  10. PLUS   = r'(?P<PLUS>\+)'
  11. MINUS  = r'(?P<MINUS>-)'
  12. TIMES  = r'(?P<TIMES>\*)'
  13. DIVIDE = r'(?P<DIVIDE>/)'
  14. LPAREN = r'(?P<LPAREN>\()'
  15. RPAREN = r'(?P<RPAREN>\))'
  16. WS     = r'(?P<WS>\s+)'
  17.  
  18. master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES,
  19.                                   DIVIDE, LPAREN, RPAREN, WS]))
  20.  
  21. # Tokenizer
  22. Token = collections.namedtuple('Token', ['type','value'])
  23.  
  24. def generate_tokens(text):
  25.     scanner = master_pat.scanner(text)
  26.     for m in iter(scanner.match, None):
  27.         tok = Token(m.lastgroup, m.group())
  28.         if tok.type != 'WS':
  29.             yield tok
  30.  
  31. # Parser
  32. class ExpressionEvaluator:
  33.     '''
  34.    Implementation of a recursive descent parser.   Each method
  35.    implements a single grammar rule.  Use the ._accept() method
  36.    to test and accept the current lookahead token.  Use the ._expect()
  37.    method to exactly match and discard the next token on on the input
  38.    (or raise a SyntaxError if it doesn't match).
  39.    '''
  40.  
  41.     def parse(self,text):
  42.         self.tokens = generate_tokens(text)
  43.         self.tok = None             # Last symbol consumed
  44.         self.nexttok = None         # Next symbol tokenized
  45.         self._advance()             # Load first lookahead token
  46.         return self.expr()
  47.  
  48.     def _advance(self):
  49.         'Advance one token ahead'
  50.         self.tok, self.nexttok = self.nexttok, next(self.tokens, None)
  51.  
  52.     def _accept(self,toktype):
  53.         'Test and consume the next token if it matches toktype'
  54.         if self.nexttok and self.nexttok.type == toktype:
  55.             self._advance()
  56.             return True
  57.         else:
  58.             return False
  59.  
  60.     def _expect(self,toktype):
  61.         'Consume next token if it matches toktype or raise SyntaxError'
  62.         if not self._accept(toktype):
  63.             raise SyntaxError('Expected ' + toktype)
  64.  
  65.     # Grammar rules follow
  66.  
  67.     def expr(self):
  68.         "expression ::= term { ('+'|'-') term }*"
  69.  
  70.         exprval = self.term()
  71.         while self._accept('PLUS') or self._accept('MINUS'):
  72.             op = self.tok.type
  73.             right = self.term()
  74.             if op == 'PLUS':
  75.                 exprval += right
  76.             elif op == 'MINUS':
  77.                 exprval -= right
  78.         return exprval
  79.    
  80.     def term(self):
  81.         "term ::= factor { ('*'|'/') factor }*"
  82.  
  83.         termval = self.factor()
  84.         while self._accept('TIMES') or self._accept('DIVIDE'):
  85.             op = self.tok.type
  86.             right = self.factor()
  87.             if op == 'TIMES':
  88.                 termval *= right
  89.             elif op == 'DIVIDE':
  90.                 termval /= right
  91.         return termval
  92.  
  93.     def factor(self):
  94.         "factor ::= NUM | ( expr )"
  95.  
  96.         if self._accept('NUM'):
  97.             return int(self.tok.value)
  98.         elif self._accept('LPAREN'):
  99.             exprval = self.expr()
  100.             self._expect('RPAREN')
  101.             return exprval
  102.         else:
  103.             raise SyntaxError('Expected NUMBER or LPAREN')
  104.  
  105. if __name__ == '__main__':
  106.     e = ExpressionEvaluator()
  107.     print(e.parse('2'))
  108.     print(e.parse('2 + 3'))
  109.     print(e.parse('2 + 3 * 4'))
  110.     print(e.parse('2 + (3 + 4) * 5'))
  111.  
  112. # Example of building trees
  113.  
  114. class ExpressionTreeBuilder(ExpressionEvaluator):
  115.     def expr(self):
  116.         "expression ::= term { ('+'|'-') term }"
  117.  
  118.         exprval = self.term()
  119.         while self._accept('PLUS') or self._accept('MINUS'):
  120.             op = self.tok.type
  121.             right = self.term()
  122.             if op == 'PLUS':
  123.                 exprval = ('+', exprval, right)
  124.             elif op == 'MINUS':
  125.                 exprval = ('-', exprval, right)
  126.         return exprval
  127.    
  128.     def term(self):
  129.         "term ::= factor { ('*'|'/') factor }"
  130.  
  131.         termval = self.factor()
  132.         while self._accept('TIMES') or self._accept('DIVIDE'):
  133.             op = self.tok.type
  134.             right = self.factor()
  135.             if op == 'TIMES':
  136.                 termval = ('*', termval, right)
  137.             elif op == 'DIVIDE':
  138.                 termval = ('/', termval, right)
  139.         return termval
  140.  
  141.     def factor(self):
  142.         'factor ::= NUM | ( expr )'
  143.  
  144.         if self._accept('NUM'):
  145.             return int(self.tok.value)
  146.         elif self._accept('LPAREN'):
  147.             exprval = self.expr()
  148.             self._expect('RPAREN')
  149.             return exprval
  150.         else:
  151.             raise SyntaxError('Expected NUMBER or LPAREN')
  152.  
  153. if __name__ == '__main__':
  154.     e = ExpressionTreeBuilder()
  155.     print(e.parse('2 + 3'))
  156.     print(e.parse('2 + 3 * 4'))
  157.     print(e.parse('2 + (3 + 4) * 5'))
  158.     print(e.parse('2 + 3 + 4'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement