import re
from collections import namedtuple

class Rule(object):
    """Base class for anything the parser can be asked to match."""
    def __init__(self, name):
        self.name = name

class LexRule(Rule):
    """A token rule backed by a regex; `value` optionally converts a match."""
    def __init__(self, name, regex, value=None, ignore=False):
        super().__init__(name)
        self.regex = re.compile(regex)
        self.value = value
        self.ignore = ignore

    def __repr__(self):
        return f'LexRule(\'{self.name}\')'

class SpecialRule(Rule):
    """A synthetic rule with no regex, used for the begin/end-of-file markers."""
    def __init__(self, name):
        super().__init__(name)

    def __repr__(self):
        return f'SpecialRule(\'{self.name}\')'

# Horizontal whitespace only; newlines are left in so the lexer can count lines.
lex_whitespace = re.compile('[ \t\r]*')

SourceLoc = namedtuple('SourceLoc', 'file line column')
Token = namedtuple('Token', 'rule loc value text')

Begin = SpecialRule('begin-of-file')
End = SpecialRule('end-of-file')

class ParseError(Exception):
    def __init__(self, loc, message):
        super().__init__(f'{loc.file}:{loc.line}:{loc.column}: {message}')
        self.loc = loc

def skip_whitespace(source, pos):
    # The pattern uses '*', so it always matches (possibly an empty span).
    return lex_whitespace.match(source, pos).end()

class Lexer(object):
    def __init__(self):
        self.rules = []
        self.literals = { }

    def rule(self, name, regex, value=None):
        rule = LexRule(name, regex, value=value)
        self.rules.append(rule)
        return rule

    def ignore(self, regex):
        self.rules.append(LexRule('(ignore)', regex, ignore=True))

    def literal(self, *values):
        for v in values:
            self.literals[v] = self.rule(v, re.escape(v))

    def lex(self, source, file):
        pos = 0
        line = 1
        line_pos = 0
        slen = len(source)
        while pos < slen:
            pos = skip_whitespace(source, pos)
            if pos >= slen:
                break  # trailing whitespace; fall through to end-of-file

            # Longest match wins; ties go to the rule registered first.
            best_pos = pos
            best_rule = None
            best_match = None
            for rule in self.rules:
                match = rule.regex.match(source, pos)
                if match and match.end() > best_pos:
                    best_pos = match.end()
                    best_rule = rule
                    best_match = match

            loc = SourceLoc(file, line, pos - line_pos + 1)
            if not best_rule:
                raise ParseError(loc, 'Bad token')

            if not best_rule.ignore:
                value = None
                if best_rule.value:
                    value = best_rule.value(best_match)

                yield Token(best_rule, loc, value, source[pos:best_pos])

            # Count every newline in the matched text so that multi-line
            # tokens (and ignored runs) keep line numbers accurate.
            newlines = source.count('\n', pos, best_pos)
            if newlines:
                line += newlines
                line_pos = source.rindex('\n', pos, best_pos) + 1

            pos = best_pos

        loc = SourceLoc(file, line, pos - line_pos + 1)
        yield Token(End, loc, None, 'end-of-file')

def format_rule(rule):
    if isinstance(rule, list):
        return 'any of ({})'.format(', '.join(format_rule(r) for r in rule))
    elif isinstance(rule, Rule):
        return rule.name
    elif isinstance(rule, str):
        return f'\'{rule}\''
    else:
        raise TypeError(f'Unsupported rule type {type(rule)!r}')

def format_token(token):
    rl = format_rule(token.rule)
    tk = token.text
    if rl == tk:
        return f'\'{rl}\''
    else:
        return f'{rl} \'{tk}\''

class Parser(object):
    def __init__(self, lexer, source, file):
        self.lexer = lexer
        self.token_stream = lexer.lex(source, file)
        self.token = next(self.token_stream)
        self.prev_token = Token(Begin, SourceLoc(file, 0, 0), None, '')

    def scan(self):
        # Advance one token; parks on the end-of-file token forever.
        if self.token.rule != End:
            self.prev_token = self.token
            self.token = next(self.token_stream)

    def peek(self, rule):
        # Accepts a Rule, a registered literal string, or a list of either.
        if isinstance(rule, list):
            for r in rule:
                tok = self.peek(r)
                if tok: return tok
        elif isinstance(rule, Rule):
            if self.token.rule == rule:
                return self.token
        elif isinstance(rule, str):
            lit_rule = self.lexer.literals.get(rule)
            if not lit_rule:
                raise ValueError(f'Unregistered literal token {rule!r}')
            if self.token.rule == lit_rule:
                return self.token
        else:
            raise TypeError(f'Unsupported rule type {type(rule)!r}')
        return None

    def accept(self, rule):
        # Consume and return the current token if it matches, else do nothing.
        tok = self.peek(rule)
        if tok:
            self.scan()
            return tok
        else:
            return None

    def require(self, rule, message=''):
        # Like accept(), but a mismatch raises a ParseError.
        tok = self.accept(rule)
        if tok:
            return tok
        else:
            ex = format_rule(rule)
            ts = format_token(self.token)
            if message:
                error_msg = f'Expected {ex} {message}, got {ts}'
            else:
                error_msg = f'Expected {ex}, got {ts}'
            raise ParseError(self.token.loc, error_msg)

    def peek_value(self, rule):
        tok = self.peek(rule)
        return tok.value if tok else None

    def accept_value(self, rule):
        tok = self.accept(rule)
        return tok.value if tok else None

    def require_value(self, rule, message=''):
        tok = self.require(rule, message)
        return tok.value if tok else None

    def fail_prev(self, message):
        # Report an error at the previous token, e.g. after a missing separator.
        raise ParseError(self.prev_token.loc, message)
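
# A minimal end-to-end sketch of how the pieces fit together, assuming a
# hypothetical grammar of '+'/'-' sums over integers. None of the names
# below ('number', parse_sum, '<demo>') are part of the module itself.
if __name__ == '__main__':
    lex = Lexer()
    number = lex.rule('number', r'\d+', value=lambda m: int(m.group()))
    lex.literal('+', '-')
    lex.ignore(r'\n')

    # Inspect the raw token stream, including the synthetic end-of-file token.
    for tok in lex.lex('1 + 2 - 3', '<demo>'):
        print(format_token(tok))

    def parse_sum(parser):
        # sum := number (('+' | '-') number)*
        total = parser.require_value(number, 'at start of expression')
        while True:
            if parser.accept('+'):
                total += parser.require_value(number, "after '+'")
            elif parser.accept('-'):
                total -= parser.require_value(number, "after '-'")
            else:
                break
        parser.require(End, 'after expression')
        return total

    print(parse_sum(Parser(lex, '1 + 2 - 3', '<demo>')))  # prints 0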