import re

# Token types double as indices into match.groups() for PATTERN below:
# the position of the first non-empty capture group is the token's type.
TOKEN_TYPE_NUMBER = 0
TOKEN_TYPE_ADD = 1
TOKEN_TYPE_SUB = 2
TOKEN_TYPE_MUL = 3
TOKEN_TYPE_DIV = 4
TOKEN_TYPE_ECHO = 5
TOKEN_TYPE_EOL = 6
TOKEN_TYPE_EOF = 7
TOKEN_NAMES = ["num", "add", "sub", "mul", "div", "echo", "eol", "eof"]

# One alternative per token type, in the same order as the constants above.
# Anything the pattern does not match (spaces, tabs) is silently skipped.
PATTERN = r"(\d+)|(\+)|(\-)|(\*)|(\/)|(echo)|(;|\n)"
class Token:
    # A token records its type tag, matched text, line, and column ("symbol").
    def __init__(self, t, v, l, s):
        self.type = t
        self.value = v
        self.symbol = s
        self.line = l

    def __str__(self):
        # Hide the literal newline when printing eol tokens.
        v = self.value if self.value != "\n" else ""
        return f"{TOKEN_NAMES[self.type]} : {v}"
def firstindex(l, cond):
    # Index of the first element satisfying cond (raises StopIteration if none).
    return next(i for i, v in enumerate(l) if cond(v))

def lastindex(l, cond):
    # Index of the last element satisfying cond, or -1 if none.
    filtered = [i for i, v in enumerate(l) if cond(v)]
    if not filtered:
        return -1
    *_, last = filtered
    return last
def match_tokens(pattern, text):
    # Positions of every newline, used to turn absolute offsets into line/column pairs.
    lines = list(re.finditer(r"\n", text))
    for match in re.finditer(pattern, text):
        groups = match.groups()
        nonempty = firstindex(groups, lambda x: x is not None)
        t = nonempty  # group index doubles as the token type
        v = groups[nonempty]
        # Line numbers are 0-based; the column is the offset from the preceding newline.
        l = lastindex(lines, lambda x: x.start() < match.start())
        s = match.start() - (lines[l].start() if l != -1 else 0)
        yield Token(t, v, l + 1, s)
def tokenize(text):
    tokens = list(match_tokens(PATTERN, text))
    if not tokens:
        # Empty (or all-whitespace) input still gets an end-of-file marker.
        return [Token(TOKEN_TYPE_EOF, "", 0, 0)]
    if tokens[-1].type != TOKEN_TYPE_EOL:
        # Make sure the last statement is terminated even without a trailing newline.
        tokens.append(Token(TOKEN_TYPE_EOL, "\n", tokens[-1].line, tokens[-1].symbol + len(tokens[-1].value)))
    tokens.append(Token(TOKEN_TYPE_EOF, "", tokens[-1].line + 1, 0))
    return tokens
def parse(tokens):
    # Recursive-descent parser that emits a flat list of stack-machine instructions.
    bytecode = []
    i = 0

    def match(t):
        # Consume the current token if it has type t.
        nonlocal i
        if tokens[i].type != t:
            return False
        i += 1
        return True

    def consume(t):
        if not match(t):
            raise Exception(
                "expected " + TOKEN_NAMES[t] +
                " got " + TOKEN_NAMES[tokens[i].type] +
                f" at {tokens[i].line}:{tokens[i].symbol}"
            )
        return True

    # Grammar, lowest to highest precedence:
    #   expr           -> additive
    #   additive       -> multiplicative (("+" | "-") multiplicative)*
    #   multiplicative -> unary (("*" | "/") unary)*
    #   unary          -> ("+" | "-") unary | primary
    #   primary        -> number | "echo" expr
    def expr():
        additive()

    def additive():
        multiplicative()
        while True:
            if match(TOKEN_TYPE_ADD):
                multiplicative()
                bytecode.append("add")
                continue
            elif match(TOKEN_TYPE_SUB):
                multiplicative()
                bytecode.append("sub")
                continue
            break

    def multiplicative():
        unary()
        while True:
            if match(TOKEN_TYPE_MUL):
                unary()
                bytecode.append("mul")
                continue
            elif match(TOKEN_TYPE_DIV):
                unary()
                bytecode.append("div")
                continue
            break

    def unary():
        if match(TOKEN_TYPE_ADD):
            unary()
            bytecode.append("unpos")
            return
        elif match(TOKEN_TYPE_SUB):
            unary()
            bytecode.append("unneg")
            return
        primary()

    def primary():
        if match(TOKEN_TYPE_NUMBER):
            bytecode.append(f"push {tokens[i - 1].value}")
            return
        elif match(TOKEN_TYPE_ECHO):
            expr()
            bytecode.append("echo")
            return
        raise Exception("unexpected token " + TOKEN_NAMES[tokens[i].type] + f" at {tokens[i].line}:{tokens[i].symbol}")

    # A program is a sequence of expressions, each terminated by ";" or a newline.
    while not match(TOKEN_TYPE_EOF):
        expr()
        consume(TOKEN_TYPE_EOL)
    return bytecode
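# A minimal sketch of a stack machine that could execute the instruction strings
# parse() emits. The opcode names are the ones appended above; the runtime choices
# (true division for "div", echo leaving its value on the stack) are assumptions
# made here for illustration, since parse() itself only builds the list.
def execute(bytecode):
    stack = []
    for instr in bytecode:
        op, _, arg = instr.partition(" ")
        if op == "push":
            stack.append(int(arg))
        elif op in ("add", "sub", "mul", "div"):
            b, a = stack.pop(), stack.pop()
            if op == "add":
                stack.append(a + b)
            elif op == "sub":
                stack.append(a - b)
            elif op == "mul":
                stack.append(a * b)
            else:
                stack.append(a / b)  # assumption: "div" means true division
        elif op == "unpos":
            stack.append(+stack.pop())
        elif op == "unneg":
            stack.append(-stack.pop())
        elif op == "echo":
            # "echo <expr>" is itself an expression in the grammar, so the value
            # is printed but left on the stack.
            print(stack[-1])

# Example: execute(parse(tokenize("echo 2 + 3\n"))) prints 5.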
def test():
    # Two statements may share a line because ";" and "\n" are both eol tokens.
    # The trailing backslash keeps the literal from ending in a newline, so
    # tokenize() has to synthesize the final eol itself.
    text = """\
-1 / 2 * 2; echo 1 + 2
echo +-1 * 7 / 3\
"""
    print(text)
    print()
    tokens = tokenize(text)
    for tok in tokens:
        print(tok)
    print()
    print("\n".join(parse(tokens)))

if __name__ == '__main__':
    test()