import re

# Token types double as indices into match.groups() for PATTERN below:
# the position of the first non-empty capture group is the token's type.
TOKEN_TYPE_NUMBER = 0
TOKEN_TYPE_ADD = 1
TOKEN_TYPE_SUB = 2
TOKEN_TYPE_MUL = 3
TOKEN_TYPE_DIV = 4
TOKEN_TYPE_ECHO = 5
TOKEN_TYPE_EOL = 6
TOKEN_TYPE_EOF = 7
TOKEN_NAMES = ["num", "add", "sub", "mul", "div", "echo", "eol", "eof"]

# One alternative per token type, in the same order as the constants above.
# Anything the pattern does not match (spaces, tabs) is silently skipped.
PATTERN = r"(\d+)|(\+)|(\-)|(\*)|(\/)|(echo)|(;|\n)"
class Token:
    # A token records its type tag, matched text, line, and column ("symbol").
    def __init__(self, t, v, l, s):
        self.type = t
        self.value = v
        self.symbol = s
        self.line = l

    def __str__(self):
        # Hide the literal newline when printing eol tokens.
        v = self.value if self.value != "\n" else ""
        return f"{TOKEN_NAMES[self.type]} : {v}"
def firstindex(l, cond):
    # Index of the first element satisfying cond (raises StopIteration if none).
    return next(i for i, v in enumerate(l) if cond(v))

def lastindex(l, cond):
    # Index of the last element satisfying cond, or -1 if none.
    filtered = [i for i, v in enumerate(l) if cond(v)]
    if not filtered:
        return -1
    *_, last = filtered
    return last
def match_tokens(pattern, text):
    # Positions of every newline, used to turn absolute offsets into line/column pairs.
    lines = list(re.finditer(r"\n", text))
    for match in re.finditer(pattern, text):
        groups = match.groups()
        nonempty = firstindex(groups, lambda x: x is not None)
        t = nonempty  # group index doubles as the token type
        v = groups[nonempty]
        # Line numbers are 0-based; the column is the offset from the preceding newline.
        l = lastindex(lines, lambda x: x.start() < match.start())
        s = match.start() - (lines[l].start() if l != -1 else 0)
        yield Token(t, v, l + 1, s)
def tokenize(text):
    tokens = list(match_tokens(PATTERN, text))
    if not tokens:
        # Empty (or all-whitespace) input still gets an end-of-file marker.
        return [Token(TOKEN_TYPE_EOF, "", 0, 0)]
    if tokens[-1].type != TOKEN_TYPE_EOL:
        # Make sure the last statement is terminated even without a trailing newline.
        tokens.append(Token(TOKEN_TYPE_EOL, "\n", tokens[-1].line, tokens[-1].symbol + len(tokens[-1].value)))
    tokens.append(Token(TOKEN_TYPE_EOF, "", tokens[-1].line + 1, 0))
    return tokens
def parse(tokens):
    # Recursive-descent parser that emits a flat list of stack-machine instructions.
    bytecode = []
    i = 0

    def match(t):
        # Consume the current token if it has type t.
        nonlocal i
        if tokens[i].type != t:
            return False
        i += 1
        return True

    def consume(t):
        if not match(t):
            raise Exception(
                "expected " + TOKEN_NAMES[t] +
                " got " + TOKEN_NAMES[tokens[i].type] +
                f" at {tokens[i].line}:{tokens[i].symbol}"
            )
        return True

    # Grammar, lowest to highest precedence:
    #   expr           -> additive
    #   additive       -> multiplicative (("+" | "-") multiplicative)*
    #   multiplicative -> unary (("*" | "/") unary)*
    #   unary          -> ("+" | "-") unary | primary
    #   primary        -> number | "echo" expr
    def expr():
        additive()

    def additive():
        multiplicative()
        while True:
            if match(TOKEN_TYPE_ADD):
                multiplicative()
                bytecode.append("add")
                continue
            elif match(TOKEN_TYPE_SUB):
                multiplicative()
                bytecode.append("sub")
                continue
            break

    def multiplicative():
        unary()
        while True:
            if match(TOKEN_TYPE_MUL):
                unary()
                bytecode.append("mul")
                continue
            elif match(TOKEN_TYPE_DIV):
                unary()
                bytecode.append("div")
                continue
            break

    def unary():
        if match(TOKEN_TYPE_ADD):
            unary()
            bytecode.append("unpos")
            return
        elif match(TOKEN_TYPE_SUB):
            unary()
            bytecode.append("unneg")
            return
        primary()

    def primary():
        if match(TOKEN_TYPE_NUMBER):
            bytecode.append(f"push {tokens[i - 1].value}")
            return
        elif match(TOKEN_TYPE_ECHO):
            expr()
            bytecode.append("echo")
            return
        raise Exception("unexpected token " + TOKEN_NAMES[tokens[i].type] + f" at {tokens[i].line}:{tokens[i].symbol}")

    # A program is a sequence of expressions, each terminated by ";" or a newline.
    while not match(TOKEN_TYPE_EOF):
        expr()
        consume(TOKEN_TYPE_EOL)
    return bytecode
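# A minimal sketch of a stack machine that could execute the instruction strings
# parse() emits. The opcode names are the ones appended above; the runtime choices
# (true division for "div", echo leaving its value on the stack) are assumptions
# made here for illustration, since parse() itself only builds the list.
def execute(bytecode):
    stack = []
    for instr in bytecode:
        op, _, arg = instr.partition(" ")
        if op == "push":
            stack.append(int(arg))
        elif op in ("add", "sub", "mul", "div"):
            b, a = stack.pop(), stack.pop()
            if op == "add":
                stack.append(a + b)
            elif op == "sub":
                stack.append(a - b)
            elif op == "mul":
                stack.append(a * b)
            else:
                stack.append(a / b)  # assumption: "div" means true division
        elif op == "unpos":
            stack.append(+stack.pop())
        elif op == "unneg":
            stack.append(-stack.pop())
        elif op == "echo":
            # "echo <expr>" is itself an expression in the grammar, so the value
            # is printed but left on the stack.
            print(stack[-1])

# Example: execute(parse(tokenize("echo 2 + 3\n"))) prints 5.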
def test():
    # Two statements may share a line because ";" and "\n" are both eol tokens.
    # The trailing backslash keeps the literal from ending in a newline, so
    # tokenize() has to synthesize the final eol itself.
    text = """\
-1 / 2 * 2; echo 1 + 2
echo +-1 * 7 / 3\
"""
    print(text)
    print()
    tokens = tokenize(text)
    for tok in tokens:
        print(tok)
    print()
    print("\n".join(parse(tokens)))

if __name__ == '__main__':
    test()