Expression Parser

# NOTE FOR TESTERS: Just run this and call the test_parser() function.
# That should make testing for bugs in the expression parser a lot more
# user friendly

# Fun fact:
# 2**16 == phi(phi(phi(phi(phi(phi(phi(phi(phi(phi(phi(phi(phi(phi(21887929759))))))))))))))

# This is based heavily on the parsing structure explained here:
# http://effbot.org/zone/simple-top-down-parsing.htm
class ExpressionParser:
    allowKeywordOverwrite = True

    class symbol_base:
        id = None
        value = None
        first = second = third = None
        pre = post = False

        def nud(self):
            raise SyntaxError("Syntax error (%r)." % self.id)

        def led(self, left):
            raise SyntaxError("Unknown operator (%r)." % self.id)

        def preeval(self):
            if self.first is not None:
                self.first.preeval()
            if self.second is not None:
                self.second.preeval()
            if self.third is not None:
                self.third.preeval()

        def eval(self):
            raise SyntaxError("Evaluation procedure unknown for (%r)." % self.id)

        def posteval(self):
            if self.first is not None:
                self.first.posteval()
            if self.second is not None:
                self.second.posteval()
            if self.third is not None:
                self.third.posteval()

        def call(self, arglist):
            raise SyntaxError("%r not callable" % self.id)

        def __repr__(self):
            if self.id == "(name)" or self.id == "(literal)":
                return "(%s %s)" % (self.id[1:-1], self.value)
            out = [self.id, self.first, self.second, self.third]
            out = map(str, filter(None, out))
            return "(" + " ".join(out) + ")"

    symbol_table = {}
    variables = {}
    functions = {}
    keywords = set()

    def __init__(self):
        import math
        me = self

        def symbol(id, bp=0):
            try:
                s = me.symbol_table[id]
            except KeyError:
                me.keywords.add(id) # Add this symbol as a keyword
                class s(ExpressionParser.symbol_base):
                    pass
                s.__name__ = "symbol-" + id # for debugging
                s.id = id
                s.value = None
                s.lbp = bp
                me.symbol_table[id] = s
            else:
                s.lbp = max(bp, s.lbp)
            return s

        # helpers

        def infix(id, bp):
            def led(self, left):
                self.first = left
                self.second = me.to_parse_tree(bp)
                return self
            s = symbol(id, bp)
            s.led = led
            return s

        def infix_r(id, bp):
            def led(self, left):
                self.first = left
                self.second = me.to_parse_tree(bp-1)
                return self
            s = symbol(id, bp)
            s.led = led
            return s

        def prefix(id, bp):
            def nud(self):
                self.first = me.to_parse_tree(bp)
                self.pre = True
                return self
            s = symbol(id)
            s.nud = nud
            return s

        def postfix(id, bp):
            def led(self, left):
                self.first = left
                self.post = True
                return self
            s = symbol(id, bp)
            s.led = led
            return s

        def constant(id, value):
            if id not in self.variables:
                self.keywords.add(id) # This constant is now a keyword
                self.variables[id] = value

        def advance(id=None):
            if id:
                if not me.token or me.token.id != id:
                    raise SyntaxError("Expected %r" % id)
            me.token = me.next()

        def method(s):
            # decorator
            assert issubclass(s, ExpressionParser.symbol_base)
            def bind(fn):
                setattr(s, fn.__name__, fn)
            return bind

        def function(id):
            # Decorator
            me.keywords.add(id) # Add this function name as a keyword
            def bind(fn):
                me.functions[id] = fn
            return bind

        # For compound operations of the form:
        # a (compoundop) b => a (op1) a (op2) b
        def compound_infix(id, ops, bp):
            op1 = me.symbol_table[ops[0]]
            op2 = me.symbol_table[ops[1]]
            @method(symbol(id, bp))
            def led(self, left):
                o1 = op1()
                o2 = op2()
                o2.led(left)
                o1.first = left; o1.second = o2
                return o1

        # Constants
        constant("pi", math.pi)
        constant("e", math.e)

        # Syntax

        # Assignment
        infix("=", 10)
        @method(symbol("="))
        def eval(self):
            # Evaluate assignment
            if "(name)" == self.first.id:
                if me.test_var_mutable(self.first.value):
                    val = self.second.eval()
                    me.variables[self.first.value] = val
                    return val
                else:
                    raise SyntaxError("Cannot assign a value to keyword %r" % self.first.value)
            else:
                raise SyntaxError("Cannot assign a value to %r" % self.first.id)

        # Deletion
        @method(prefix("del", 140))
        def eval(self):
            if "(name)" == self.first.id:
                if me.test_var_mutable(self.first.value):
                    val = self.first.eval()
                    del me.variables[self.first.value]
                    return val
                else:
                    raise SyntaxError("Cannot delete %r" % self.first.value)
            else:
                raise SyntaxError("Cannot delete %r" % self.first.id)
        self.symbol_table["delete"] = self.symbol_table["del"]

        # Increment
        prefix("++", 140); postfix("++", 130)
        @method(symbol("++"))
        def preeval(self):
            if self.pre:
                # preincrement
                if "(name)" == self.first.id:
                    if me.test_var_mutable(self.first.value):
                        if self.first.value in me.variables:
                            me.variables[self.first.value] += 1
                        else:
                            raise NameError("%r not defined" % self.first.value)
                    else:
                        raise SyntaxError("Cannot increment keyword %r" % self.first.value)
                else:
                    raise SyntaxError("Cannot increment %r" % self.first.id)
        @method(symbol("++"))
        def eval(self):
            # Just return the evaluation
            return self.first.eval()
        @method(symbol("++"))
        def posteval(self):
            if self.post:
                # postincrement
                if "(name)" == self.first.id:
                    if me.test_var_mutable(self.first.value):
                        if self.first.value in me.variables:
                            me.variables[self.first.value] += 1
                        else:
                            raise NameError("%r not defined" % self.first.value)
                    else:
                        raise SyntaxError("Cannot increment keyword %r" % self.first.value)
                else:
                    raise SyntaxError("Cannot increment %r" % self.first.id)

        # Decrement
        prefix("--", 140); postfix("--", 130)
        @method(symbol("--"))
        def preeval(self):
            if self.pre:
                # predecrement
                if "(name)" == self.first.id:
                    if me.test_var_mutable(self.first.value):
                        if self.first.value in me.variables:
                            me.variables[self.first.value] -= 1
                        else:
                            raise NameError("%r not defined" % self.first.value)
                    else:
                        raise SyntaxError("Cannot decrement keyword %r" % self.first.value)
                else:
                    raise SyntaxError("Cannot decrement %r" % self.first.id)
        @method(symbol("--"))
        def eval(self):
            # Just return the evaluation
            return self.first.eval()
        @method(symbol("--"))
        def posteval(self):
            if self.post:
                # postdecrement
                if "(name)" == self.first.id:
                    if me.test_var_mutable(self.first.value):
                        if self.first.value in me.variables:
                            me.variables[self.first.value] -= 1
                        else:
                            raise NameError("%r not defined" % self.first.value)
                    else:
                        raise SyntaxError("Cannot decrement keyword %r" % self.first.value)
                else:
                    raise SyntaxError("Cannot decrement %r" % self.first.id)

        # Addition
        infix("+", 110); prefix("+", 130)
        @method(symbol("+"))
        def eval(self):
            # evaluate addition
            if self.second is None: # Prefix
                return self.first.eval()
            else: # Infix
                return self.first.eval() + self.second.eval()

        # Subtraction and negation
        infix("-", 110); prefix("-", 130)
        @method(symbol("-"))
        def eval(self):
            # evaluate subtraction
            if self.second is None: # Prefix
                return -self.first.eval()
            else: # Infix
                return self.first.eval() - self.second.eval()

        # Multiplication
        @method(infix("*", 120))
        def eval(self):
            # evaluate multiplication
            return self.first.eval() * self.second.eval()

        # Division
        @method(infix("/", 120))
        def eval(self):
            # evaluate division
            a = self.first.eval()
            b = self.second.eval()
            if isinstance(a, (int, long)) and isinstance(b, (int, long)) and 0 != a % b:
                return float(a) / float(b)
            else:
                return a / b

        # Floor division
        @method(infix("//", 120))
        def eval(self):
            # evaluate floor division
            return int(self.first.eval() // self.second.eval())

        # In-place addition
        compound_infix("+=", ("=", "+"), 10)

        # In-place subtraction
        compound_infix("-=", ("=", "-"), 10)

        # In-place multiplication
        compound_infix("*=", ("=", "*"), 10)

        # In-place division
        compound_infix("/=", ("=", "/"), 10)

        # In-place floor division
        compound_infix("//=", ("=", "//"), 10)

        # Exponentiation
        @method(infix_r("**", 140))
        def eval(self):
            # evaluate exponentiation
            return self.first.eval() ** self.second.eval()

        # Factorial
        @method(postfix("!", 140))
        def eval(self):
            # evaluate factorial
            val = self.first.eval()
            if isinstance(val, (int, long)) and val >= 0:
                return math.factorial(val)
            else:
                raise SyntaxError("Factorial is only defined for nonnegative integer values")

        # Parentheses
        @method(symbol("("))
        def nud(self):
            expr = me.to_parse_tree()
            advance(")")
            return expr
        @method(symbol("(", 150)) # lbp so that the parenthesis grabs the expression
        def led(self, left):
            self.first = left
            self.second = []
            if me.token.id != ")":
                while 1:
                    self.second.append(me.to_parse_tree())
                    if me.token.id != ",":
                        break
                    advance(",")
            advance(")")
            return self
        @method(symbol("("))
        def eval(self):
            return self.first.call(self.second)

        symbol(")"); symbol(",")

        # Absolute value function
        @function("abs")
        def call(arglist):
            if 1 == len(arglist):
                return abs(arglist[0].eval())
            else:
                raise SyntaxError("The absolute value function requires 1 argument")

        # Square root function
        @function("sqrt")
        def call(arglist):
            if 1 == len(arglist):
                return math.sqrt(arglist[0].eval())
            else:
                raise SyntaxError("The square root function requires 1 argument")

        # Logarithm (base 10) function
        @function("log")
        def call(arglist):
            if 1 == len(arglist):
                return math.log10(arglist[0].eval())
            else:
                raise SyntaxError("The logarithm function requires 1 argument")

        # Natural Logarithm function
        @function("ln")
        def call(arglist):
            if 1 == len(arglist):
                return math.log(arglist[0].eval())
            else:
                raise SyntaxError("The natural logarithm function requires 1 argument")

        # Logarithm (base 2) function
        @function("log2")
        def call(arglist):
            if 1 == len(arglist):
                return math.log(arglist[0].eval(), 2)
            else:
                raise SyntaxError("The logarithm (base 2) function requires 1 argument")

        # Sine function
        @function("sin")
        def call(arglist):
            if 1 == len(arglist):
                return math.sin(arglist[0].eval())
            else:
                raise SyntaxError("The sine function requires 1 argument")

        # Cosine function
        @function("cos")
        def call(arglist):
            if 1 == len(arglist):
                return math.cos(arglist[0].eval())
            else:
                raise SyntaxError("The cosine function requires 1 argument")

        # Tangent function
        @function("tan")
        def call(arglist):
            if 1 == len(arglist):
                return math.tan(arglist[0].eval())
            else:
                raise SyntaxError("The tangent function requires 1 argument")

        # Arc sine function
        @function("asin")
        def call(arglist):
            if 1 == len(arglist):
                return math.asin(arglist[0].eval())
            else:
                raise SyntaxError("The arc sine function requires 1 argument")
        self.functions["arcsin"] = self.functions["asin"]

        # Arc cosine function
        @function("acos")
        def call(arglist):
            if 1 == len(arglist):
                return math.acos(arglist[0].eval())
            else:
                raise SyntaxError("The arc cosine function requires 1 argument")
        self.functions["arccos"] = self.functions["acos"]

        # Arc tangent function
        @function("atan")
        def call(arglist):
            if 1 == len(arglist):
                return math.atan(arglist[0].eval())
            elif 2 == len(arglist):
                return math.atan2(arglist[0].eval(), arglist[1].eval())
            else:
                raise SyntaxError("The arc tangent function requires 1-2 arguments")
        self.functions["arctan"] = self.functions["atan"]

        # Int function
        @function("int")
        def call(arglist):
            if 1 == len(arglist):
                return int(arglist[0].eval())
            else:
                raise SyntaxError("The int function requires 1 argument")

        # Floor function
        self.functions["floor"] = self.functions["int"] # Int automatically floors

        # Ceil function
        @function("ceil")
        def call(arglist):
            if 1 == len(arglist):
                return int(math.ceil(arglist[0].eval()))
            else:
                raise SyntaxError("The ceil function requires 1 argument")

        # Round function
        @function("round")
        def call(arglist):
            if 1 == len(arglist):
                return int(round(arglist[0].eval()))
            else:
                raise SyntaxError("The round function requires 1 argument")

        # Permutation function
        @function("nPr")
        def call(arglist):
            if 2 == len(arglist):
                n = arglist[0].eval()
                r = arglist[1].eval()
                if isinstance(n, (int, long)) and isinstance(r, (int, long)):
                    if 0 <= r and r <= n:
                        p = 1
                        for t in xrange(n-r+1, n+1):
                            p = p * t
                        return p
                    raise SyntaxError("0 <= r <= n required for the permutation function")
                else:
                    raise SyntaxError("The permutation function requires integer arguments")
            else:
                raise SyntaxError("The permutation function requires 2 arguments")

        # Combination function
        @function("nCr")
        def call(arglist):
            if 2 == len(arglist):
                n = arglist[0].eval()
                r = arglist[1].eval()
                if isinstance(n, (int, long)) and isinstance(r, (int, long)):
                    if 0 <= r and r <= n:
                        c = 1
                        for t in xrange(min(r, n-r)):
                            c = c * (n - t) // (t + 1)
                        return c
                    raise SyntaxError("0 <= r <= n required for the combination function")
                else:
                    raise SyntaxError("The combination function requires integer arguments")
            else:
                raise SyntaxError("The combination function requires 2 arguments")

        # Coprime counting function (Euler's totient)
        @function("phi")
        def call(arglist):
            if 1 == len(arglist):
                num = arglist[0].eval()
                if isinstance(num, (int, long)):
                    # Factor the number and compute phi
                    phi = num
                    if 0 == num % 2:
                        phi = phi / 2
                        num /= 2
                        while 0 == num % 2:
                            num /= 2
                    if 0 == num % 3:
                        phi = phi / 3 * 2
                        num /= 3
                        while 0 == num % 3:
                            num /= 3
                    i = 0
                    while i <= num ** 0.5:
                        i += 6
                        if 0 == num % (i - 1):
                            phi = phi / (i - 1) * (i - 2)
                            num /= (i - 1)
                            while 0 == num % (i - 1):
                                num /= (i - 1)
                        if 0 == num % (i + 1):
                            phi = phi / (i + 1) * i
                            num /= (i + 1)
                            while 0 == num % (i + 1):
                                num /= (i + 1)
                    if 1 != num:
                        phi = phi / num * (num - 1)
                    return phi
                else:
                    raise SyntaxError("The coprime counting function requires an integer argument")
            else:
                raise SyntaxError("The coprime counting function requires 1 argument")

        # Literals
        symbol("(literal)").nud = lambda self: self
        symbol("(literal)").eval = lambda self: self.value

        # Variables/named objects
        symbol("(name)").nud = lambda self: self
        @method(symbol("(name)"))
        def eval(self):
            val = me.variables.get(self.value, None)
            if val is None:
                raise NameError("%r is not defined" % self.value)
            else:
                return val
        @method(symbol("(name)"))
        def call(self, arglist):
            funct = me.functions.get(self.value, None)
            if funct is None:
                raise SyntaxError("%r not callable" % self.value)
            else:
                return funct(arglist)

        # End symbol for evaluation termination
        symbol("(end)")

    import re
    def test_var_name(self, name):
        return test_var_name.test.match(name)
    test_var_name.test = re.compile("[a-zA-Z_]\w*$")

    def test_var_mutable(self, name):
        if name in self.keywords:
            if self.allowKeywordOverwrite:
                del self.keywords[name]
            else:
                return False
        return True

    def set_variable(self, id, value):
        if self.test_var_name(id):
            if self.test_var_mutable(id):
                self.variables[id] = value
            else:
               raise NameError("Cannot overwrite keyword %r" % id)
        else:
            raise NameError("Variable name must begin with [a-zA-Z_] and contain only [a-zA-Z0-9_]")

    def del_variable(self, id):
        if id in self.variables:
            if self.test_var_mutable(id):
                del self.variables[id]
            else:
               raise NameError("Cannot delete keyword %r" % id)
        else:
            raise NameError("%r not defined" % id)

    def tokenize(self, expression):
        import re
        splitter = re.compile("\s*(?:([0-9\.]+)|(\w+)|([^\s\w]+))")
        for number, symbol, other in splitter.findall(expression):
            if number:
                s = self.symbol_table["(literal)"]()
                try:
                    s.value = int(number)
                except ValueError:
                    try:
                        s.value = float(number)
                    except ValueError:
                        raise SyntaxError("Invalid number %r" % number)
            elif symbol:
                s = self.symbol_table.get(symbol, None)
                if s is None:
                    s = self.symbol_table["(name)"]()
                    s.value = symbol
                else:
                    s = s()
            elif other:
                while 0 != len(other):
                    bestmatch = ""
                    match = ""
                    for c in other:
                        match += c
                        if match in self.symbol_table:
                            bestmatch = match
                    if 0 == len(bestmatch):
                        raise SyntaxError("Unknown operator %r" % other)
                    else:
                        other = other[len(bestmatch):]
                        s = self.symbol_table[bestmatch]()
                        if 0 != len(other):
                            yield s
            yield s
        yield self.symbol_table["(end)"]()

    def to_parse_tree(self, rbp=0):
        t = self.token
        try:
            self.token = self.next()
        except StopIteration:
            raise SyntaxError("Expression ends unexpectedly")
        left = t.nud()
        while rbp < self.token.lbp:
            t = self.token
            try:
                self.token = self.next()
            except StopIteration:
                raise SyntaxError("Expression ends unexpectedly")
            left = t.led(left)
        return left

    def parse(self, expression):
        self.next = self.tokenize(expression).next
        self.token = self.next()
        tree = self.to_parse_tree()
        try:
            self.next()
        except StopIteration:
            # This is a good thing
            tree.preeval() # Do preevaluation
            val = tree.eval()
            tree.posteval() # Do postevaluation
            return val
        else:
            raise SyntaxError("Expression did not end when expected")

def test_parser():
    exp = ExpressionParser()
    exp.allowKeywordOverwrite = False
    print "Known operations:"
    print "Constants: pi, e"
    print "Operations: +, -, *, /, //, **, !, parenthesis, =, del, delete"
    print "\t-- (pre and post), ++ (pre and post), +=, -=, *=, /=, //=,"
    print "Functions: abs, sqrt, log (this is base 10), ln, log2, sin, cos,"
    print "\ttan, asin, arcsin, acos, arccos, atan (can take 1 or 2 arguments),"
    print "\tarctan (can take 1 or 2 arguments), int, floor, ceil, round,"
    print "\tnPr (permutation function), nCr (combination function),"
    print "\tphi (Euler's totient, coprime counting function)"
    print "You can set a variable using 'var_name = (expression)'"
    print "You can delete variables using 'del/delete var_name'"
    print "Keywords are not allowed to be changed."
    while True:
        expression = raw_input("Enter an expression to parse or exit to quit: ")
        if "exit" == expression:
            break
        else:
            var = None
            lst = expression.split()
            if 0 == len(lst):
                continue
            try:
                val = exp.parse(expression)
            except Exception, err:
                print "There was an issue with evaluation:", err
            else:
                print val