Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2019
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.18 KB | None | 0 0
  1. """
  2. Grammar:
  3.  
  4. Expression --> AndTerm { OR AndTerm }*
  5. AndTerm --> Condition { AND Condition }*
  6. Condition --> Terminal (>, <, >=, <=, ==, !=, .*) Terminal | ( Expression )
  7. Terminal --> Number | String | Variable
  8.  
  9. Usage:
  10. from boolparser import *
  11. p = BooleanParser('<expression text>')
  12. p.evaluate(variable_dict) # variable_dict is a dictionary providing values for variables that appear in <expression text>
  13. """
  14.  
  15.  
  16. class TokenType:
  17. NUM, STR, VAR, GT, GTE, LT, LTE, EQ, NEQ, LP, RP, AND, OR, STRWITH = range(14)
  18.  
  19.  
  20. class TreeNode:
  21. tokenType = None
  22. value = None
  23. left = None
  24. right = None
  25.  
  26. def __init__(self, tokenType):
  27. self.tokenType = tokenType
  28.  
  29.  
  30. class Tokenizer:
  31. expression = None
  32. tokens = None
  33. tokenTypes = None
  34. i = 0
  35.  
  36. def __init__(self, exp):
  37. self.expression = exp
  38.  
  39. def next(self):
  40. self.i += 1
  41. return self.tokens[self.i - 1]
  42.  
  43. def peek(self):
  44. return self.tokens[self.i]
  45.  
  46. def hasNext(self):
  47. return self.i < len(self.tokens)
  48.  
  49. def nextTokenType(self):
  50. return self.tokenTypes[self.i]
  51.  
  52. def nextTokenTypeIsOperator(self):
  53. t = self.tokenTypes[self.i]
  54. return (
  55. t == TokenType.GT
  56. or t == TokenType.GTE
  57. or t == TokenType.LT
  58. or t == TokenType.LTE
  59. or t == TokenType.EQ
  60. or t == TokenType.NEQ
  61. or t == TokenType.STRWITH
  62. )
  63.  
  64. def tokenize(self):
  65. import re
  66.  
  67. reg = re.compile(r"(\bAND\b|\bOR\b|!=|==|<=|>=|<|>|\(|\)|\.\*)")
  68. self.tokens = reg.split(self.expression)
  69. self.tokens = [t.strip() for t in self.tokens if t.strip() != ""]
  70.  
  71. self.tokenTypes = []
  72. for t in self.tokens:
  73. if t == "AND":
  74. self.tokenTypes.append(TokenType.AND)
  75. elif t == "OR":
  76. self.tokenTypes.append(TokenType.OR)
  77. elif t == "(":
  78. self.tokenTypes.append(TokenType.LP)
  79. elif t == ")":
  80. self.tokenTypes.append(TokenType.RP)
  81. elif t == "<":
  82. self.tokenTypes.append(TokenType.LT)
  83. elif t == "<=":
  84. self.tokenTypes.append(TokenType.LTE)
  85. elif t == ">":
  86. self.tokenTypes.append(TokenType.GT)
  87. elif t == ">=":
  88. self.tokenTypes.append(TokenType.GTE)
  89. elif t == "==":
  90. self.tokenTypes.append(TokenType.EQ)
  91. elif t == "!=":
  92. self.tokenTypes.append(TokenType.NEQ)
  93. elif t == ".*":
  94. self.tokenTypes.append(TokenType.STRWITH)
  95. else:
  96. # number of string or variable
  97. if t[0] == t[-1] == '"' or t[0] == t[-1] == "'":
  98. self.tokenTypes.append(TokenType.STR)
  99. else:
  100. try:
  101. number = float(t)
  102. self.tokenTypes.append(TokenType.NUM)
  103. except:
  104. if re.search("^[a-zA-Z_]+$", t):
  105. self.tokenTypes.append(TokenType.VAR)
  106. else:
  107. self.tokenTypes.append(None)
  108.  
  109.  
  110. class BooleanParser:
  111. tokenizer = None
  112. root = None
  113.  
  114. def __init__(self, exp):
  115. self.tokenizer = Tokenizer(exp)
  116. self.tokenizer.tokenize()
  117. self.parse()
  118.  
  119. def parse(self):
  120. self.root = self.parseExpression()
  121.  
  122. def parseExpression(self):
  123. andTerm1 = self.parseAndTerm()
  124. while (
  125. self.tokenizer.hasNext() and self.tokenizer.nextTokenType() == TokenType.OR
  126. ):
  127. self.tokenizer.next()
  128. andTermX = self.parseAndTerm()
  129. andTerm = TreeNode(TokenType.OR)
  130. andTerm.left = andTerm1
  131. andTerm.right = andTermX
  132. andTerm1 = andTerm
  133. return andTerm1
  134.  
  135. def parseAndTerm(self):
  136. condition1 = self.parseCondition()
  137. while (
  138. self.tokenizer.hasNext() and self.tokenizer.nextTokenType() == TokenType.AND
  139. ):
  140. self.tokenizer.next()
  141. conditionX = self.parseCondition()
  142. condition = TreeNode(TokenType.AND)
  143. condition.left = condition1
  144. condition.right = conditionX
  145. condition1 = condition
  146. return condition1
  147.  
  148. def parseCondition(self):
  149. if self.tokenizer.hasNext() and self.tokenizer.nextTokenType() == TokenType.LP:
  150. self.tokenizer.next()
  151. expression = self.parseExpression()
  152. if (
  153. self.tokenizer.hasNext()
  154. and self.tokenizer.nextTokenType() == TokenType.RP
  155. ):
  156. self.tokenizer.next()
  157. return expression
  158. else:
  159. raise Exception("Closing ) expected, but got " + self.tokenizer.next())
  160.  
  161. terminal1 = self.parseTerminal()
  162. if self.tokenizer.hasNext():
  163. if self.tokenizer.nextTokenTypeIsOperator():
  164. condition = TreeNode(self.tokenizer.nextTokenType())
  165. self.tokenizer.next()
  166. terminal2 = self.parseTerminal()
  167. condition.left = terminal1
  168. condition.right = terminal2
  169. return condition
  170. else:
  171. raise Exception("Operator expected, but got " + self.tokenizer.next())
  172. else:
  173. raise Exception("Operator expected, but got nothing")
  174.  
  175. def parseTerminal(self):
  176. if self.tokenizer.hasNext():
  177. tokenType = self.tokenizer.nextTokenType()
  178. if tokenType == TokenType.NUM:
  179. n = TreeNode(tokenType)
  180. n.value = float(self.tokenizer.next())
  181. return n
  182. elif tokenType == TokenType.VAR:
  183. n = TreeNode(tokenType)
  184. n.value = self.tokenizer.next()
  185. return n
  186. elif tokenType == TokenType.STR:
  187. n = TreeNode(tokenType)
  188. n.value = self.tokenizer.next()[1:-1]
  189. return n
  190. else:
  191. raise Exception(
  192. "NUM, STR, or VAR expected, but got " + self.tokenizer.next()
  193. )
  194.  
  195. else:
  196. raise Exception(
  197. "NUM, STR, or VAR expected, but got " + self.tokenizer.next()
  198. )
  199.  
  200. def evaluate(self, variable_dict):
  201. return self.evaluateRecursive(self.root, variable_dict)
  202.  
  203. def evaluateRecursive(self, treeNode, variable_dict):
  204. if treeNode.tokenType == TokenType.NUM or treeNode.tokenType == TokenType.STR:
  205. return treeNode.value
  206. if treeNode.tokenType == TokenType.VAR:
  207. return variable_dict.get(treeNode.value)
  208.  
  209. left = self.evaluateRecursive(treeNode.left, variable_dict)
  210. right = self.evaluateRecursive(treeNode.right, variable_dict)
  211. if treeNode.tokenType == TokenType.GT:
  212. return left > right
  213. elif treeNode.tokenType == TokenType.GTE:
  214. return left >= right
  215. elif treeNode.tokenType == TokenType.LT:
  216. return left < right
  217. elif treeNode.tokenType == TokenType.LTE:
  218. return left <= right
  219. elif treeNode.tokenType == TokenType.EQ:
  220. return left == right
  221. elif treeNode.tokenType == TokenType.NEQ:
  222. return left != right
  223. elif treeNode.tokenType == TokenType.AND:
  224. return left and right
  225. elif treeNode.tokenType == TokenType.OR:
  226. return left or right
  227. elif treeNode.tokenType == TokenType.STRWITH:
  228. return left.startswith(right)
  229. else:
  230. raise Exception("Unexpected type " + str(treeNode.tokenType))
  231.  
  232.  
  233. if __name__ == "__main__":
  234. # Added .startswith operator
  235. p = BooleanParser('account_number .* "abc"')
  236. assert p.evaluate({'account_number': 'abc123'}) == True
  237. assert p.evaluate({'account_number': '1abc123'}) == False
  238.  
  239. # Bug fix in matching string + Include both "<double quoted>" and '<single quoted>' string
  240. double_quoted_p = BooleanParser('account_number == "abc"')
  241. assert p.evaluate({'account_number': 'abc'}) == True
  242.  
  243. single_quoted_p = BooleanParser("account_number == 'abc'")
  244. assert p.evaluate({'account_number': 'abc'}) == True
  245. assert p.evaluate({'account_number': "abc"}) == True
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement