Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import collections
- import re
def compile(source):
    """Parse the condition text *source* and return the resulting Parser.

    Note: within this module the name shadows the built-in ``compile``.
    """
    parser = Parser(source)
    return parser
# Lexical token: its kind, the matched (possibly converted) value, the line
# number and the column offset of the match within that line.
Token = collections.namedtuple('Token', 'type value line column')
# Parsed (sub-)expression: its type tag / code template, the list of child
# items, and the end point (max) and starting point (min) of the expression.
Expression = collections.namedtuple('Expression', 'type sub max min')
class Parser:
    """Compile a textual condition into a nested call-style code string.

    After construction an instance exposes:

    * ``code``   -- the generated code string,
    * ``cols``   -- the set of comparison strings found in the condition,
    * ``length`` -- ``max - min + 1`` of the top-level expression, or 1 when
      either bound is unset.
    """

    # Class-level fallbacks; __init__ overwrites all three on every instance.
    length = 1
    code = "True"
    cols = set()

    def __init__(self, source):
        """Tokenize and parse *source*, then derive code, cols and length."""
        tokenizer = Parser.tokenize(source)
        condition = Parser.get_exp(tokenizer)
        self.code, self.cols = Parser.get_string(condition)
        try:
            self.length = condition.max - condition.min + 1
        except TypeError:
            # Either min or max (or both) is unset (''), thus the minimum
            # trace length is 1.
            self.length = 1

    @staticmethod
    def get_string(expression):
        """Render *expression* as ``(code, cols)``.

        *expression* may be a plain string (already-rendered text such as a
        comparator name), a ``Token`` or an ``Expression``. ``code`` is the
        generated text and ``cols`` the set of comparison strings collected
        from it and its children. Every branch returns a 2-tuple so that the
        recursive caller can always unpack the result.
        """
        if isinstance(expression, str):
            return expression, set()
        if isinstance(expression, Token):
            if expression.type == 'ATOM':
                # 'true' / 'false' -> 'True' / 'False'
                return expression.value.capitalize(), set()
            # NUMBER, FUNCTION and every remaining token kind (VARIABLE,
            # ACCESSOR, ...) are rendered as their textual value.
            return str(expression.value), set()

        parts = []
        cols = set()
        for child in expression.sub:
            text, child_cols = Parser.get_string(child)
            parts.append(text)
            cols.update(child_cols)

        if expression.type == 'COMPERATOR':
            col = ' '.join(parts)
            # '\u00a7' is the section sign; the scraped source showed its
            # UTF-8 bytes mis-decoded as two Thai characters.
            return '\u00a7(' + col + ')', {col}

        inner = ','.join(parts)
        if expression.type == 'None':
            # Pure grouping parentheses: emit the children unchanged, but
            # keep the columns they contributed.
            return inner, cols
        if '{}' in expression.type:
            # The type is a template such as 'ALWAYS({},3)'.
            code = expression.type.format(inner)
        else:
            code = expression.type + '(' + inner + ')'
        return code, cols

    # Comparison operator symbol -> name emitted into the generated code.
    comperators = {
        '>': 'GREATER',
        '<': 'LESSER',
        '>=': 'GREATER_EQUAL',
        '<=': 'LESSER_EQUAL',
        '==': 'EQUAL',
        '!=': 'NOT_EQUAL',
        '<>': 'NOT_EQUAL',
    }

    @staticmethod
    def get_exp(tokenizer):
        """Consume tokens up to the matching CLOSE and build an Expression.

        *tokenizer* is an iterable of ``Token``; the same iterator is shared
        with the recursive calls, so each nesting level continues where the
        previous one stopped. Returns ``Expression(type, sub, max, min)``
        where a bound is an int, or ``''`` when it is unset.
        """
        # Guarantee a single shared iterator even if a list was passed in;
        # iter() on an iterator returns the iterator itself.
        tokens = iter(tokenizer)
        c_sub = []
        c_type = 'None'
        # The endpoint of the expression
        c_max = 1
        # The starting point of the expression
        c_min = 1
        for t in tokens:
            if t.type == 'OPEN':
                # Go one level deeper
                sub = Parser.get_exp(tokens)
                # Widen the bounds only while both sides are real integers;
                # an unset bound ('') makes the combined bound unset too.
                if isinstance(sub.max, int) and isinstance(c_max, int):
                    c_max = max(sub.max, c_max)
                else:
                    c_max = ''
                if isinstance(sub.min, int) and isinstance(c_min, int):
                    c_min = min(sub.min, c_min)
                else:
                    c_min = ''
                c_sub.append(sub)
            elif t.type == 'CLOSE':
                # Return from a sub-expression
                return Expression(c_type, c_sub, c_max, c_min)
            elif t.type == 'COMPERATOR':
                c_type = 'COMPERATOR'
                c_sub.append(Parser.comperators[t.value])
            elif t.type in ['AND', 'mean']:
                # NOTE(review): tokenize() emits identifiers such as 'mean'
                # with type 'FUNCTION', so only 'AND' matches for its output.
                c_type = t.type
            elif t.type == 'SLICER':
                # Deconstruct the slicer: strip the brackets, split on ':'
                values = [int(x) if isint(x) else ''
                          for x in t.value[1:-1].split(':')]
                if len(values) == 1:
                    # Slicer is of form [int]
                    c_min = c_max = values[0]
                else:
                    # Slicer is of form [int:int]
                    c_min, c_max = values
                try:
                    always_value = ',' + str(c_max - c_min + 1)
                except TypeError:
                    # At least one bound is unset (''), so no length is given.
                    always_value = ''
                # Restructure the slicer into a NEXT(ALWAYS()) expression
                if c_min == 0 or c_min == '':
                    c_type = 'ALWAYS({}' + always_value + ')'
                else:
                    c_type = ('NEXT(ALWAYS({}' + always_value + '),'
                              + str(c_min) + ')')
            else:
                c_sub.append(t)
        return Expression(c_type, c_sub, c_max, c_min)

    @staticmethod
    def tokenize(code):
        """Yield a ``Token`` for every lexical element of *code*.

        Whitespace and newlines are skipped, numbers are converted to
        ``int`` / ``float``, and keywords use their own text as token type.

        Raises:
            RuntimeError: on a character that matches no token pattern.
        """
        keywords = {'AND', 'OR', 'UNTIL'}
        token_specification = [
            ('NUMBER', r'\d+(\.\d*)?'),
            ('ACCESSOR', r'@\w+:\w+'),
            ('OPEN', r'\('),
            ('CLOSE', r'\)'),
            ('SLICER', r'\[\d*:?\d*\]'),
            ('ATOM', r'true|false'),
            # sorted() only makes the pattern text deterministic; no keyword
            # is a prefix of another, so the order does not affect matching.
            ('KEYWORD', '|'.join(sorted(keywords))),
            ('FUNCTION', r'[A-Za-z_]+'),
            ('VARIABLE', r'\$[A-Za-z_]+\w*'),
            ('COMPERATOR', r'>=|<=|==|!=|<>|>|<'),
            ('NEWLINE', r'\n'),
            ('SKIP', r'\s'),
            ('MISMATCH', r'.'),
        ]
        tok_regex = '|'.join(
            '(?P<%s>%s)' % pair for pair in token_specification)
        line_num = 1
        line_start = 0
        for mo in re.finditer(tok_regex, code):
            kind = mo.lastgroup
            value = mo.group()
            column = mo.start() - line_start
            if kind == 'NUMBER':
                value = float(value) if '.' in value else int(value)
            elif kind == 'KEYWORD':
                # Keywords are their own token type ('AND', 'OR', 'UNTIL').
                kind = value
            elif kind == 'NEWLINE':
                line_start = mo.end()
                line_num += 1
                continue
            elif kind == 'SKIP':
                continue
            elif kind == 'MISMATCH':
                raise RuntimeError(
                    f'{value!r} unexpected on line {line_num}')
            yield Token(kind, value, line_num, column)

    def __str__(self):
        """Return cols, the bracketed length and the code on three lines."""
        return str(self.cols) + '\n[' + str(self.length) + ']\n' + self.code
def isint(s):
    """Return True if ``int(s)`` succeeds, False if it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
if __name__ == '__main__':
    # Small demo: compile one sample condition and print the result.
    # Another sample that can be tried instead: '(true)[:1]'
    demo_source = '($steps >= 50) AND (mean($my_var 100) > 20)'
    print(compile(demo_source))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement