Advertisement
Guest User

Untitled

a guest
Feb 27th, 2020
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.55 KB | None | 0 0
  1. import collections
  2. import re
  3.  
  4. def compile(source):
  5. return Parser(source)
  6.  
  7. Token = collections.namedtuple('Token', ['type', 'value', 'line', 'column'])
  8. Expression = collections.namedtuple('Expression', ['type', 'sub', 'max', 'min'])
  9.  
class Parser:
    """Compiles a textual condition (e.g. '($steps >= 50) AND ...') into a
    generated code string plus the set of comparison columns it references."""

    # Class-level fallbacks; __init__ assigns instance attributes over these.
    length = 1
    code = "True"
    cols = set()  # NOTE(review): mutable class attribute — shared if __init__ is bypassed

    def __init__(self, source):
        """Tokenize and parse *source*, storing generated code, columns and length."""
        tokenizer = Parser.tokenize(source)
        condition = Parser.get_exp(tokenizer)
        self.code, self.cols = Parser.get_string(condition)
        try:
            # Trace length is the inclusive span between the window bounds.
            self.length = condition.max - condition.min + 1
        except TypeError:
            # either min or max or both are un-set, thus the minimum trace length is 1
            self.length = 1
  24.  
  25. def get_string(expression):
  26. #print(expression)
  27. list = []
  28. cols = set()
  29. #print(isinstance(expression, Token))
  30. if isinstance(expression, str):
  31. return expression, set()
  32. if isinstance(expression, Token):
  33. if expression.type == 'ATOM':
  34. return expression.value.capitalize(), set()
  35. elif expression.type == 'NUMBER':
  36. return str(expression.value), set()
  37. elif expression.type == 'FUNCTION':
  38. print(expression)
  39. return str(expression.value)
  40. else:
  41. for x in expression.sub:
  42. sub = Parser.get_string(x)
  43. list.append(sub[0])
  44. cols.update(sub[1])
  45. if expression.type == 'COMPERATOR':
  46. print(list[0])
  47. print(list[1])
  48. col = ' '.join(list)
  49. print()
  50. return "ยง(" + col + ')', {col}
  51. inner = ','.join(list)
  52. if expression.type == 'None':
  53. return inner, set()
  54. code = expression.type.format(inner) if '{}' in expression.type else expression.type + '(' + inner + ')'
  55. return code, cols
  56.  
    # Maps comparison-operator lexemes to the symbolic names used in generated
    # code. (sic: "comperators" — name kept as-is because other code keys on it.)
    comperators = {
        '>': 'GREATER',
        '<': 'LESSER',
        '>=': 'GREATER_EQUAL',
        '<=': 'LESSER_EQUAL',
        '==': 'EQUAL',
        '!=': 'NOT_EQUAL',
        '<>': 'NOT_EQUAL',
    }
  66. def get_exp(tokenizer):
  67. c_sub = []
  68. c_type = 'None'
  69. #The endpoint of the expression
  70. c_max = 1
  71. #The startingpoint of the expression
  72. c_min = 1
  73.  
  74. for t in tokenizer:
  75. #Go one level deeper
  76. if t.type == 'OPEN':
  77. sub = Parser.get_exp(tokenizer)
  78. c_max = max(sub.max, c_max) if isinstance(sub, int) else ''
  79. c_min = min(sub.min, c_min) if isinstance(sub, int) else ''
  80. c_sub.append(sub)
  81. #return from a sub-expression
  82. elif t.type == 'CLOSE':
  83. return Expression(c_type, c_sub, c_max, c_min)
  84. elif t.type == 'COMPERATOR':
  85. c_type = 'COMPERATOR'
  86. c_sub.append(Parser.comperators[t.value])
  87. elif t.type in ['AND', 'mean']:
  88. c_type = t.type
  89. elif t.type == 'SLICER':
  90. #deconstruct the slicer
  91. values = [int(x) if isint(x) else '' for x in t.value[1:-1].split(':')]
  92. if len(values) == 1:
  93. #slicer is of form [int]
  94. c_min = c_max = values[0]
  95. else:
  96. #slicer is of form [int:int]
  97. c_min, c_max = values
  98.  
  99. try:
  100. always_value = ',' + str(c_max - c_min + 1)
  101. except TypeError:
  102. always_value = ''
  103.  
  104. #Restructure the slicer into a NEXT(ALWAYS()) Expression
  105. if c_min == 0 or c_min == '': c_type = 'ALWAYS({}' + always_value + ')'
  106. else: c_type = 'NEXT(ALWAYS({}' + always_value + '),' + str(c_min) + ')'
  107. else:
  108. c_sub.append(t)
  109.  
  110. return Expression(c_type, c_sub, c_max, c_min)
  111.  
  112. def tokenize(code):
  113. keywords = {'AND', 'OR', 'UNTIL'}
  114. token_specification = [
  115. ('NUMBER', r'\d+(\.\d*)?'),
  116. ('ACCESSOR', r'@\w+:\w+'),
  117. ('OPEN', r'\('),
  118. ('CLOSE', r'\)'),
  119. ('SLICER', r'\[\d*:?\d*\]'),
  120. ('ATOM', r'true|false'),
  121. ('KEYWORD', '|'.join(keywords)),
  122. ('FUNCTION', r'[A-Za-z_]+'),
  123. ('VARIABLE', r'\$[A-Za-z_]+\w*'),
  124. ('COMPERATOR', r'>=|<=|==|!=|<>|>|<'),
  125. ('NEWLINE', r'\n'),
  126. ('SKIP', r'\s'),
  127. ('MISMATCH', r'.'),
  128. ]
  129. tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
  130. line_num = 1
  131. line_start = 0
  132. for mo in re.finditer(tok_regex, code):
  133. kind = mo.lastgroup
  134. value = mo.group()
  135. column = mo.start() - line_start
  136. if kind == 'NUMBER':
  137. value = float(value) if '.' in value else int(value)
  138. elif kind == 'KEYWORD':
  139. kind = value
  140. elif kind == 'NEWLINE':
  141. line_start = mo.end()
  142. line_num += 1
  143. continue
  144. elif kind == 'SKIP':
  145. continue
  146. elif kind == 'MISMATCH':
  147. raise RuntimeError(f'{value!r} unexpected on line {line_num}')
  148. yield Token(kind, value, line_num, column)
  149.  
  150.  
  151. def __str__(self):
  152. return str(self.cols) + '\n[' + str(self.length) + ']\n' + self.code
  153.  
  154. def isint(s):
  155. try:
  156. int(s)
  157. return True
  158. except ValueError:
  159. return False
  160.  
if __name__ == '__main__':
    # Smoke test: a plain comparison AND-ed with a windowed mean comparison.
    #print(compile('(true)[:1]'))
    print(compile('($steps >= 50) AND (mean($my_var 100) > 20)'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement