Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Parses a custom config file format and builds an object tree from it.
import re
class ConfigTokenizerError(Exception):
    """Raised by ConfigTokenizer when a required token is missing or malformed."""
class ConfigParserError(Exception):
    """Raised by ConfigParser when the config text is structurally invalid."""
class ConfigError(Exception):
    """Raised by ConfigParser accessors when a path is missing or has the wrong type."""
class ConfigTokenizer:
    """Cursor-based tokenizer over an in-memory config buffer.

    A token is an identifier, an integer, a float, a string literal or a
    single "other" punctuation character.  ``//`` line comments and
    ``/* */`` block comments are treated as whitespace.

    Every ``get_*`` method tries to read one token at ``self.position``.
    On success it advances the cursor and returns the token; on failure it
    returns ``None`` (``0`` for the whitespace readers) and leaves the
    cursor where it was.  Every ``require_*`` method wraps the matching
    ``get_*`` and raises ``ConfigTokenizerError`` instead of reporting a
    miss.

    Attributes:
        buffer: the full text being tokenized.
        position: index into ``buffer`` of the next unread character.
        line: number of newlines consumed so far (starts at 0); used in
            error messages.
    """

    # Patterns are compiled once and applied with ``match(buffer, position)``
    # so that no copy of the remaining buffer is made for every token.
    _IDENTIFIER_RE = re.compile(r'[A-Za-z0-9_]+')
    _WHITESPACE_RE = re.compile(r'\s+')
    _LINE_COMMENT_RE = re.compile(r'//[^\n]*')
    _OTHER_RE = re.compile(r'[(){}\[\]\-+/*=!?:;.]')
    _STRING_RE = re.compile(r'"(?:[^\\"]|\\.)*"')
    _INTEGER_RE = re.compile(r'0x[A-Fa-f0-9]+|[0-9]+')
    _FLOAT_RE = re.compile(
        r'-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?')
    _HEX_ESCAPE_RE = re.compile(r'[0-9A-Fa-f]{1,4}')
    _OCTAL_ESCAPE_RE = re.compile(r'[0-7]{1,6}')
    _SIMPLE_ESCAPES = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\', '"': '"'}

    # Code point -> character.  ``unichr`` only exists on Python 2.
    try:
        _to_char = staticmethod(unichr)
    except NameError:
        _to_char = staticmethod(chr)

    def __init__(self, buf):
        """Start tokenizing ``buf`` from its first character."""
        self.buffer = buf
        self.line = 0
        self.position = 0

    # ----------------------------------------------------------- identifier

    def require_identifier(self):
        """Return the identifier at the cursor or raise ConfigTokenizerError."""
        v = self.get_identifier()
        if v is None:
            raise ConfigTokenizerError('Expected identifier at line %d' % self.line)
        return v

    def get_identifier(self):
        """Read a run of ``A-Za-z0-9_`` characters; return it or ``None``."""
        m = self._IDENTIFIER_RE.match(self.buffer, self.position)
        if m is None:
            return None
        self.position = m.end()
        return m.group(0)

    # ----------------------------------------------------------- whitespace

    def require_whitespace(self):
        """Skip whitespace/comments; raise if nothing at all was skipped.

        Returns the number of characters skipped (always > 0).
        """
        v = self.get_whitespace()
        # get_whitespace() reports a miss as 0, never as None.
        if not v:
            raise ConfigTokenizerError('Expected whitespace at line %d' % self.line)
        return v

    def get_only_whitespace(self):
        """Skip plain whitespace (no comments); return the count skipped."""
        m = self._WHITESPACE_RE.match(self.buffer, self.position)
        if m is None:
            return 0
        self.position = m.end()
        self.line += m.group(0).count('\n')
        return m.end() - m.start()

    def _skip_line_comment(self):
        """Skip a ``//`` comment up to (not including) the newline."""
        m = self._LINE_COMMENT_RE.match(self.buffer, self.position)
        if m is None:
            return 0
        self.position = m.end()
        return m.end() - m.start()

    def _skip_block_comment(self):
        """Skip a ``/* ... */`` comment; raise if it is never closed."""
        start = self.position
        if not self.buffer.startswith('/*', start):
            return 0
        end = self.buffer.find('*/', start + 2)
        if end < 0:
            raise ConfigTokenizerError('No multiline comment end found at line %d' % self.line)
        end += 2  # include the closing "*/"
        self.line += self.buffer.count('\n', start, end)
        self.position = end
        return end - start

    def get_whitespace(self):
        """Skip any mix of whitespace and comments.

        Returns the total number of characters skipped (0 if none).
        Raises ConfigTokenizerError on an unterminated block comment.
        """
        total = 0
        while True:
            skipped = self.get_only_whitespace()
            skipped += self._skip_line_comment()
            skipped += self.get_only_whitespace()
            skipped += self._skip_block_comment()
            skipped += self.get_only_whitespace()
            if skipped <= 0:
                return total
            total += skipped

    # ---------------------------------------------------------------- other

    # "other" are characters (), {}, [], -, +, /, *, =, !, ?, :, ;, .
    # when you require this, you can also specify which character you need.
    def require_other(self, which='(){}[]-+/*=!?:;.'):
        """Return the punctuation character at the cursor if it is in ``which``.

        Raises ConfigTokenizerError otherwise.
        """
        v = self.get_other(which)
        if v is None:
            raise ConfigTokenizerError('Expected one of "%s" at line %d' % (which, self.line))
        return v

    def get_other(self, which='(){}[]-+/*=!?:;.'):
        """Read one punctuation character that is listed in ``which``.

        Returns the character, or ``None`` (cursor untouched) if the next
        character is not punctuation or is not in ``which``.
        """
        m = self._OTHER_RE.match(self.buffer, self.position)
        if m is None or m.group(0) not in which:
            return None
        self.position = m.end()
        return m.group(0)

    # --------------------------------------------------------------- string

    def require_string(self):
        """Return the string literal at the cursor or raise ConfigTokenizerError."""
        v = self.get_string()
        if v is None:
            raise ConfigTokenizerError('Expected string literal at line %d' % self.line)
        return v

    def get_escape(self, sequence):
        """Decode one escape; ``sequence`` is the text just after the backslash.

        Supported forms: ``x``/``u`` + 1-4 hex digits, ``0`` + 1-6 octal
        digits, and ``n``, ``r``, ``t``, ``\\``, ``"``.

        Returns ``(consumed, char)`` where ``consumed`` is the number of
        characters of ``sequence`` used, or ``(None, None)`` for an unknown
        or invalid escape.
        """
        if not sequence:
            return None, None
        lead = sequence[0]
        if lead in ('x', 'u'):  # hexadecimal character
            digits, base = self._HEX_ESCAPE_RE.match(sequence, 1), 16
        elif lead == '0':  # octal character
            digits, base = self._OCTAL_ESCAPE_RE.match(sequence, 1), 8
        elif lead in self._SIMPLE_ESCAPES:  # simple character
            return 1, self._SIMPLE_ESCAPES[lead]
        else:
            return None, None
        if digits is None:
            return None, None
        text = digits.group(0)
        try:
            return len(text) + 1, self._to_char(int(text, base))
        except ValueError:
            # code point outside the range the interpreter can represent
            return None, None

    def get_string(self):
        """Read a double-quoted string literal and return it unescaped.

        Returns ``None`` and leaves the cursor untouched if there is no
        string literal at the cursor or if it contains an invalid escape
        (one bad escape discards the whole string).
        """
        m = self._STRING_RE.match(self.buffer, self.position)
        if m is None:
            return None
        raw = m.group(0)
        body = raw[1:-1]  # drop the surrounding quotes
        pieces = []
        i = 0
        while i < len(body):
            ch = body[i]
            if ch != '\\':
                pieces.append(ch)
                i += 1
                continue
            # The longest escape is the lead character plus six digits.
            consumed, char = self.get_escape(body[i + 1:i + 8])
            if char is None:
                return None
            pieces.append(char)
            i += 1 + consumed
        # Commit only once the whole literal decoded successfully.
        self.position = m.end()
        self.line += raw.count('\n')
        return ''.join(pieces)

    # -------------------------------------------------------------- integer

    def require_integer(self):
        """Return the integer at the cursor or raise ConfigTokenizerError."""
        v = self.get_integer()
        if v is None:
            raise ConfigTokenizerError('Expected integer at line %d' % self.line)
        return v

    def get_integer(self):
        """Read an optionally negative integer literal.

        Formats: ``0x...`` (hexadecimal), ``0...`` with further digits
        (octal), otherwise decimal.  A whole digit run is one token, so a
        leading-zero literal containing ``8`` or ``9`` is rejected.

        Returns the value, or ``None`` with the cursor untouched.
        """
        start = self.position
        negative = self.buffer.startswith('-', start)
        if negative:
            start += 1
        m = self._INTEGER_RE.match(self.buffer, start)
        if m is None:
            return None
        digits = m.group(0)
        if digits == '0' and self.buffer.startswith('x', m.end()):
            return None  # "0x" prefix without any hex digits
        try:
            if digits.startswith('0x'):
                value = int(digits[2:], 16)
            elif len(digits) > 1 and digits[0] == '0':
                value = int(digits[1:], 8)
            else:
                value = int(digits, 10)  # includes a lone "0"
        except ValueError:
            return None
        self.position = m.end()
        return -value if negative else value

    # ---------------------------------------------------------------- float

    def require_float(self):
        """Return the float at the cursor or raise ConfigTokenizerError."""
        v = self.get_float()
        if v is None:
            raise ConfigTokenizerError('Expected float at line %d' % self.line)
        return v

    def get_float(self):
        """Read a float literal and return it as ``float``.

        Accepts an optional leading ``-``, digits with an optional decimal
        point (``1``, ``1.``, ``1.5``, ``.5``) and an optional exponent
        (``e-2``, ``E+3``).  Returns ``None`` with the cursor untouched if
        no such literal starts at the cursor.
        """
        m = self._FLOAT_RE.match(self.buffer, self.position)
        if m is None:
            return None
        self.position = m.end()
        return float(m.group(0))

    def is_eof(self):
        """Return True once the cursor has reached the end of the buffer."""
        return self.position >= len(self.buffer)
class ConfigParser:
    """Parse a config file into a tree of typed nodes and expose lookups.

    Every node is a dict ``{'type': <type name>, 'value': <payload>}``.
    Directory-type nodes (``directory`` and ``test``) hold a dict of child
    nodes keyed by name; field nodes (``string``, ``int``, ``float``,
    ``bool``) hold the parsed Python value.  ``self.data`` is the root
    directory node.

    Grammar handled by the parse loop::

        <field type> <name> = <value>
        <directory type> <name> [: <parent.path>] { ... }

    A directory with a parent starts out with the parent's children.
    Paths given to the accessors are dot-separated; empty path components
    are ignored, so ``''`` addresses the root.

    Note: directories still open at end of input are not reported as an
    error; only a ``}`` without a matching ``{`` is.
    """

    _DIRECTORY_TYPES = ('directory', 'test')

    def is_directory_type(self, type):
        """Return True if ``type`` names a node kind that contains children."""
        return type in self._DIRECTORY_TYPES

    def __init__(self, filename):
        """Read ``filename`` and parse it into ``self.data``.

        Raises:
            ConfigTokenizerError: a required token is missing or malformed.
            ConfigParserError: the structure is invalid (unknown type,
                duplicate name, unknown parent, unmatched ``}``,
                unexpected character).
        """
        self.data = {'type': 'directory', 'value': {}}  # root directory
        with open(filename, 'r') as f:
            tr = ConfigTokenizer(f.read())
        self._parse(tr)

    def _parse(self, tr):
        """Consume all declarations from tokenizer ``tr`` into ``self.data``."""
        dir_stack = [self.data]  # innermost open directory is last
        while not tr.is_eof():
            start = tr.position
            c_type = tr.get_identifier()
            if c_type is not None:
                self._parse_declaration(tr, c_type, dir_stack)
            tr.get_whitespace()
            # check for closing bracket if we're in a directory
            if tr.get_other('}') is not None:
                if len(dir_stack) == 1:
                    raise ConfigParserError('Bracket mismatch at line %d' % tr.line)
                dir_stack.pop()  # go one level back
            tr.get_whitespace()
            if tr.position == start:
                # Nothing matched: without this guard the loop would retry
                # the same character forever.
                raise ConfigParserError('Unexpected character "%s" at line %d'
                                        % (tr.buffer[start], tr.line))

    def _parse_declaration(self, tr, c_type, dir_stack):
        """Parse one field or directory header whose type was already read."""
        tr.get_whitespace()
        c_name = tr.require_identifier()
        tr.get_whitespace()
        # then should come either = or {, depending on type.
        # a directory may first name a parent with ":" to inherit from.
        c_parent = None
        if self.is_directory_type(c_type) and tr.get_other(':') is not None:
            tr.get_whitespace()
            c_parent = self._parse_parent_path(tr)
            tr.get_whitespace()
        c_operator = tr.require_other('{=')
        scope = dir_stack[-1]['value']

        if c_operator == '=':
            tr.get_whitespace()
            c_value = self._parse_field_value(tr, c_type)
            if c_name in scope:  # already exists, duplicate
                raise ConfigParserError('Duplicate value at line %d' % tr.line)
            scope[c_name] = {'type': c_type, 'value': c_value}
            return

        # "{" opens a directory
        if not self.is_directory_type(c_type):
            raise ConfigParserError('Type %s is not a directory type at line %d' % (c_type, tr.line))
        if c_name in scope:  # already exists, duplicate
            raise ConfigParserError('Duplicate value at line %d' % tr.line)
        new_dir = {'type': c_type, 'value': {}}
        if c_parent is not None:
            parent = self._resolve_parent(scope, c_parent, tr)
            # Children are shared with the parent by reference, not copied.
            new_dir['value'].update(parent['value'])
        scope[c_name] = new_dir
        dir_stack.append(new_dir)

    def _parse_parent_path(self, tr):
        """Read a dotted identifier path such as ``a.b.c`` and return it."""
        parts = [tr.require_identifier()]
        while tr.get_other('.') is not None:
            parts.append(tr.require_identifier())
        return '.'.join(parts)

    def _parse_field_value(self, tr, c_type):
        """Read the value literal that matches field type ``c_type``."""
        if c_type == 'string':
            return tr.require_string()
        if c_type == 'int':
            return tr.require_integer()
        if c_type == 'float':
            return tr.require_float()
        if c_type == 'bool':
            word = tr.require_identifier()
            if word == 'true':
                return True
            if word == 'false':
                return False
            raise ConfigParserError('"true" or "false" expected at line %d' % tr.line)
        raise ConfigParserError('Type %s is not a field type at line %d' % (c_type, tr.line))

    def _resolve_parent(self, scope, c_parent, tr):
        """Find the inherited directory: current scope first, then from the root."""
        parent = scope.get(c_parent)
        if parent is None:
            parent = self.get_node(c_parent)
            if parent is None:
                raise ConfigParserError('Unknown inherited directory at line %d' % tr.line)
        if not self.is_directory_type(parent['type']):
            raise ConfigParserError('Inherited field is not a directory type (%s) at line %d' % (parent['type'], tr.line))
        return parent

    # get value by path (dot-separated)
    def get_node(self, path):
        """Return the node at ``path`` or ``None`` if it does not exist.

        ``None`` is also returned when the path tries to descend into a
        field node.
        """
        node = self.data
        for part in path.split('.'):
            if not part:
                continue
            if not self.is_directory_type(node['type']):
                return None
            if part not in node['value']:
                return None
            node = node['value'][part]
        return node

    def get_json_from_node(self, node):
        """Convert ``node`` to plain data: nested dicts for directories,
        the bare value for fields."""
        if not self.is_directory_type(node['type']):
            return node['value']
        return dict((name, self.get_json_from_node(child))
                    for name, child in node['value'].items())

    def get_json(self, path):
        """Return the subtree at ``path`` as plain data.

        Raises:
            ConfigError: ``path`` does not exist.
        """
        node = self.get_node(path)
        if node is None:
            raise ConfigError("Path %s not found" % path)
        return self.get_json_from_node(node)

    def _get_typed(self, path, type_name, description):
        """Return the value at ``path`` if the node's type tag is ``type_name``.

        The type tag is checked rather than the Python type of the value,
        so a bool field is never accepted as an int.
        """
        node = self.get_node(path)
        if node is None or node['type'] != type_name:
            raise ConfigError("Path %s not found or not %s" % (path, description))
        return node['value']

    def get_int(self, path):
        """Return the ``int`` field at ``path``; raise ConfigError otherwise."""
        return self._get_typed(path, 'int', 'an integer')

    def get_float(self, path):
        """Return the ``float`` field at ``path``; raise ConfigError otherwise."""
        return self._get_typed(path, 'float', 'a float')

    def get_bool(self, path):
        """Return the ``bool`` field at ``path``; raise ConfigError otherwise."""
        return self._get_typed(path, 'bool', 'a bool')

    def get_string(self, path):
        """Return the ``string`` field at ``path``; raise ConfigError otherwise."""
        return self._get_typed(path, 'string', 'a string')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement