Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class StringPreprocessor(object):
    """Extensible preprocessor for unit expression strings.

    Calling an instance rewrites a human-friendly expression (for example
    ``"3 meters squared"`` or ``"m²·s⁻²"``) into one that uses explicit ``*``
    and ``**`` operators.  Plain string replacements and regex substitutions
    can be added per instance and reset back to the class defaults.
    """

    # Default plain-text replacement pairs, applied in order.
    _default_replacements = [
        (",", ""),
        (" per ", "/"),
        ("^", "**"),
    ]

    # Pattern substituted for the ``{}`` placeholder in the default regexes.
    _identifier_re = r"[_a-zA-Z][_a-zA-Z0-9]*"

    # Default regex substitution pairs, applied in order.
    _default_subs_re = [
        ('\N{DEGREE SIGN}', " degree"),
        (r"([\w\.\-\+\*\\\^])\s+", r"\1 "),  # merge multiple spaces
        (r"({}) squared", r"\1**2"),  # handle square and cube
        (r"({}) cubed", r"\1**3"),
        (r"cubic ({})", r"\1**3"),
        (r"square ({})", r"\1**2"),
        (r"sq ({})", r"\1**2"),
        # Number directly followed by a letter means multiplication.  An
        # "e"/"E" only counts when followed by another letter, so "1e5"
        # is left alone.
        (r"\b([0-9]+\.?[0-9]*)(?=[eE][a-zA-Z]|[a-df-zA-DF-Z])", r"\1*"),
        (r"([\w\.\-])\s+(?=\w)", r"\1*"),  # whitespace means multiplication
    ]

    # Pretty-format handling: code point -> ASCII replacement, in the form
    # ``translate`` expects, plus a regex matching a superscript exponent.
    _pretty_table = dict(zip(map(ord, '⁰¹²³⁴⁵⁶⁷⁸⁹·⁻'), '0123456789*-'))
    _pretty_exp_re = re.compile(r"⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+(?:\.[⁰¹²³⁴⁵⁶⁷⁸⁹]*)?")

    def __init__(self):
        """Compile the default regexes and load the default replacements."""
        self.reset_regex_subs()
        self.reset_replacements()

    def __call__(self, input_string):
        """Return ``input_string`` preprocessed by the configured rules.

        Processing occurs in the following order:

        1) Plain string replacements (defaults plus `add_replacement()`).
        2) Regex substitutions (defaults plus `add_regex_sub()`).
        3) Pretty-format characters: superscript exponents become
           ``**<exponent>`` and remaining pretty characters are translated.
        """
        for current, replacement in self._replacements:
            input_string = input_string.replace(current, replacement)

        for pattern, replacement in self._compiled_subs_re:
            input_string = pattern.sub(replacement, input_string)

        # Substitute each exponent at its own match position.  A global
        # str.replace per match would corrupt exponents that contain a
        # shorter one (e.g. "²" inside "⁻²").
        input_string = self._pretty_exp_re.sub(self._expand_pretty_exp,
                                               input_string)
        return input_string.translate(self._pretty_table)

    def _expand_pretty_exp(self, match):
        """Turn one matched superscript exponent into ``**<ascii exponent>``."""
        return '**' + match.group(0).translate(self._pretty_table)

    def add_replacement(self, current, replacement):
        """Add given replacement pair to this instance's replacement list."""
        self._replacements.append((current, replacement))

    def reset_replacements(self):
        """Reset replacement list to default."""
        # Copy, so that add_replacement() never mutates the class-level
        # defaults shared by every instance.
        self._replacements = list(self._default_replacements)

    def add_regex_sub(self, regex_string, replacement):
        """Compile the given regex_string and append it to the regex sub list.

        Unlike the default patterns, ``regex_string`` is compiled as given;
        no ``{}`` placeholder is filled in.
        """
        self._compiled_subs_re.append((re.compile(regex_string), replacement))

    def reset_regex_subs(self):
        """Reset regex substitution list to default."""
        self._compiled_subs_re = [
            (re.compile(pattern.format(self._identifier_re)), replacement)
            for pattern, replacement in self._default_subs_re
        ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement