Advertisement
Guest User

Untitled

a guest
Sep 15th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.13 KB | None | 0 0
  1. class StringPreprocessor(object):
  2. """Constructor for a extensible unit expression string preprocessor."""
  3.  
  4. # List of default replacement pairs
  5. _default_replacements = [(",", ""),
  6. (" per ", "/"),
  7. ("^", "**")]
  8.  
  9. # List of default regex substitution pairs.
  10. _default_subs_re = [('\N{DEGREE SIGN}', " degree"),
  11. (r"([\w\.\-\+\*\\\^])\s+", r"\1 "), # merge multiple spaces
  12. (r"({}) squared", r"\1**2"), # Handle square and cube
  13. (r"({}) cubed", r"\1**3"),
  14. (r"cubic ({})", r"\1**3"),
  15. (r"square ({})", r"\1**2"),
  16. (r"sq ({})", r"\1**2"),
  17. (r"\b([0-9]+\.?[0-9]*)(?=[e|E][a-zA-Z]|[a-df-zA-DF-Z])", r"\1*"), # Handle numberLetter for multiplication
  18. (r"([\w\.\-])\s+(?=\w)", r"\1*"), # Handle space for multiplication
  19. ]
  20.  
  21. # Define pretty format translation and regexes
  22. _pretty_table = maketrans('⁰¹²³⁴⁵⁶⁷⁸⁹·⁻', '0123456789*-')
  23. _pretty_exp_re = re.compile(r"⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+(?:\.[⁰¹²³⁴⁵⁶⁷⁸⁹]*)?")
  24.  
  25. def __init__(self):
  26. # Instantiate by compiling default regexes and setting replacements list from defaults
  27. self.reset_regex_subs()
  28. self.reset_replacements()
  29.  
  30. def __call__(self, input_string):
  31. """Preprocess input string according to defined replacements and regexes.
  32.  
  33. Processing occurs in the following order:
  34.  
  35. 1) String replacements as defined by tuples in the replacements
  36. 2) Regex substitutions (both default and those added by `add_regex_sub()`)
  37. 3) Pretty text format character handling
  38. """
  39. # String replacements
  40. for current, replacement in self._replacements:
  41. input_string = input_string.replace(current, replacement)
  42.  
  43. # Regex substitutions
  44. for a, b in self._compiled_subs_re:
  45. input_string = a.sub(b, input_string)
  46.  
  47. # Replace pretty format characters
  48. for pretty_exp in self._pretty_exp_re.findall(input_string):
  49. exp = '**' + pretty_exp.translate(self._pretty_table)
  50. input_string = input_string.replace(pretty_exp, exp)
  51. input_string = input_string.translate(self._pretty_table)
  52.  
  53. return input_string
  54.  
  55. def add_replacement(self, current, replacement):
  56. """Add given replacement pair to the replacement list."""
  57. self._replacements.append((current, replacement))
  58.  
  59. def reset_replacements(self):
  60. """Reset replacement list to default."""
  61. self._replacements = self._default_replacements
  62.  
  63. def add_regex_sub(self, regex_string, replacement):
  64. """Compile the given regex_string and append it to the regex sub list."""
  65. self._compiled_subs_re.append((re.compile(regex_string), replacement))
  66.  
  67. def reset_regex_subs(self):
  68. """Reset regex substitution list to default."""
  69. self._compiled_subs_re = [(re.compile(a.format(r"[_a-zA-Z][_a-zA-Z0-9]*")), b)
  70. for a, b in self._default_subs_re]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement