Advertisement
Guest User

Greek regexes

a guest
Jun 4th, 2024
44
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.87 KB | None | 0 0
  def fix_problematic_chars(text: str) -> str:
      """Replace the micro sign with the Greek small letter mu.

      The micro sign (U+00B5) and the Greek small letter mu (U+03BC) are
      different code points that look the same on screen; this swaps every
      occurrence of the former for the latter.

      Args:
          text: The string to normalise.

      Returns:
          ``text`` with every U+00B5 replaced by U+03BC.
      """
      # Escapes are deliberate: the two glyphs cannot be told apart when
      # written literally. A plain str.replace is enough for a literal,
      # single-character substitution.
      return text.replace("\u00b5", "\u03bc")
  6.  
  7.  
  def fix_latin_capitals(text: str) -> str:
      """Replace Latin capitals that stand in for look-alike Greek capitals.

      A Latin capital (for example ``A`` typed instead of capital alpha) is
      converted only when it belongs to a run of look-alike letters that is
      immediately followed by a character in the range U+0386..U+03FF.
      Text with no such Greek character after the run is left untouched.

      Args:
          text: The string to repair.

      Returns:
          ``text`` with the affected Latin capitals swapped for their Greek
          counterparts.
      """
      # Latin alphabet: ABCDEFGHIJKLMNOPQRSTUVWXYZ
      # Greek alphabet: ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
      #
      # Latin capitals that have a visually identical Greek capital:
      # AÁBEÉHIÍKMNOÓPTXYÝZ

      # Keys are Latin, values are Greek.
      # fmt: off
      latin_to_greek = {
          "A": "Α", "Á": "Ά",
          "B": "Β",
          "E": "Ε", "É": "Έ",
          "H": "Η",
          "I": "Ι", "Í": "Ί",
          "K": "Κ",
          "M": "Μ",
          "N": "Ν",
          "O": "Ο", "Ó": "Ό",
          "P": "Ρ",
          "T": "Τ",
          "X": "Χ",
          "Y": "Υ", "Ý": "Ύ",
          "Z": "Ζ",
      }
      # fmt: on

      lookalikes = "".join(latin_to_greek)
      translation = str.maketrans(latin_to_greek)

      # Match the whole run of look-alikes that precedes a Greek character.
      # Matching one letter at a time would leave the earlier letters of a
      # run such as "AB" + Greek untouched, because only the last one is
      # directly followed by a Greek character.
      pattern = rf"[{lookalikes}]+(?=[\u0386-\u03ff])"

      return re.sub(pattern, lambda m: m.group(0).translate(translation), text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement