Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def fix_problematic_chars(text: str) -> str:
    """Return *text* with the micro sign replaced by the Greek small letter mu.

    The micro sign (U+00B5) and the Greek letter mu (U+03BC) render
    identically but are different code points; this normalizes every
    occurrence to the Greek letter.

    Args:
        text: The string to normalize.

    Returns:
        A copy of ``text`` in which each U+00B5 has become U+03BC.
    """
    # Named escapes are used because the two characters cannot be told apart
    # by eye in source code. A plain str.replace suffices for a literal
    # single-character substitution; no regex is needed.
    return text.replace("\N{MICRO SIGN}", "\N{GREEK SMALL LETTER MU}")
# Latin capitals (plain and acute-accented) that look identical to a Greek
# capital, mapped to the Greek letter they imitate. Greek values (and the
# accented Latin keys) are written as named escapes because the glyphs are
# indistinguishable by eye in source code.
_LATIN_TO_GREEK_CAPITALS = {
    "A": "\N{GREEK CAPITAL LETTER ALPHA}",
    "\N{LATIN CAPITAL LETTER A WITH ACUTE}": "\N{GREEK CAPITAL LETTER ALPHA WITH TONOS}",
    "B": "\N{GREEK CAPITAL LETTER BETA}",
    "E": "\N{GREEK CAPITAL LETTER EPSILON}",
    "\N{LATIN CAPITAL LETTER E WITH ACUTE}": "\N{GREEK CAPITAL LETTER EPSILON WITH TONOS}",
    "H": "\N{GREEK CAPITAL LETTER ETA}",
    "I": "\N{GREEK CAPITAL LETTER IOTA}",
    "\N{LATIN CAPITAL LETTER I WITH ACUTE}": "\N{GREEK CAPITAL LETTER IOTA WITH TONOS}",
    "K": "\N{GREEK CAPITAL LETTER KAPPA}",
    "M": "\N{GREEK CAPITAL LETTER MU}",
    "N": "\N{GREEK CAPITAL LETTER NU}",
    "O": "\N{GREEK CAPITAL LETTER OMICRON}",
    "\N{LATIN CAPITAL LETTER O WITH ACUTE}": "\N{GREEK CAPITAL LETTER OMICRON WITH TONOS}",
    "P": "\N{GREEK CAPITAL LETTER RHO}",
    "T": "\N{GREEK CAPITAL LETTER TAU}",
    "X": "\N{GREEK CAPITAL LETTER CHI}",
    "Y": "\N{GREEK CAPITAL LETTER UPSILON}",
    "\N{LATIN CAPITAL LETTER Y WITH ACUTE}": "\N{GREEK CAPITAL LETTER UPSILON WITH TONOS}",
    "Z": "\N{GREEK CAPITAL LETTER ZETA}",
}

# Translation table for converting a whole run of look-alikes in one call.
_LATIN_TO_GREEK_TABLE = str.maketrans(_LATIN_TO_GREEK_CAPITALS)

# One or more consecutive look-alike capitals. The character class is derived
# from the mapping so the pattern and the table can never drift apart.
_LATIN_LOOKALIKE_RUN_RE = re.compile("[" + "".join(_LATIN_TO_GREEK_CAPITALS) + "]+")

# Inclusive code-point range treated as "Greek" when deciding whether a run of
# look-alikes belongs to a Greek word (U+0386 .. U+03FF).
_GREEK_RANGE_FIRST = "\u0386"
_GREEK_RANGE_LAST = "\u03ff"


def fix_latin_capitals(text: str) -> str:
    """Replace Latin capitals that stand in for Greek capitals.

    A maximal run of Latin look-alike capitals (e.g. Latin ``A`` used for
    Greek capital alpha) is converted to the corresponding Greek letters
    when the character immediately after the run lies in U+0386..U+03FF.
    Runs not followed by such a character are left untouched, so ordinary
    Latin words and acronyms are not altered.

    Args:
        text: The string to repair.

    Returns:
        A copy of ``text`` with the qualifying Latin capitals replaced by
        their Greek counterparts.
    """

    def _to_greek(match: "re.Match[str]") -> str:
        run = match.group(0)
        # Peek at the single character after the run; slicing yields "" at
        # the end of the string, which fails the guard below.
        following = match.string[match.end():match.end() + 1]
        if following and _GREEK_RANGE_FIRST <= following <= _GREEK_RANGE_LAST:
            return run.translate(_LATIN_TO_GREEK_TABLE)
        return run

    # Matching maximal runs and checking the follower in the callback keeps
    # the scan linear, unlike a backtracking lookahead over the run.
    return _LATIN_LOOKALIKE_RUN_RE.sub(_to_greek, text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement