Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- # string_drop_accents_and_remove_non_ascii.py
- import unicodedata, re
def EngConv(string):
    '''
    Return an ASCII-only version of `string` with diacritics removed.

    Each character is looked up by its Unicode name; if the name contains
    ' WITH ' (as in 'LATIN SMALL LETTER E WITH GRAVE'), everything from
    ' WITH ' onward is cut off and the character carrying the shortened
    name is used instead.  Whatever is still outside the ASCII range
    afterwards (for example Greek or Cyrillic base letters) is dropped,
    and leading/trailing whitespace is stripped.

    string -- an iterable of single Unicode characters (a text string).

    Returns the converted text.  This may be the empty string when
    nothing in the input maps to ASCII.  Returns the marker '!!!' when
    the input is not text at all (iterating it or naming one of its
    items raises TypeError).
    '''
    pieces = []
    try:
        for char in string:
            try:
                name = unicodedata.name(char)
                # partition() yields the whole name when ' WITH ' is absent.
                base = unicodedata.lookup(name.partition(' WITH ')[0])
            except (ValueError, KeyError):
                # ValueError: the character has no Unicode name (tab,
                # newline and other control characters).
                # KeyError: the shortened name is not itself a character.
                # Keep the character as-is so a single odd character does
                # not discard the rest of the conversion; the ASCII filter
                # below still removes it if it is not ASCII.
                base = char
            pieces.append(base)
    except TypeError:
        # Not text (None, numbers, byte values, ...): keep the historical
        # failure marker so existing callers see the same value.
        return '!!!'
    return re.sub(r'[^\x00-\x7F]+', '', ''.join(pieces)).strip()
# Demo: run EngConv over a sample of accented Latin, Greek and Cyrillic
# characters and print each one next to its ASCII conversion.
string = u"À à È è Ì ì Ò ò Ù ù Ǹ ǹ Ẁ ẁ Ỳ ỳ Ǜ ǜ Ὲ ὲ Ὴ ὴ Ὶ ὶ Ὸ ὸ Ὺ ὺ Ὼ ὼ Ἂ ἂ Ἒ ἒ Ἢ ἢ Ἲ ἲ Ὂ ὂ ὒ Ὢ ὢ Ἃ ἃ Ἓ ἓ Ἣ ἣ Ἳ ἳ Ὃ ὃ Ὓ ὓ Ὣ ὣ ᾲ ῂ ῲ ᾊ ᾂ ᾚ ᾒ ᾪ ᾢ ᾋ ᾃ ᾛ ᾓ ᾫ ᾣ ῭ ῒ ῢ Ầ ầ Ề ề Ồ ồ Ằ ằ Ờ ờ Ừ ừ Ѐ ѐ Ѝ ѝ Ѷ ѷ Ὰ ὰ Ȁ ȁ Ȅ ȅ Ȉ ȉ Ȍ ȍ Ȑ ȑ Ȕ ȕ Ḕ ḕ Ṑ ṑ".split(' ')
for translate in string:
    tmp = EngConv(translate)
    # EngConv returns an empty string when nothing survives the ASCII filter.
    if not tmp:
        tmp = '!!! Unable to Convert !!!'
    # Single-argument print(...) gives the same "X = Y" line on Python 2 and 3.
    print(u'%s = %s' % (translate, tmp))
- #
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement