Advertisement
here2share

# string_drop_accents_and_remove_non_ascii.py

Aug 11th, 2015
347
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.06 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. # string_drop_accents_and_remove_non_ascii.py
  3.  
  4. import unicodedata, re
  5.  
  6. def EngConv(string):
  7.     '''
  8.     Return the base character of char, by "removing" any
  9.     diacritics like accents or curls and strokes and the like.
  10.     '''
  11.     result=''
  12.     try:
  13.         for char in string:
  14.             desc = unicodedata.name(char)
  15.             cutoff = desc.find(' WITH ')
  16.             if cutoff != -1:
  17.                 desc = desc[:cutoff]
  18.             result+=unicodedata.lookup(desc)
  19.     except: result = '!!!'
  20.     return re.sub(r'[^\x00-\x7F]+','', result).strip()
  21.  
  22. string=u"À à È è Ì ì Ò ò Ù ù Ǹ ǹ Ẁ ẁ Ỳ ỳ Ǜ ǜ Ὲ ὲ Ὴ ὴ Ὶ ὶ Ὸ ὸ Ὺ ὺ Ὼ ὼ Ἂ ἂ Ἒ ἒ Ἢ ἢ Ἲ ἲ Ὂ ὂ ὒ Ὢ ὢ Ἃ ἃ Ἓ ἓ Ἣ ἣ Ἳ ἳ Ὃ ὃ Ὓ ὓ Ὣ ὣ ᾲ ῂ ῲ ᾊ ᾂ ᾚ ᾒ ᾪ ᾢ ᾋ ᾃ ᾛ ᾓ ᾫ ᾣ ῭ ῒ ῢ Ầ ầ Ề ề Ồ ồ Ằ ằ Ờ ờ Ừ ừ Ѐ ѐ Ѝ ѝ Ѷ ѷ Ὰ ὰ Ȁ ȁ Ȅ ȅ Ȉ ȉ Ȍ ȍ Ȑ ȑ Ȕ ȕ Ḕ ḕ Ṑ ṑ".split(' ')
  23.  
  24. for translate in list(string):
  25.     tmp=EngConv(translate)
  26.     if not tmp: tmp='!!! Unable to Convert !!!'
  27.     print translate,'=',tmp
  28. #
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement