Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Text type used to coerce the argument: ``unicode`` on Python 2, ``str`` on
# Python 3 (where the ``unicode`` builtin no longer exists).
try:
    _text_type = unicode  # noqa: F821 -- only defined on Python 2
except NameError:
    _text_type = str

# Matches any single code point in the combining-mark blocks
# U+0300-U+036F, U+1DC0-U+1DFF, U+20D0-U+20FF and U+FE20-U+FE2F.
regex_combining = re.compile(
    u'[\u0300-\u036f\u1dc0-\u1dff\u20d0-\u20ff\ufe20-\ufe2f]', re.U)


def remove_diacritics(s):
    """Return *s* with combining diacritical marks stripped.

    The value is coerced to text, decomposed (NFD) so that accents become
    separate combining characters, every character matched by
    ``regex_combining`` is deleted, and the result is recomposed (NFC).

    Hand this a unicode (text) string, not an encoded one: decode byte
    strings yourself before calling.

    :param s: the text to clean; non-string values are converted with the
        interpreter's text type first.
    :returns: a text string in NFC form without the matched combining marks.
    """
    decomposed = unicodedata.normalize('NFD', _text_type(s))
    stripped = regex_combining.sub(u'', decomposed)
    # The NFC pass is required: NFD also splits characters whose pieces are
    # outside the stripped ranges (e.g. Hangul syllables into jamo), and
    # those must be put back together again.
    return unicodedata.normalize('NFC', stripped)
Add Comment
Please, Sign In to add comment