Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import itertools
import re
import sys
import unicodedata

# Python 2 spells the code-point constructor and the lazy range differently;
# fall back to the Python 3 names when the Python 2 ones are missing.
try:
    _chr, _range = unichr, xrange
except NameError:  # Python 3
    _chr, _range = chr, range


def _is_other(ch):
    """Return True when the general category of *ch* starts with 'C'."""
    return unicodedata.category(ch)[0] == 'C'


def _class_item(run):
    """Return the regex character-class item covering the contiguous *run*."""
    first, last = re.escape(run[0]), re.escape(run[-1])
    return first if len(run) == 1 else first + u'-' + last


# Every code point this interpreter can represent.  sys.maxunicode is used
# instead of a hard-coded 0x110000 so narrow Python 2 builds do not raise
# ValueError.  The generator is consumed by the scan just below.
all_chars = (_chr(i) for i in _range(sys.maxunicode + 1))

# Consecutive runs of characters whose general category is C* (control,
# format, surrogate, private use, unassigned); see
# http://www.unicode.org/reports/tr44/#General_Category_Values
_cc_runs = [
    u''.join(_run)
    for _flag, _run in itertools.groupby(all_chars, key=_is_other)
    if _flag
]

# Flat string of every C* character, kept for code that imports this name.
control_chars = u''.join(_cc_runs)

# One "first-last" range per run rather than one literal per character:
# the class stays at a few hundred items instead of roughly a million, so
# it compiles quickly and matching does not scan a huge literal list.
cc_re = re.compile(u'[%s]' % u''.join(_class_item(_r) for _r in _cc_runs))


def rm_control_chars(s):
    """Return *s* with every character of general category C* removed.

    That covers control (Cc), format (Cf), surrogate (Cs), private-use (Co)
    and unassigned (Cn) code points, so tabs and newlines are stripped too.
    All other characters are kept in their original order.
    """
    return cc_re.sub('', s)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement