
Untitled
By: a guest on
Jun 30th, 2012 | syntax:
None | size: 0.93 KB | hits: 15 | expires: Never
How to handle Unicode characters in Python with regex
aren’t=ain’t
hello=hey
u'aren\u2019t' = u'ain\u2019t'
u'hello' = u'hey'
text = u"aren't"
def replace_all(text, dict):
    """Lower-case *text* and replace whole-word occurrences of the given keys.

    Every key of the mapping that appears in the lower-cased text as a whole
    word (regex ``\\bKEY\\b``) is replaced by the corresponding value.  All
    keys are matched in a single pass, so the result does not depend on the
    mapping's iteration order and a replacement is never itself replaced.

    Args:
        text: The (unicode) string to process.
        dict: Mapping of word -> replacement.  Keys are compared against the
            lower-cased text, so a key containing upper-case letters never
            matches.  (The parameter name shadows the builtin; it is kept so
            existing keyword callers continue to work.)

    Returns:
        The lower-cased text with the replacements applied.  Replacement
        values are inserted literally, with their own case preserved.
    """
    lowered = text.lower()
    if not dict:
        # An empty alternation would match the empty string at every word
        # boundary, so there is nothing to substitute.
        return lowered

    # Longest keys first, so a longer phrase wins over a shorter key that is
    # a prefix of it.
    keys = sorted(dict, key=len, reverse=True)
    # re.escape keeps regex metacharacters in a key literal; re.UNICODE makes
    # \b honour non-ASCII word characters on Python 2 as well.
    pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(key) for key in keys) + r")\b",
        re.UNICODE
    )
    # A callable replacement is inserted verbatim (no backslash or group
    # reference processing of the replacement text).
    return pattern.sub(lambda match: dict[match.group(0)], lowered)
text = u"aren't"
text = u"aren’t"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
# Replacement table: each key is a lower-case word to look for and each value
# is the text substituted for it.  The apostrophes below are U+2019 (right
# single quotation mark), not the ASCII apostrophe.
d = {
    u'aren’t': u'ain’t',
    u'hello': u'hey',
}

# Sample input.  The ASCII-apostrophe variant (u"aren't") is left disabled;
# the active value uses U+2019 so that it matches the key in `d`.
# text = u"aren't"
text = u"aren’t"
def replace_all(text, d):
    """Lower-case *text* and replace whole-word occurrences of the keys of *d*.

    Every key of *d* that appears in the lower-cased text as a whole word
    (regex ``\\bKEY\\b``) is replaced by the corresponding value.  All keys
    are matched in a single pass, so the result does not depend on the
    mapping's iteration order and a replacement is never itself replaced.

    Args:
        text: The (unicode) string to process.
        d: Mapping of word -> replacement.  Keys are compared against the
            lower-cased text, so a key containing upper-case letters never
            matches.

    Returns:
        The lower-cased text with the replacements applied.  Replacement
        values are inserted literally, with their own case preserved.
    """
    lowered = text.lower()
    if not d:
        # An empty alternation would match the empty string at every word
        # boundary, so there is nothing to substitute.
        return lowered

    # Longest keys first, so a longer phrase wins over a shorter key that is
    # a prefix of it.
    keys = sorted(d, key=len, reverse=True)
    # re.escape keeps regex metacharacters in a key literal; re.UNICODE makes
    # \b honour non-ASCII word characters on Python 2 as well.
    pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(key) for key in keys) + r")\b",
        re.UNICODE
    )
    # A callable replacement is inserted verbatim (no backslash or group
    # reference processing of the replacement text).
    return pattern.sub(lambda match: d[match.group(0)], lowered)
if __name__ == '__main__':
    # Demo: run the sample text through the replacement table and show the
    # result.  The parenthesised single-argument form of print behaves the
    # same as the print statement on Python 2 and is also valid on Python 3.
    newtext = replace_all(text, d)
    print(newtext)
ain’t
>>> re.sub(ur'\baren\u2019t\b', 'rep', u'aren\u2019t')
u'rep'