Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
def emojis_to_html_entities(html):
    """Replace every astral-plane character in *html* with an HTML entity.

    Characters in the range U+10000..U+10FFFF (where most emoji live) are
    rewritten as hexadecimal numeric character references, zero-padded to
    eight lowercase hex digits, e.g. U+1F60E becomes ``&#x0001f60e;``.
    Characters in the Basic Multilingual Plane (such as ``ü``) are left
    untouched.

    Args:
        html: Unicode text to process.

    Returns:
        A copy of *html* with each matched character replaced by its entity.
    """
    import re

    def replace_with_html_entity(match):
        """Build the numeric character reference for one matched character."""
        text = match.group(0)
        if len(text) == 2:
            # Narrow (UCS-2) build: the match is a high/low surrogate pair,
            # so recombine both halves into the real code point.
            high = ord(text[0])
            low = ord(text[1])
            codepoint = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
        else:
            codepoint = ord(text)
        # Derive the digits from the code point itself rather than slicing
        # repr(), whose layout differs between Python versions and builds.
        return u'&#x{:08x};'.format(codepoint)

    try:
        # UCS-4 / Python 3: astral characters are single code points.
        emoji_pattern = re.compile(u'[\U00010000-\U0010ffff]', flags=re.UNICODE)
    except re.error:
        # UCS-2: the range above is rejected as a character class, so match
        # explicit surrogate pairs instead.
        emoji_pattern = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]', flags=re.UNICODE)

    return emoji_pattern.sub(replace_with_html_entity, html)
if __name__ == "__main__":
    # Self-check: the emoji must come back as a numeric character reference,
    # while the BMP character "ü" must pass through unchanged.
    s = u"How 😎 is that, Mr. Müller?"
    r = emojis_to_html_entities(s)
    assert r == u"How &#x0001f60e; is that, Mr. Müller?"
    print(r)
Add Comment
Please, Sign In to add comment