Guest User

Untitled

a guest
Jan 17th, 2018
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.78 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. def emojis_to_html_entities(html):
  4. import re
  5.  
  6. def replace_with_html_entity(match):
  7. # from something like "u'\U0001f60e'" get only "0001f60"
  8. codepoint = repr(match.group(0))[4:-1]
  9. return u'&#x{};'.format(codepoint)
  10.  
  11. try:
  12. # UCS-4
  13. emoji_pattern = re.compile(u'[\U00010000-\U0010ffff]', flags=re.UNICODE | re.MULTILINE)
  14. except re.error:
  15. # UCS-2
  16. emoji_pattern = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]', flags=re.UNICODE | re.MULTILINE)
  17.  
  18. result = re.sub(emoji_pattern, replace_with_html_entity, html)
  19. return result
  20.  
  21.  
  22. if __name__ == "__main__":
  23. s = u"How 😎 is that, Mr. Müller?"
  24. r = emojis_to_html_entities(s)
  25. assert(r == u"How 😎 is that, Mr. Müller?")
  26. print(r)
Add Comment
Please, Sign In to add comment