Guest User

Untitled

a guest
Jun 24th, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.81 KB | None | 0 0
  1. """
  2. XHTML cleaner, based on Cleaner from lxml.html.
  3. """
  4.  
  5. import copy
  6. from lxml.html import clean
  7. from lxml.html import tostring, fromstring, bytes
  8.  
  9.  
  10. def _transform_result(typ, result):
  11. """Convert the result back into the input type.
  12. """
  13. if issubclass(typ, bytes):
  14. return tostring(result, encoding = 'utf-8', method = 'xml')
  15. elif issubclass(typ, unicode):
  16. return tostring(result, encoding = unicode, method = 'xml')
  17. else:
  18. return result
  19.  
  20.  
  21.  
  22. class Cleaner(clean.Cleaner):
  23. def clean_html(self, html):
  24. result_type = type(html)
  25. if isinstance(html, basestring):
  26. doc = fromstring(html)
  27. else:
  28. doc = copy.deepcopy(html)
  29. self(doc)
  30. return _transform_result(result_type, doc)
  31.  
  32. cleaner = Cleaner()
  33. clean_html = cleaner.clean_html
Add Comment
Please, Sign In to add comment