Guest User

Untitled

a guest
Oct 12th, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.41 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. from bs4.builder._lxml import LXML as BS_LXML_FEATURE
  3.  
  4. class FragmentSoup(BeautifulSoup):
  5. """A custom BeautifulSoup implementation that properly represents fragments when using lxml, without wrapping them
  6. in a document on output (internally, they are still wrapped)."""
  7.  
  8. def _feed(self, *args, **kwargs):
  9. if not hasattr(self, '__markup_enclosure'):
  10. self.__markup_enclosure = None
  11.  
  12. if BS_LXML_FEATURE in self.builder.features and (not self.builder.is_xml) and not (
  13. self.markup.startswith('<html') or self.markup.startswith('<!doctype')):
  14. # Wrap in an enclosure -- if this is a <body> then don't include another
  15. if self.markup.startswith('<body'):
  16. self.markup = '<html>%s</html>' % self.markup
  17. self.__markup_enclosure = 'html'
  18. else:
  19. self.markup = '<html><body>%s</body></html>' % self.markup
  20. self.__markup_enclosure = 'body'
  21.  
  22. return super(FragmentSoup, self)._feed(*args, **kwargs)
  23.  
  24. def decode(self, *args, **kwargs):
  25. if self.__markup_enclosure is not None:
  26. targets = self.find(self.__markup_enclosure).children
  27. return ''.join(t.decode(*args, **kwargs) for t in targets)
  28. else:
  29. return super(FragmentSoup, self).decode(*args, **kwargs)
Add Comment
Please, Sign In to add comment