daily pastebin goal
10%
SHARE
TWEET

Untitled

a guest Oct 12th, 2018 56 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from bs4 import BeautifulSoup
  2. from bs4.builder._lxml import LXML as BS_LXML_FEATURE
  3.  
  4. class FragmentSoup(BeautifulSoup):
  5.     """A custom BeautifulSoup implementation that properly represents fragments when using lxml, without wrapping them
  6.        in a document on output (internally, they are still wrapped)."""
  7.    
  8.     def _feed(self, *args, **kwargs):
  9.         if not hasattr(self, '__markup_enclosure'):
  10.             self.__markup_enclosure = None
  11.            
  12.             if BS_LXML_FEATURE in self.builder.features and (not self.builder.is_xml) and not (
  13.                self.markup.startswith('<html') or self.markup.startswith('<!doctype')):
  14.                 # Wrap in an enclosure -- if this is a <body> then don't include another
  15.                 if self.markup.startswith('<body'):
  16.                     self.markup = '<html>%s</html>' % self.markup
  17.                     self.__markup_enclosure = 'html'
  18.                 else:
  19.                     self.markup = '<html><body>%s</body></html>' % self.markup
  20.                     self.__markup_enclosure = 'body'
  21.        
  22.         return super(FragmentSoup, self)._feed(*args, **kwargs)
  23.    
  24.     def decode(self, *args, **kwargs):
  25.         if self.__markup_enclosure is not None:
  26.             targets = self.find(self.__markup_enclosure).children
  27.             return ''.join(t.decode(*args, **kwargs) for t in targets)
  28.         else:
  29.             return super(FragmentSoup, self).decode(*args, **kwargs)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top