Advertisement
Guest User

Untitled

a guest
Mar 1st, 2012
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.48 KB | None | 0 0
  1. class MyHTMLParser(HTMLParser):
  2.     def __init__(self, text, preserve=None):
  3.         HTMLParser.__init__(self)
  4.         self.stack = []
  5.         self.preserve = preserve
  6.         if preserve is None:
  7.             self.preserve = []
  8.         elif isinstance(preserve, basestring):
  9.             self.preserve = [preserve]
  10.  
  11.     def handle_starttag(self, tag, attrs):
  12.         if tag.lower() in self.preserve:
  13.             self.stack.append( self.__html_start_tag(tag, attrs) )
  14.  
  15.     def handle_endtag(self, tag):
  16.         if tag.lower() in self.preserve:
  17.             self.stack.append( self.__html_end_tag(tag) )
  18.  
  19.     def handle_startendtag(self, tag, attrs):
  20.         if tag.lower() in self.preserve:
  21.             self.stack.append( self.__html_startend_tag(tag, attrs) )
  22.  
  23.     def handle_data(self, data):
  24.         self.stack.append(data)
  25.  
  26.     def __html_start_tag(self, tag, attrs):
  27.         return '<%s%s>' % (tag, self.__html_attrs(attrs))
  28.  
  29.     def __html_startend_tag(self, tag, attrs):
  30.         return '<%s%s/>' % (tag, self.__html_attrs(attrs))
  31.  
  32.     def __html_end_tag(self, tag):
  33.         return '</%s>' % (tag,)
  34.  
  35.     def __html_attrs(self, attrs):
  36.         _attrs = ''
  37.         if attrs:
  38.             _attrs = ' %s' % (' '.join(['%s="%s"' % (item[0],item[1]) for item in attrs]))
  39.         return _attrs
  40.  
  41.     @classmethod
  42.     def parse(cls, text, preserve=None):
  43.         _p = cls(text, preserve)
  44.         _p.feed(text)
  45.         _p.close()
  46.         return "".join(_p.stack)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement