Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class MyHTMLParser(HTMLParser):
    """Strip HTML markup from text, keeping only an allow-list of tags.

    Text content is always kept.  Start, end and self-closing tags are
    re-emitted only when their (lowercased) name appears in ``preserve``;
    every other tag is dropped while its text content is retained.
    Comments, doctypes and processing instructions are not re-emitted.

    The output fragments are collected, in document order, in ``self.stack``.
    The usual entry point is the :meth:`parse` classmethod.

    The code deliberately sticks to constructs that work on both Python 2
    and Python 3 (``%`` formatting, explicit base-class ``__init__`` call).
    """

    # ``basestring`` exists only on Python 2 (where it covers both ``str``
    # and ``unicode``); fall back to ``str`` on Python 3 instead of raising
    # NameError the first time ``preserve`` is given.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, text, preserve=None):
        """Create a parser.

        Args:
            text: Unused; accepted only so existing callers keep working.
                The markup to process is supplied through ``feed()``.
            preserve: Tag name, or iterable of tag names, to keep in the
                output.  ``None`` (the default) strips every tag.  Names
                are matched case-insensitively.
        """
        # Explicit base-class call rather than super(): the Python 2
        # HTMLParser is an old-style class, for which super() fails.
        HTMLParser.__init__(self)
        # Python 3 decodes character references in text by default, which
        # would turn an escaped "&lt;" into a raw "<" in the output.  Turn
        # that off so references reach handle_entityref/handle_charref and
        # can be re-emitted verbatim.  On Python 2 this is just an unused
        # attribute.
        self.convert_charrefs = False
        self.stack = []
        if preserve is None:
            preserve = []
        elif isinstance(preserve, self._string_types):
            preserve = [preserve]
        # The parser reports tag names lowercased, so normalise the
        # allow-list the same way (this also copies the caller's list).
        self.preserve = [name.lower() for name in preserve]

    def handle_starttag(self, tag, attrs):
        """Re-emit an opening tag if it is on the allow-list."""
        if tag.lower() in self.preserve:
            self.stack.append(self.__html_start_tag(tag, attrs))

    def handle_endtag(self, tag):
        """Re-emit a closing tag if it is on the allow-list."""
        if tag.lower() in self.preserve:
            self.stack.append(self.__html_end_tag(tag))

    def handle_startendtag(self, tag, attrs):
        """Re-emit a self-closing tag (``<br/>``) if it is on the allow-list."""
        if tag.lower() in self.preserve:
            self.stack.append(self.__html_startend_tag(tag, attrs))

    def handle_data(self, data):
        """Keep text content unchanged."""
        self.stack.append(data)

    def handle_entityref(self, name):
        """Keep a named reference such as ``&amp;`` in its escaped form."""
        self.stack.append('&%s;' % (name,))

    def handle_charref(self, name):
        """Keep a numeric reference such as ``&#38;`` in its escaped form."""
        self.stack.append('&#%s;' % (name,))

    def __html_start_tag(self, tag, attrs):
        """Render ``<tag attr="value" ...>``."""
        return '<%s%s>' % (tag, self.__html_attrs(attrs))

    def __html_startend_tag(self, tag, attrs):
        """Render ``<tag attr="value" .../>``."""
        return '<%s%s/>' % (tag, self.__html_attrs(attrs))

    def __html_end_tag(self, tag):
        """Render ``</tag>``."""
        return '</%s>' % (tag,)

    @staticmethod
    def __escape_attr(value):
        """Escape a decoded attribute value for use inside double quotes."""
        # "&" must be replaced first so the references produced by the
        # later replacements are not escaped a second time.
        return (value.replace('&', '&amp;')
                     .replace('"', '&quot;')
                     .replace('<', '&lt;')
                     .replace('>', '&gt;'))

    def __html_attrs(self, attrs):
        """Render ``(name, value)`` pairs as a string with a leading space.

        Returns ``''`` when there are no attributes.  The parser hands over
        attribute values already unescaped, so they are escaped again here;
        a value of ``None`` marks a valueless attribute (``<input
        disabled>``) and is rendered as the bare name.
        """
        rendered = []
        for name, value in attrs:
            if value is None:
                rendered.append(name)
            else:
                rendered.append('%s="%s"' % (name, self.__escape_attr(value)))
        if not rendered:
            return ''
        return ' %s' % (' '.join(rendered),)

    @classmethod
    def parse(cls, text, preserve=None):
        """Return ``text`` with every tag not listed in ``preserve`` removed.

        Args:
            text: The HTML markup to process.
            preserve: Tag name, or iterable of tag names, to keep.

        Returns:
            The concatenation of all retained text and tag fragments.
        """
        parser = cls(text, preserve)
        parser.feed(text)
        parser.close()
        return "".join(parser.stack)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement