Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import HTMLParser
- import urllib2
class ZenParser(HTMLParser.HTMLParser):
    """Collect the text of the ``<pre>`` that follows a "The Zen of Python" heading.

    The parser is a small state machine driven by ``self.state``:

    * ``'START'`` -- waiting for an ``<h3>`` start tag.
    * ``'CHECKING HEADER'`` -- an ``<h3>`` was opened; the next text chunk
      decides whether it is the wanted heading.
    * ``'AWAITING ZEN'`` -- the heading matched; waiting for a ``<pre>``.
    * ``'GATHERING ZEN'`` -- inside that ``<pre>``; text is being collected.
    * ``'GOT ZEN'`` -- the closing ``</pre>`` was seen; nothing more is
      collected.

    After feeding a document, ``self.zen`` holds the collected text, or
    ``None`` if no text was collected.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``HTMLParser`` and reset the parser state."""
        HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
        self.state = 'START'
        # Defined up front so the attribute exists even when nothing is found.
        self.zen = None

    def handle_starttag(self, tag, attrs):
        """Advance the state machine on the ``<h3>`` and ``<pre>`` start tags."""
        if self.state == 'START' and tag == 'h3':
            self.state = 'CHECKING HEADER'
        elif self.state == 'AWAITING ZEN' and tag == 'pre':
            self.state = 'GATHERING ZEN'

    def handle_data(self, data):
        """Check heading text, or accumulate text while inside the Zen ``<pre>``."""
        if self.state == 'CHECKING HEADER':
            if data.strip() == 'The Zen of Python':
                self.state = 'AWAITING ZEN'
            else:
                self.state = 'START'
        elif self.state == 'GATHERING ZEN':
            # The parser may deliver one block of text in several calls
            # (e.g. around nested tags), so append instead of keeping only
            # the first chunk.
            if self.zen is None:
                self.zen = data
            else:
                self.zen += data

    def handle_endtag(self, tag):
        """Stop collecting once the Zen ``<pre>`` element is closed."""
        if self.state == 'GATHERING ZEN' and tag == 'pre':
            self.state = 'GOT ZEN'
def zen(url='http://legacy.python.org/dev/peps/pep-0020/'):
    """Download a page and return the Zen of Python text found in it.

    Args:
        url: Address of the page to fetch. Defaults to the PEP 20 page on
            legacy.python.org.

    Returns:
        The text gathered by ``ZenParser``, with leading whitespace removed
        from every line and the lines joined with ``'\\n'``.

    Raises:
        AttributeError: If the parser did not find any Zen text in the page.
    """
    r = urllib2.urlopen(url)
    try:
        page = r.read()
    finally:
        # Always release the connection, even if reading fails.
        r.close()
    parser = ZenParser()
    parser.feed(page)
    return '\n'.join(line.lstrip() for line in parser.zen.splitlines())
# Script entry: fetch the Zen of Python and write it to standard output.
print(zen())
Advertisement
Add Comment
Please sign in to add a comment.