Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # parse the python module index
- # import everything
- # and
- # from everything import *
- #
- # because I was curious, that's why
- #
- # davep 06-Mar-2014
- from HTMLParser import HTMLParser
- from urlparse import urlparse
- import os.path
- import importlib
- # use the requests library instead of urllib
- # http://docs.python-requests.org/en/latest/
- import requests
# Module index page of the Python 2 documentation.  Fetched over HTTPS because
# the links found on this page determine which modules the script imports.
url = "https://docs.python.org/2/py-modindex.html"
class MyHTMLParser(HTMLParser):
    """Find links to ``library/...`` pages and try to import each module.

    For every ``<a>`` start tag whose ``href`` begins with ``library``, a
    module name is derived from the link and an import is attempted.  When the
    import succeeds, a ``from <module> import *`` line is printed to stdout;
    modules that raise ``ImportError`` are skipped silently.
    """

    @staticmethod
    def _module_name_from_href(href):
        """Return the module name a ``library/...`` link refers to, or None.

        A ``#module-<name>`` anchor is preferred when present, because one
        page can document several modules and the page file name need not
        match the module name.  Without such an anchor, the name is the last
        path component with a trailing ``.html`` removed.

        Args:
            href: value of the ``href`` attribute; may be None or empty.

        Returns:
            The module name, or None when ``href`` is missing, does not start
            with ``library``, or yields an empty name.
        """
        anchor_prefix = "module-"
        page_suffix = ".html"

        if not href or not href.startswith("library"):
            return None

        parsed = urlparse(href)
        if parsed.fragment.startswith(anchor_prefix):
            name = parsed.fragment[len(anchor_prefix):]
        else:
            name = os.path.split(parsed.path)[-1]
            # Strip only a trailing suffix; str.replace would also remove
            # ".html" from the middle of a name.
            if name.endswith(page_suffix):
                name = name[:-len(page_suffix)]
        return name or None

    def handle_starttag(self, tag, attr_list):
        """Probe the module behind an ``<a href="library/...">`` tag.

        Args:
            tag: tag name as passed in by the parser.
            attr_list: list of ``(name, value)`` attribute pairs.
        """
        if tag != "a":
            return

        # Look the attribute up by name: it need not be the first (or only)
        # attribute, and the tag may have no attributes at all.
        href = dict(attr_list).get("href")
        modulename = self._module_name_from_href(href)
        if modulename is None:
            return

        try:
            importlib.import_module(modulename)
        except ImportError:
            # Module is not available on this interpreter/platform; skip it.
            return
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement or a function.
        print("from {0} import *".format(modulename))
def main():
    """Load the module index, from a local cache if present, and parse it.

    When ``modindex.html`` does not exist in the current directory, the page
    at the module-level ``url`` is downloaded and saved there; otherwise the
    saved copy is read.  The HTML is then fed to ``MyHTMLParser``.

    Raises:
        requests.exceptions.RequestException: if the download fails, times
            out, or the server answers with an HTTP error status.
    """
    # only do the URL fetch once (let's be polite to the server)
    cachefilename = "modindex.html"
    if not os.path.exists(cachefilename):
        r = requests.get(url, timeout=30)
        # Check the status before caching: otherwise an error page would be
        # stored and reused on every later run.
        r.raise_for_status()
        html_str = r.content
        # r.content is the raw response body, so write it in binary mode.
        with open(cachefilename, "wb") as outfile:
            outfile.write(html_str)
    else:
        with open(cachefilename, "rb") as infile:
            html_str = infile.read()

    parser = MyHTMLParser()
    parser.feed(html_str)
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement