make import everything

#!/usr/bin/env python

# Parse the Python module index, try to import every module it lists,
# and print a
#     from <module> import *
# line for each module that imports successfully.
#
# because I was curious, that's why
#
# davep 06-Mar-2014

from HTMLParser import HTMLParser
from urlparse import urlparse
import os.path
import importlib

# use the requests library instead of urllib
# http://docs.python-requests.org/en/latest/
import requests

# Module index page of the Python 2 documentation; main() downloads this page
# and feeds its HTML to the parser.
url = "http://docs.python.org/2/py-modindex.html"

class MyHTMLParser(HTMLParser):
    """Print a ``from <module> import *`` line for each importable module link.

    Feed it the HTML of the Python module index.  Every ``<a>`` start tag
    whose ``href`` begins with ``library`` is treated as a link to a module's
    documentation page; the module name is taken from that page's file name.
    """

    def handle_starttag(self, tag, attr_list):
        """Try to import the module an ``<a href="library/...">`` tag points at.

        tag       -- name of the tag being opened
        attr_list -- list of (name, value) pairs for the tag's attributes

        Prints ``from <module> import *`` to stdout when the import succeeds.
        Tags that are not module links, and modules that raise ImportError,
        are skipped silently.
        """
        if tag != "a":
            return

        # Look the href up by name rather than by position, so anchors that
        # carry extra attributes (class, title, ...) or no attributes at all
        # are handled.  A valueless attribute arrives with a value of None.
        href = dict(attr_list).get("href")
        if not href or not href.startswith("library"):
            return

        # "library/os.path.html#module-os.path" -> "os.path"
        modulename = os.path.split(urlparse(href).path)[-1]
        suffix = ".html"
        if modulename.endswith(suffix):
            modulename = modulename[:-len(suffix)]
        if not modulename:
            # e.g. href="library/": there is no module name to import
            return

        try:
            # NOTE: this really imports the module, so its top-level code runs.
            importlib.import_module(modulename)
        except ImportError:
            # Not importable here; leave it out of the output.
            pass
        else:
            # The parenthesised single-argument form prints the same text
            # whether print is a statement or a function.
            print("from {0} import *".format(modulename))

def main():
    """Load the module index (from cache if present) and run the parser on it.

    The page is downloaded at most once: the first run saves it to
    ``modindex.html`` in the current directory, and later runs read that file
    instead of contacting the server again.

    Raises whatever ``requests`` raises for a failed or timed-out download,
    including an HTTP error status.
    """
    # only do the URL fetch once (let's be polite to the server)
    cachefilename = "modindex.html"
    if not os.path.exists(cachefilename):
        r = requests.get(url, timeout=30)
        # Never cache an error page: it would be reused on every later run.
        r.raise_for_status()
        html_str = r.content
        # r.content is the raw response body, so write it in binary mode to
        # keep the cached copy byte-for-byte identical to what was served.
        with open(cachefilename, "wb") as outfile:
            outfile.write(html_str)
    else:
        with open(cachefilename, "rb") as infile:
            html_str = infile.read()

    parser = MyHTMLParser()
    parser.feed(html_str)

# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()