# Simple Scraper

import posixpath
import urllib2
import urlparse

from lxml import html

def resolve_option_urls(page_url, values):
    """Resolve <option> ``value`` strings against the page they came from.

    Args:
        page_url: Absolute URL of the page that contained the options.
        values: Iterable of raw ``value`` attribute strings; an entry may be
            ``None`` when the option had no ``value`` attribute.

    Returns:
        A list of absolute URLs in input order. Missing or empty values and
        values starting with ``javascript`` are skipped.
    """
    resolved = []
    for value in values:
        # Options without a usable value carry no link to resolve.
        if not value or value.startswith('javascript'):
            continue
        # urljoin (unlike posixpath.join) keeps the scheme/host for
        # root-relative values and passes absolute URLs through unchanged.
        resolved.append(urlparse.urljoin(page_url, value))
    return resolved


if __name__ == '__main__':
    url = 'https://archive.epa.gov/airtoxics/nata1999/web/html/tables.html'
    response = urllib2.urlopen(url)
    try:
        tree = html.parse(response)
    finally:
        # Release the HTTP connection even if parsing fails.
        response.close()
    for select_ele in tree.xpath('//select'):
        # .get() returns None for a missing attribute instead of raising.
        values = [option.get('value') for option in select_ele.xpath('option')]
        for link in resolve_option_urls(url, values):
            # Single parenthesised argument prints identically under the
            # Python 2 print statement.
            print(link)