Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import posixpath
import urllib2
import urlparse
from contextlib import closing

from lxml import html
def _iter_table_urls(page_url, tree):
    """Yield the absolute URL for every usable <option> inside a <select>.

    Args:
        page_url: URL the document was fetched from; relative option values
            are resolved against it.
        tree: parsed lxml document to search.

    Options are skipped when they have no ``value`` attribute, an empty one,
    or one that starts with ``javascript``.
    """
    for select in tree.xpath('//select'):
        for option in select.xpath('option'):
            # .get() returns None instead of raising KeyError when the
            # attribute is absent.
            target = option.get('value')
            if not target or target.startswith('javascript'):
                continue
            # urljoin resolves relative, root-relative and absolute values
            # correctly; plain path joining mangles the latter two.
            yield urlparse.urljoin(page_url, target)


def main():
    """Fetch the index page and print one resolved URL per line."""
    url = 'https://archive.epa.gov/airtoxics/nata1999/web/html/tables.html'
    # urllib2 responses are not context managers, so closing() guarantees
    # the connection is released once the document has been parsed.
    with closing(urllib2.urlopen(url)) as response:
        tree = html.parse(response)
    for table_url in _iter_table_urls(url, tree):
        # Single-argument print(...) behaves the same as the print statement.
        print(table_url)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement