Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#
# http://stackoverflow.com/questions/39398592/python-get-javascript-file-from-href-tag-of-html
#
"""Download every document linked from one BIS listing page.

The listing page (LISTING_URL) contains ``<a>`` elements whose ``href`` is a
``javascript:`` call rather than a real URL.  This script pulls the argument
out of each such ``href``, turns it into the five query parameters expected by
DOWNLOAD_URL, and saves each response in the current directory.
"""
import os

import lxml.html
import requests

LISTING_URL = 'http://a810-bisweb.nyc.gov/bisweb/COsByLocationServlet?requestid=1&allbin=3055311'
DOWNLOAD_URL = 'http://a810-bisweb.nyc.gov/bisweb/CofoDocumentContentServlet'

# Anchors inside <td class="content"> whose href mentions "javascript".
LINK_XPATH = '//td[@class="content"]/a[contains(@href, "javascript")]'

# Seconds to wait for the server; without a timeout requests can block forever.
TIMEOUT = 30

CHUNK_SIZE = 1024


def build_params(href):
    """Return the download query parameters encoded in *href*, or ``None``.

    The first 14 and last 12 characters of *href* are discarded (fixed-length
    wrapper around the argument -- NOTE(review): lengths are taken as-is from
    the original script; confirm against the live page).  What remains is
    split on ``'_'``; token 1 and token 4 carry the data.

    ``None`` is returned when *href* does not have at least five tokens or
    token 4 is empty, so the caller can skip the link instead of crashing
    with ``IndexError``.
    """
    parts = href[14:-12].split('_')
    if len(parts) < 5 or not parts[4]:
        return None

    document = parts[4]
    return {
        'cofomatadata1': parts[1],
        'cofomatadata2': document[0],             # first character
        'cofomatadata3': document[1:4],           # next three characters
        'cofomatadata4': document[4:7] + '000',   # following three, padded with '000'
        'cofomatadata5': document,                # whole token; also the file name
    }


def download(params, filename):
    """Stream DOWNLOAD_URL with *params* into the file *filename*.

    Raises ``requests.RequestException`` on connection problems, timeouts or
    a non-2xx status.  The status is checked before the file is opened so an
    error page is never written to disk.
    """
    # ``params=`` lets requests URL-encode the values; the context manager
    # releases the streamed connection even if writing fails.
    with requests.get(DOWNLOAD_URL, params=params, stream=True,
                      timeout=TIMEOUT) as response:
        response.raise_for_status()
        with open(filename, 'wb') as fout:
            for chunk in response.iter_content(CHUNK_SIZE):
                fout.write(chunk)


def main():
    """Fetch the listing page and download every document it links to."""
    listing = requests.get(LISTING_URL, timeout=TIMEOUT)
    listing.raise_for_status()
    page = lxml.html.fromstring(listing.text)

    for link in page.xpath(LINK_XPATH):
        href = link.attrib['href']
        params = build_params(href)
        if params is None:
            print('Skipped (unexpected href):', href)
            continue

        # The name comes from remote HTML: drop any directory component so
        # the file can only be created in the current directory.
        filename = os.path.basename(params['cofomatadata5'])
        if not filename:
            print('Skipped (unusable file name):', href)
            continue

        print('Download:', filename)
        try:
            download(params, filename)
        except requests.RequestException as exc:
            # One bad document should not stop the remaining downloads.
            print('Failed:', filename, exc)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement