Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Download every PDF linked from a single web page into a local folder.

The script fetches ``PAGE_URL``, resolves each ``<a href>`` against it,
and saves every link whose path ends in ``.pdf`` into ``DOWNLOAD_DIR``.
"""
import os
import urllib.parse  # ``import urllib`` alone does not guarantee the submodule is loaded

import bs4
import requests

PAGE_URL = 'http://www.sfmaria-iasi.ro/?page_id=435'
# A single constant is used for both makedirs() and open(); the two call
# sites previously spelled the folder differently ('SandusPDfs' vs
# 'SandusPDFs'), which fails on case-sensitive filesystems.
DOWNLOAD_DIR = 'SandusPDFs'
# Stream in modest chunks instead of buffering a whole PDF in memory.
CHUNK_SIZE = 64 * 1024
# Seconds to wait for the server to connect / send data before giving up.
REQUEST_TIMEOUT = 30

os.makedirs(DOWNLOAD_DIR, exist_ok=True)

page = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
page.raise_for_status()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = bs4.BeautifulSoup(page.text, 'html.parser')

downloaded = 0
for anchor in soup.find_all('a', href=True):
    link = urllib.parse.urljoin(PAGE_URL, anchor['href'])
    # Take the file name from the URL *path* only, so a query string or
    # fragment cannot end up in the extension or in the saved file name.
    filename = os.path.basename(urllib.parse.urlsplit(link).path)
    if os.path.splitext(filename)[1].lower() != '.pdf':
        continue

    print("Descarc %s..." % filename)
    try:
        with requests.get(link, stream=True, timeout=REQUEST_TIMEOUT) as response:
            # Do not save an HTTP error page under a .pdf name.
            response.raise_for_status()
            with open(os.path.join(DOWNLOAD_DIR, filename), 'wb') as pdf_file:
                # Write the PDF response itself (not the index page).
                for chunk in response.iter_content(CHUNK_SIZE):
                    pdf_file.write(chunk)
    except requests.RequestException as error:
        # One broken link should not abort the remaining downloads.
        print("Nu am putut descarca %s: %s" % (filename, error))
        continue
    downloaded += 1

# Report the real number of files saved (the old code printed count + 1).
print("Am descarcat %s pdf-uri" % downloaded)
Add Comment
Please sign in to add a comment.