Guest User

PDFDownloader

a guest
Aug 24th, 2015
381
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.81 KB | None | 0 0
import os
import urllib.parse

import bs4
import requests
  2.  
  3. url = 'http://www.sfmaria-iasi.ro/?page_id=435'
  4. os.makedirs('SandusPDfs', exist_ok=True)
  5. i = 0
  6.  
  7. res = requests.get(url)
  8. res.raise_for_status()
  9. soup = bs4.BeautifulSoup(res.text)
  10.  
  11. for tag in soup.findAll('a',href=True):
  12. tag['href'] = urllib.parse.urljoin(url,tag['href'])
  13. if os.path.splitext(os.path.basename(tag['href']))[1] == '.pdf':
  14. current = requests.get(tag['href'])
  15. print ("Descarc %s..." %(os.path.basename(tag['href'])))
  16.  
  17. pdfFile = open(os.path.join('SandusPDFs',os.path.basename(tag['href'])),'wb')
  18. for chunk in res.iter_content(999999999):
  19. pdfFile.write(chunk)
  20.  
  21. i+=1
  22.  
  23. print ("Am descarcat %s pdf-uri" %(i+1))
Add Comment
Please, Sign In to add comment