Advertisement
Guest User

Untitled

a guest
Jun 24th, 2018
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.88 KB | None | 0 0
  1. import requests ## url, strony www
  2. import wget ## adresy url
  3. import bs4
  4.  
  5. listaLinkow = list()
  6. listaPDF= list()
  7.  
  8. url = 'https://bmcsystbiol.biomedcentral.com/articles'
  9. page = requests.get(url,verify=True).text
  10. soup = bs4.BeautifulSoup(page,'html.parser')
  11.  
  12. # wyszukiwanie linków do podstron
  13. for link in soup.find_all('li',{'class':'c-list-group__item'}):
  14. for plink in link.find_all('a'):
  15. if ('/articles/' in plink.get('href')):
  16. listaLinkow.append('https://bmcsystbiol.biomedcentral.com' + plink.get('href'))
  17.  
  18. # grzebanie w podstronie nr 1
  19.  
  20. for i in range(0,2):
  21. url = listaLinkow[i]
  22. page = requests.get(url,verify=True).text
  23. soup = bs4.BeautifulSoup(page,'html.parser')
  24. for link in soup.find_all('a',{'id':'articlePdf'}):
  25. listaPDF.append('https:'+link.get('href'))
  26.  
  27. print(listaPDF)
  28. for i in listaPDF:
  29. wget.download(i,'.pdf')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement