Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import requests  # HTTP fetches for the listing and article pages
import wget      # file download by URL
import bs4       # HTML parsing (BeautifulSoup)

# Scrape the BMC Systems Biology article listing, collect direct PDF
# links from the first two article pages, and download those PDFs.

BASE_URL = 'https://bmcsystbiol.biomedcentral.com'

listaLinkow = list()  # article sub-page URLs found on the listing page
listaPDF = list()     # direct PDF URLs scraped from article pages

page = requests.get(BASE_URL + '/articles', verify=True).text
soup = bs4.BeautifulSoup(page, 'html.parser')

# Find links to article sub-pages inside the listing items.
for link in soup.find_all('li', {'class': 'c-list-group__item'}):
    for plink in link.find_all('a'):
        href = plink.get('href')
        # Guard: an <a> without href yields None, and `'/articles/' in None`
        # would raise TypeError in the original code.
        if href and '/articles/' in href:
            listaLinkow.append(BASE_URL + href)

# Dig into the first two article pages (slice is safe even if fewer
# than two links were found, unlike the original range(0, 2) indexing).
for article_url in listaLinkow[:2]:
    page = requests.get(article_url, verify=True).text
    soup = bs4.BeautifulSoup(page, 'html.parser')
    for link in soup.find_all('a', {'id': 'articlePdf'}):
        # hrefs here are protocol-relative ('//...'), hence the prefix.
        listaPDF.append('https:' + link.get('href'))

print(listaPDF)

# Download each PDF under a filename derived from its URL.
# BUG FIX: the original passed '.pdf' as wget's `out` argument, so every
# download was written to the same file named '.pdf', overwriting the
# previous one — only the last PDF survived.
for pdf_url in listaPDF:
    filename = pdf_url.rstrip('/').split('/')[-1]
    if not filename.endswith('.pdf'):
        filename += '.pdf'
    wget.download(pdf_url, filename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement