Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def save_article():
    """Save the body text of every 'News' item on Nature's articles page.

    Fetches https://www.nature.com/nature/articles and, for each
    ``<article>`` element whose type label reads 'News', requests the linked
    article page and writes the text of its ``c-article-body`` div to a file
    in the current working directory. The file name is the article title
    with punctuation removed and spaces replaced by underscores, followed by
    '.text'. The file name and a 'file written' notice are printed for each
    saved article.

    Nothing is done when the listing page does not answer with HTTP 200.
    Listing entries that lack a type label or a title link, and article
    pages that do not answer with HTTP 200 or contain no body div, are
    skipped instead of aborting the whole run.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: if a request fails or exceeds
            the timeout.
        OSError: if an output file cannot be created or written.
    """
    url = "https://www.nature.com/nature/articles"
    # Without a timeout a stalled connection would block forever.
    timeout = 30

    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        return

    soup = BeautifulSoup(r.content, 'html.parser')
    # Built once: deletes punctuation and turns spaces into underscores.
    name_table = str.maketrans(" ", "_", string.punctuation)

    for x in soup.find_all('article'):
        # find() returns None when the element is missing, so check before
        # touching .text.
        type_tag = x.find('span', attrs={'data-test': 'article.type'})
        if type_tag is None or type_tag.text.strip() != 'News':
            continue

        title_tag = x.find('a', {'data-track-action': "view article"})
        if title_tag is None:
            continue

        # Strip all surrounding whitespace (not only spaces) so that stray
        # newlines from the markup never end up in the file name.
        # NOTE(review): the '.text' suffix is kept from the original; confirm
        # whether '.txt' was intended.
        name = title_tag.text.strip().translate(name_table) + '.text'
        print(name)

        article_url = f"https://www.nature.com{x.a.get('href')}"
        r2 = requests.get(article_url, timeout=timeout)
        if r2.status_code != 200:
            continue

        soup2 = BeautifulSoup(r2.content, 'html.parser')
        body = soup2.find('div', {'class': 'c-article-body'})
        if body is None:
            continue

        # Explicit UTF-8 so non-ASCII article text does not depend on the
        # platform's default encoding; `with` closes the file even if the
        # write fails.
        with open(name, 'w', encoding='utf-8') as file:
            file.write(body.text.strip())
        print('file written')


if __name__ == "__main__":
    save_article()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement