MeowalsoMeow

web_scraper_stage4

Jun 18th, 2021
import string

import requests
from bs4 import BeautifulSoup


def save_article():
    # Fetch the Nature articles listing page.
    url = "https://www.nature.com/nature/articles"
    r = requests.get(url)
    if r.status_code == 200:
        soup = BeautifulSoup(r.content, 'html.parser')
        title_news = []
        puncs = string.punctuation
        for x in soup.find_all('article'):
            article_type = x.find('span', attrs={'data-test': 'article.type'}).text
            # Keep only entries whose type label is "News".
            if article_type.strip() == 'News':
                title = x.find('a', {'data-track-action': "view article"}).text
                # Build a file name: spaces become underscores, punctuation is dropped.
                name = title.strip().translate(str.maketrans(" ", "_", puncs)) + '.text'
                print(name)
                title_news.append(name)
                # Follow the article link and extract the body text.
                article_url = f"https://www.nature.com{x.a.get('href')}"
                r2 = requests.get(article_url)
                soup2 = BeautifulSoup(r2.content, 'html.parser')
                text = soup2.find('div', {'class': 'c-article-body'}).text.strip()
                with open(name, 'w') as file:
                    file.write(text)
                print('file written')


save_article()
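
The selectors above assume Nature's markup at the time of writing; if a listing entry is missing one of the expected tags, find() returns None and the .text access raises AttributeError. A minimal hardened variant, shown below as a sketch only (same URLs and selectors, reusing the imports above, with a hypothetical save_article_safe name), skips such entries instead:

def save_article_safe():
    listing = requests.get("https://www.nature.com/nature/articles")
    if listing.status_code != 200:
        return
    soup = BeautifulSoup(listing.content, 'html.parser')
    for x in soup.find_all('article'):
        type_tag = x.find('span', attrs={'data-test': 'article.type'})
        link = x.find('a', {'data-track-action': "view article"})
        # Skip entries that are not News or lack the expected tags.
        if type_tag is None or link is None or type_tag.text.strip() != 'News':
            continue
        r2 = requests.get(f"https://www.nature.com{link.get('href')}")
        body = BeautifulSoup(r2.content, 'html.parser').find('div', {'class': 'c-article-body'})
        if r2.status_code == 200 and body is not None:
            name = link.text.strip().translate(str.maketrans(" ", "_", string.punctuation)) + '.text'
            with open(name, 'w') as file:
                file.write(body.text.strip())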