Advertisement
RexyBadDog

BeautifulSoup_and_Requests

May 13th, 2022
731
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import requests
  2. from bs4 import BeautifulSoup
  3.  
  4.  
  5. def main():
  6.     base_url = 'https://www.nytimes.com'
  7.     r = requests.get(base_url)
  8.     soup = BeautifulSoup(r.text, "html.parser")
  9.     # filename = input("File to save to: ")
  10.     filename = "Soup_and_req_txt.txt"
  11.     with open(filename, mode='w', encoding='utf-8') as f:
  12.         for story_wrapper in soup.find_all("div", {"class": "story-wrapper"}):
  13.             if story_wrapper.a:
  14.                 # print(story_wrapper.a.text)
  15.                 f.write(story_wrapper.a.text.replace("\n", " ").strip() + "\n")
  16.                 # f.write(story_wrapper.a.text.strip())
  17.  
  18.             else:
  19.                 # print(story_wrapper.contents[0].get_text())
  20.                 # f.write(story_wrapper.string.contents[0].strip())
  21.                 f.write(story_wrapper.string.contents[0].get_text() + "\n")
  22.  
  23.  
  24. if __name__ == "__main__":
  25.     main()
  26.  
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement