Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
def main():
    """Scrape headline text from the New York Times home page into a text file.

    Downloads ``https://www.nytimes.com``, finds every ``<div>`` whose class
    is ``story-wrapper`` and writes one line per wrapper to
    ``Soup_and_req_txt.txt`` (UTF-8, overwritten on each run). The line is
    the text of the wrapper's first ``<a>`` element when it has one,
    otherwise the text of the wrapper itself. Newlines inside the text are
    replaced by spaces and surrounding whitespace is stripped.

    Raises:
        requests.RequestException: if the page cannot be fetched, the
            request times out, or the server answers with an HTTP error
            status.
        OSError: if the output file cannot be opened or written.
    """
    base_url = 'https://www.nytimes.com'
    filename = "Soup_and_req_txt.txt"

    # Without a timeout a stalled connection would block forever.
    r = requests.get(base_url, timeout=30)
    # Fail before touching the output file rather than parsing an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    with open(filename, mode='w', encoding='utf-8') as f:
        for story_wrapper in soup.find_all("div", {"class": "story-wrapper"}):
            link = story_wrapper.a
            # Fall back to the wrapper's own text when there is no link.
            # (The previous fallback went through ``story_wrapper.string``,
            # which is None or a NavigableString and has no ``.contents``,
            # so it raised AttributeError for every link-less wrapper.)
            source = link if link is not None else story_wrapper
            f.write(source.get_text().replace("\n", " ").strip() + "\n")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement