skip420

webparagraphscraper

Sep 19th, 2021 (edited)
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.65 KB | None | 0 0
  1. # How to extract paragraph from a website and save it as a text file?
  2.  
  3. import urllib.request
  4. from bs4 import BeautifulSoup
  5.  
  6. # here we have to pass url and path
  7. # (where you want to save ur text file)
  8. urllib.request.urlretrieve("https://www.geeksforgeeks.org/grep-command-in-unixlinux/?ref=leftbar-rightbar",
  9.                            "/home/skip420/Desktop/scrape_Job/notes.txt")
  10.  
  11. file = open("notes.txt", "r")
  12. contents = file.read()
  13. soup = BeautifulSoup(contents, 'html.parser')
  14.  
  15. f = open("products.txt", "w")
  16.  
  17. # traverse paragraphs from soup
  18. for data in soup.find_all("p"):
  19.     sum = data.get_text()
  20.     f.writelines(sum)
  21.  
  22. f.close()
Add Comment
Please, Sign In to add comment