Advertisement
Jackspade9624

crawler3.py

May 30th, 2025 (edited)
26
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.03 KB | None | 0 0
#!/usr/bin/env python3

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
  5.  
  6. def crawl(url, visited_urls, depth=2):
  7. if depth == 0 or url in visited_urls:
  8. return
  9. print(f"\033[1;92mCrawling: {url}")
  10. visited_urls.add(url)
  11. try:
  12. response = requests.get(url) response.raise_for_status()
  13. soup = BeautifulSoup(response.text, 'html.parser')
  14. links = soup.find_all('a', href=True)
  15. for link in links:
  16. next_url = urljoin(url, link.get('href'))
  17. crawl(next_url, visited_urls, depth - 1)
  18. except requests.exceptions.RequestException as e:
  19. print(f"\033[1;91mError crawling {url}: {e}")
  20.  
  21. start_url = 'https://example.com'
  22. visited_urls = set()
  23. crawl(start_url, visited_urls)
  24.  
  25. print("")
  26. print(" \033[1;97mUsage: \033[1;96mnano crawler3.py")
  27. print("")
  28. print(" \033[1;97m(Replace https://example.com with your url)")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement