#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def crawl(url, visited_urls, depth=2):
    # Stop when the depth budget is used up or the URL was already visited
    if depth == 0 or url in visited_urls:
        return

    print(f"\033[1;92mCrawling: {url}")
    visited_urls.add(url)

    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        links = soup.find_all('a', href=True)
        for link in links:
            # Resolve relative hrefs against the current page URL, then recurse
            next_url = urljoin(url, link.get('href'))
            crawl(next_url, visited_urls, depth - 1)
    except requests.exceptions.RequestException as e:
        print(f"\033[1;91mError crawling {url}: {e}")

start_url = 'https://example.com'
visited_urls = set()
crawl(start_url, visited_urls)

print("")
print(" \033[1;97mUsage: \033[1;96mnano crawler3.py")
print("")
print(" \033[1;97m(Replace https://example.com with your url)")