Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- import re
def get_css_classes_ids(url, timeout=10):
    """Fetch a web page and collect the CSS class names and element IDs it uses.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``(classes, ids)`` tuple of sets of strings. Each entry in
        ``classes`` is a single class name, so ``class="a b"`` contributes
        both ``"a"`` and ``"b"``. Both sets are empty when the server
        answers with any status code other than 200.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, invalid URL); it is not caught here.
    """
    # Without an explicit timeout the request can block indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve content from {url}, status code: {response.status_code}")
        return set(), set()

    soup = BeautifulSoup(response.text, 'html.parser')

    classes = set()
    ids = set()
    # Read the parsed attributes rather than regex-matching the serialized
    # markup: a pattern like id="..." also matches inside data-id="..." or
    # valid="...", and inside script text that merely looks like an attribute.
    for tag in soup.find_all(True):
        class_value = tag.get('class') or []
        # The parser normally yields a list of names for "class"; tolerate a
        # plain string in case multi-valued attribute handling is disabled.
        if isinstance(class_value, str):
            class_value = class_value.split()
        classes.update(name for name in class_value if name)

        id_value = tag.get('id')
        if id_value:
            ids.add(id_value)

    return classes, ids
# Replace 'http://example.com' with the URL you want to scrape
classes, ids = get_css_classes_ids('http://example.com')

# Sets have no stable iteration order, so sort before printing to get the
# same output on every run.
print("CSS Classes found:")
for c in sorted(classes):
    print(c)

print("\nCSS IDs found:")
for i in sorted(ids):
    print(i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement