Advertisement
oshkoshbagoshh

BeautifulSoup-Webscrape

Jan 18th, 2024
408
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.97 KB | Source Code | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import re
  4.  
  5. def get_css_classes_ids(url):
  6.     # Send a GET request to the URL
  7.     response = requests.get(url)
  8.     # Check if the request was successful
  9.     if response.status_code == 200:
  10.         # Parse the HTML content
  11.         soup = BeautifulSoup(response.text, 'html.parser')
  12.         # Use regex to find all classes and IDs
  13.         classes = set(re.findall(r'class="([^"]+)"', soup.decode()))
  14.         ids = set(re.findall(r'id="([^"]+)"', soup.decode()))
  15.         # Return a tuple of sets containing classes and IDs
  16.         return classes, ids
  17.     else:
  18.         print(f"Failed to retrieve content from {url}, status code: {response.status_code}")
  19.         return set(), set()
  20.  
  21. # Replace 'http://example.com' with the URL you want to scrape
  22. classes, ids = get_css_classes_ids('http://example.com')
  23.  
  24. print("CSS Classes found:")
  25. for c in classes:
  26.     print(c)
  27.  
  28. print("\nCSS IDs found:")
  29. for i in ids:
  30.     print(i)
  31.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement