Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from collections import Counter
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch the blog front page, follow the link in its first <article> element,
# and report how many comments each author left on that post.

# URL of the blog page to scrape.
BLOG_URL = "http://www.ezfka.com/"

# Browser-like User-Agent string sent with every request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/58.0.3029.110 Safari/537.3"
)
REQUEST_HEADERS = {"User-Agent": USER_AGENT}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def fetch_soup(url: str) -> "BeautifulSoup":
    """Download *url* and return its HTML parsed with ``html.parser``.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(
        url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on an error page instead of parsing it as if it were a post.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def find_first_post_url(soup: "BeautifulSoup", base_url: str) -> str:
    """Return the absolute URL linked from the first ``<article>`` in *soup*.

    Raises:
        LookupError: if the page has no ``<article>`` element, or that
            element contains no ``<a>`` with an ``href`` attribute.
    """
    article = soup.find("article")
    if article is None:
        raise LookupError("no <article> element found on the blog page")
    link = article.find("a", href=True)
    if link is None:
        raise LookupError("the first <article> contains no link")
    # urljoin leaves absolute hrefs untouched and resolves relative ones.
    return urljoin(base_url, link["href"])


def extract_comment_authors(soup: "BeautifulSoup") -> list:
    """Return the raw author text of every comment on a post page.

    Authors are read from ``div.wpd-comment-author`` elements inside the
    ``div#wpd-threads`` container. A page without that container is treated
    as having no comments, so an empty list is returned.
    """
    comments_section = soup.find("div", {"id": "wpd-threads"})
    if comments_section is None:
        return []
    author_elements = comments_section.find_all(
        "div", {"class": "wpd-comment-author"}
    )
    return [element.get_text() for element in author_elements]


def tally_authors(names) -> Counter:
    """Count how many times each author name occurs in *names*.

    Args:
        names: iterable of author-name strings, one entry per comment.

    Returns:
        A ``Counter`` mapping each name to its number of comments, in order
        of first appearance. Whitespace is normalized first (outer
        whitespace stripped, inner runs collapsed to one space) so that
        markup indentation cannot split one author into several keys.
    """
    return Counter(" ".join(name.split()) for name in names)


def format_report(reply_counts) -> list:
    """Build the report lines for a mapping of author name -> comment count.

    The first line is the overall total; one line per author follows, in the
    mapping's iteration order.
    """
    total = sum(reply_counts.values())
    lines = ["There are a total of " + str(total) + " comments."]
    lines.extend(
        f"User {user_id} made {count} comments."
        for user_id, count in reply_counts.items()
    )
    return lines


def main() -> None:
    """Scrape the first post of the blog and print its per-author counts."""
    blog_soup = fetch_soup(BLOG_URL)
    post_url = find_first_post_url(blog_soup, BLOG_URL)
    print(post_url)

    post_soup = fetch_soup(post_url)
    reply_counts = tally_authors(extract_comment_authors(post_soup))
    for line in format_report(reply_counts):
        print(line)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement