Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from requests import get
- from requests.exceptions import RequestException
- from contextlib import closing
- from bs4 import BeautifulSoup as soup
- from collections import Counter
- import operator
def get_html(url, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response body parsed with the ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: If the connection fails, the
            request times out, or the server answers with an HTTP error
            status.
    """
    print("searching " + url)
    # closing() releases the connection as soon as the body has been parsed.
    with closing(get(url, stream=True, timeout=timeout)) as resp:
        # Fail loudly on 4xx/5xx instead of parsing an error page, which
        # would silently yield zero posts.
        resp.raise_for_status()
        return soup(resp.content, 'html.parser')
def get_posts(html):
    """Return the author name of every post on a parsed thread page.

    Args:
        html: Parsed page exposing ``select``; each ``article`` element it
            yields is treated as one post.

    Returns:
        A list with one entry per post, in page order. The entry is the text
        of the link found at ``aside > h3 > strong > a`` or, when that chain
        cannot be followed, the placeholder ``"Guest/Banned user"``.
    """
    posts = []
    for post in html.select("article"):
        try:
            author = post.aside.h3.strong.a.text
        except AttributeError:
            # A missing tag in the chain evaluates to None, so the next
            # attribute access fails; such posts have no author link.
            author = "Guest/Banned user"
        posts.append(author)
    return posts
def pages(html):
    """Return the number of pages in the thread shown by *html*.

    Scans the ``div`` elements below ``body > main > div > div > div`` for
    the first one whose first three CSS classes are ``cTopic``, ``ipsClear``
    and ``ipsSpacer_top``, then reads the number following the word "of" in
    the text of its ``div > ul`` child.

    Args:
        html: Parsed first page of the thread.

    Returns:
        The page count as an int, or 1 when no matching element with a
        parsable count is found.
    """
    wanted = ("cTopic", "ipsClear", "ipsSpacer_top")
    for div in html.body.main.div.div.div.select("div"):
        try:
            classes = div.get("class")
            if (classes[0], classes[1], classes[2]) != wanted:
                continue
            words = div.div.ul.text.split()
            page_count = int(words[words.index("of") + 1])
        except (AttributeError, TypeError, IndexError, ValueError):
            # No class attribute, fewer than three classes, a missing
            # div/ul child, or no "of <number>" in the text: not the
            # pagination block, keep looking.
            continue
        print("Found " + str(page_count) + " pages")
        return page_count
    return 1
def clean_input(input):
    """Validate a thread URL and strip any trailing query parameters.

    Args:
        input: URL text as typed or pasted by the user. The parameter name
            shadows the builtin ``input`` inside this function; it is kept so
            existing callers are unaffected.

    Returns:
        The URL without surrounding whitespace, cut off before the first
        ``&`` so that a ``&page=N`` suffix can be appended later.

    Raises:
        ValueError: If the text does not contain ``"topic"``.
    """
    url = input.strip()
    if "topic" not in url:
        raise ValueError("The URL does not appear to be a thread")
    # str.split always returns at least one element, so no fallback is needed.
    return url.split("&", 1)[0]
# Ask for the thread, fetch its first page and find out how many pages exist.
url = clean_input(input("Enter thread URL:"))
# Example thread URL:
# https://forum.kerbalspaceprogram.com/index.php?/topic/180429-number-of-posts-in-a-thread/
html = get_html(url)
page_total = pages(html)

# Collect the author of every post; page 1 is already downloaded, later
# pages are requested by appending "&page=N" to the cleaned URL.
all_posts = []
for page_number in range(1, page_total + 1):
    if page_number == 1:
        page_html = html
    else:
        page_html = get_html(url + "&page=" + str(page_number))
    all_posts.extend(get_posts(page_html))

print("---------")

# Rank authors by post count, highest first. Sorting ascending and then
# reversing keeps the tie order the script has always produced.
post_counts = Counter(all_posts)
ranking = sorted(post_counts.items(), key=operator.itemgetter(1))
ranking.reverse()
for author, count in ranking:
    if count == 1:
        print(author + " posted 1 time")
    else:
        print(author + " posted " + str(count) + " times")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement