Advertisement
Guest User

post_counter

a guest
Dec 27th, 2018
43
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.92 KB | None | 0 0
  1. from requests import get
  2. from requests.exceptions import RequestException
  3. from contextlib import closing
  4. from bs4 import BeautifulSoup as soup
  5. from collections import Counter
  6. import operator
  7.  
  8. def get_html(url):
  9.     print("searching " + url)
  10.     with closing(get(url, stream=True)) as resp:
  11.         return soup(resp.content, 'html.parser')
  12.  
  13. def get_posts(html):
  14.     posts = []
  15.     for post in html.select("article"):
  16.         try:
  17.             posts.append(post.aside.h3.strong.a.text)
  18.         except:
  19.             posts.append("Guest/Banned user")
  20.     return posts
  21.  
  22. def pages(html):
  23.     for div in html.body.main.div.div.div.select("div"):
  24.         try:
  25.             if div.get("class")[0] == "cTopic" and div.get("class")[1] == "ipsClear" and div.get("class")[2] == "ipsSpacer_top":
  26.                 pages = int(div.div.ul.text.split()[div.div.ul.text.split().index("of") + 1]) #Beautiful
  27.                 print("Found " + str(pages) + " pages")
  28.                 return pages
  29.         except:
  30.             continue
  31.     return 1
  32.  
  33. def clean_input(input):
  34.     if input.find("topic") < 0:
  35.         raise ValueError("The URL does not appear to be a thread")
  36.     try:
  37.         return input.split("&")[0]
  38.     except:
  39.         return input
  40.  
  41. url = clean_input(input("Enter thread URL:"))
  42. #url = "https://forum.kerbalspaceprogram.com/index.php?/topic/180429-number-of-posts-in-a-thread/"
  43. html = get_html(url)
  44.  
  45. all_posts = []
  46.  
  47. counter = 1
  48. for page in range(pages(html)):
  49.     if counter == 1:
  50.         all_posts += get_posts(html)
  51.     else:
  52.         all_posts += get_posts(get_html(url + "&page=" + str(counter)))
  53.     counter += 1
  54.  
  55. print("---------")
  56.  
  57. occurences = Counter(all_posts)
  58. sorted_occurences = sorted(occurences.items(), key=operator.itemgetter(1))[::-1]
  59. for user in sorted_occurences:
  60.     if user[1] == 1:
  61.         print(user[0] + " posted 1 time")
  62.     else:
  63.         print(user[0] + " posted " + str(user[1]) + " times")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement