Advertisement
Guest User

Activity Counter

a guest
Jun 2nd, 2020
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.13 KB | None | 0 0
import re #import library to pull post IDs out of permalinks

import requests #import library to fetch the site
from bs4 import BeautifulSoup #import library to parse site content
  3.  
  4. while True: #make it so that you can get the post count for multiple threads without reopening the program
  5.     while True: #loop until the user enters a valid URL
  6.         try:
  7.             root = input('Enter base url: ') #tell the user to enter the base URL
  8.             site = requests.get(root) #request the site
  9.             siteContent = BeautifulSoup(site.content, 'html.parser') #get the site HTML content
  10.             break
  11.         except:
  12.             continue
  13.     try:
  14.         pages = int(siteContent.find("a", {"rel": "last"})['data-page']) #get the number of pages to go through, based on the page directory
  15.     except TypeError:
  16.         pages = 1
  17.  
  18.     #context: each post has a unique ID. These IDs are in chronological order, so they can be used to get a subset of a thread.
  19.     try: #ask for start ID (inclusive)
  20.         startPoint = int(input("Enter start ID (optional): "))
  21.     except:
  22.         startPoint = 0
  23.     try: #ask for end ID (inclusive)
  24.         endPoint = int(input("Enter end ID (optional): "))
  25.     except:
  26.         endPoint = 10000000
  27.  
  28.     userActivity = {}
  29.     longestUser = 0
  30.  
  31.     for page in range(1, pages+1): #cycle through the pages
  32.         newURL = root + '?page=' + str(page) #get the URL for the current page
  33.         currentPage = requests.get(newURL) #request new page
  34.         currentContent = BeautifulSoup(currentPage.content, 'html.parser') #get new page content
  35.  
  36.         IDs = currentContent.find_all(lambda tag: tag.name == 'a' and tag.get('class') == ['ipsType_blendLinks']) #fetch all IDs
  37.         IDList = list(map(lambda x: int(x.get('href')[len(root)+24:]), IDs))
  38.         if IDList[0] > endPoint:
  39.             break
  40.         elif IDList[len(IDList)-1] < startPoint:
  41.             continue
  42.  
  43.         users = currentContent.find_all("a", {"class": "ipsType_break"}) #find all usernames in the thread
  44.         users = list(map(lambda x: x.get_text(), users))[1:] #get the username text, and remove the first username (since it's the OP's name)
  45.         userNums = list(filter(lambda x: startPoint <= IDList[x] <= endPoint, range(len(users)))) #filter out invalid posts
  46.         for userNum in userNums:
  47.             user = users[userNum] #get the relevant user
  48.             if(len(user) > longestUser): #if a user has the longest name, set it as the new longest name
  49.                 longestUser = len(user)
  50.             try: #attempt to add one to the associated user, and if the user is not in the dictionary yet, intialize a key for them with an intial value of one
  51.                 userActivity[user] += 1
  52.             except KeyError:
  53.                 userActivity[user] = 1
  54.  
  55.     for i in sorted (userActivity.keys()): #cycle through all the listed users (sorted alphabetically)
  56.         toPrint = i + ' '*(longestUser-len(i)) + ' | ' + str(userActivity[i]) #create a row to print and make sure that they're lined up neatly
  57.         print(toPrint) #print the line
  58.     toPrint = "Total" + ' '*(longestUser-5) + ' | ' + str(sum(userActivity.values())) #get the total number of posts
  59.     print(toPrint)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement