Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import division

import datetime
import operator
import re
import types

import praw
# Reddit API client shared by every request this script makes.
client = praw.Reddit(user_agent="Super Cool Circlebroke thing by /u/SuperCyan")
client.set_oauth_app_info("--Redacted--")

# ---------------------------------------------------------------------------
# Raw data storage
# ---------------------------------------------------------------------------
submissions = []  # Meant for Submission objects
authors = []  # One dict per username: total karma, character count, post count
subreddits = []  # One dict per linked subreddit: post count and total karma
dates = []  # One dict per date: number of posts
# Running count of accepted posts; used to stop at the top 100 posts of the
# year that meet the given criteria.
valid_submission_count = 0

# ---------------------------------------------------------------------------
# Processed data storage (top-N rankings and overall averages)
# ---------------------------------------------------------------------------
most_verbose_posters = []
best_posters_by_karma = []
best_posters_by_count = []
top_subreddits_by_count = []
top_subreddits_by_karma = []
top_subreddits_by_average_karma = []
most_active_dates = []
average_characters = average_karma = average_user_posts = total_karma = 0
def main():
    """Fetch the /r/circlebroke data, compute the statistics and print them.

    Runs the collection and processing steps in order, then prints the
    general, per-user, per-subreddit and per-date sections.  Every ranking
    shows at most five entries (fewer when less data is available) and is
    numbered starting from 1.

    All output goes through single-argument ``print(...)`` calls so the
    text is the same whether ``print`` is a statement or a function.
    """
    get_data()
    process_general_information()
    process_user_data()
    process_subreddit_data()
    process_dates()

    def karma_share(karma):
        # Share of all karma as a percentage rounded to two decimals;
        # 0.0 when no karma was recorded, to avoid dividing by zero.
        if not total_karma:
            return 0.0
        return round((karma / total_karma) * 100, 2)

    def print_ranking(entries, describe):
        # Print up to five entries as "<rank>. <description>", ranks from 1.
        for rank, entry in enumerate(entries[:5], 1):
            print(str(rank) + ". " + describe(entry))

    print("CIRCLEBROKE STATS \n \n")

    print("GENERAL \n -------")
    print("Total Karma: " + str(round(total_karma)))
    print("Average Karma per post: " + str(round(average_karma)))
    print("Average number of characters per post: " + str(round(average_characters)))
    print("Average number of posts per user: " + str(round(average_user_posts)))

    print("USERS \n------")
    print("Top 5 Best Posters by karma")
    print_ranking(
        best_posters_by_karma,
        lambda user: user["username"] + ": " + str(user["karma"])
        + "(Average karma: " + str(round(user["average_karma"]))
        + ") (" + str(karma_share(user["karma"])) + " % of all karma)",
    )
    print("")
    print("Top 5 best posters by submission count")
    print_ranking(
        best_posters_by_count,
        lambda user: user["username"] + ": " + str(round(user["submission_count"])),
    )
    print("")
    print("Most Verbose posters")
    print_ranking(
        most_verbose_posters,
        lambda user: user["username"] + ": "
        + str(round(user["average_character_count"])),
    )
    print("\n")

    print("SUBREDDITS \n ----------")
    print("Top 5 Subreddits by post count")
    print_ranking(
        top_subreddits_by_count,
        lambda sub: sub["subreddit"] + ": " + str(round(sub["count"])),
    )
    print("")
    print("Top 5 Subreddits by average karma")
    # This section must read the average-karma ranking, not the total-karma
    # ranking that the following section prints.
    print_ranking(
        top_subreddits_by_average_karma,
        lambda sub: sub["subreddit"] + ": " + str(round(sub["average_karma"]))
        + "(" + str(karma_share(sub["karma"])) + "% of all karma)",
    )
    print("")
    print("Top 5 Subreddits by total karma")
    print_ranking(
        top_subreddits_by_karma,
        lambda sub: sub["subreddit"] + ": " + str(round(sub["karma"]))
        + "(" + str(karma_share(sub["karma"])) + "% of all karma)",
    )
    print("")

    print("DATE \n ----")
    print_ranking(
        most_active_dates,
        lambda day: str(day["date"]) + ": " + str(day["count"]),
    )
def process_general_information():
    """Compute the overall totals and averages from the collected authors.

    Reads the module-level ``authors`` list and ``valid_submission_count``
    and stores the results in the module-level ``total_karma``,
    ``average_characters``, ``average_karma`` and ``average_user_posts``.
    Averages fall back to 0 when there are no submissions or no authors,
    instead of raising ZeroDivisionError.
    """
    global average_characters, average_karma, total_karma, average_user_posts

    total_characters = sum(author["character_count"] for author in authors)
    total_karma = sum(author["karma"] for author in authors)

    if valid_submission_count:
        # Average number of characters and karma per accepted post.
        average_characters = total_characters / valid_submission_count
        average_karma = total_karma / valid_submission_count
    else:
        average_characters = 0
        average_karma = 0

    # Average number of accepted posts per distinct author.
    if authors:
        average_user_posts = valid_submission_count / len(authors)
    else:
        average_user_posts = 0
def process_dates():
    """Record the most active dates.

    Sorts the module-level ``dates`` list in place by descending ``count``
    and appends the first five entries (or fewer, when fewer dates exist)
    to ``most_active_dates``.
    """
    print("Processing dates...")
    dates.sort(key=operator.itemgetter("count"), reverse=True)
    # Slicing never raises, so a short list simply yields a short ranking.
    most_active_dates.extend(dates[:5])
    print("Dates processed!")
def process_subreddit_data():
    """Rank subreddits by post count, average karma and total karma.

    Adds an ``average_karma`` entry to every dict in the module-level
    ``subreddits`` list, then sorts that list in place three times and
    appends the first five entries of each ordering (or fewer, when fewer
    subreddits exist) to ``top_subreddits_by_count``,
    ``top_subreddits_by_average_karma`` and ``top_subreddits_by_karma``.
    """
    print("Processing subreddit data... (" + str(len(subreddits)) + " subreddits)")

    # Average karma per post for each subreddit.
    for sub in subreddits:
        sub["average_karma"] = sub["karma"] / sub["count"]

    # Top subreddits by post count.
    subreddits.sort(key=operator.itemgetter("count"), reverse=True)
    top_subreddits_by_count.extend(subreddits[:5])

    # Top subreddits by average karma.
    subreddits.sort(key=operator.itemgetter("average_karma"), reverse=True)
    top_subreddits_by_average_karma.extend(subreddits[:5])

    # Top subreddits by total karma.
    subreddits.sort(key=operator.itemgetter("karma"), reverse=True)
    top_subreddits_by_karma.extend(subreddits[:5])
def process_user_data():
    """Rank authors by average post length, total karma and post count.

    Adds ``average_character_count`` and ``average_karma`` entries to every
    dict in the module-level ``authors`` list, then sorts that list in
    place three times and appends the first five entries of each ordering
    (or fewer, when fewer authors exist) to ``most_verbose_posters``,
    ``best_posters_by_karma`` and ``best_posters_by_count``.
    """
    print("Processing user data... (" + str(len(authors)) + " authors)")

    # Average post length and karma per post for every author.
    for author in authors:
        posts = author["submission_count"]
        author["average_character_count"] = author["character_count"] / posts
        author["average_karma"] = author["karma"] / posts

    # Top posters by average post length.
    authors.sort(key=operator.itemgetter("average_character_count"), reverse=True)
    most_verbose_posters.extend(authors[:5])

    # Top posters by total karma.
    authors.sort(key=operator.itemgetter("karma"), reverse=True)
    best_posters_by_karma.extend(authors[:5])

    # Top posters by number of submissions.
    authors.sort(key=operator.itemgetter("submission_count"), reverse=True)
    best_posters_by_count.extend(authors[:5])

    print("User data processed!")
# Link prefixes that mark a post as pointing at another Reddit thread.
_NP_LINK_PREFIXES = ("https://np.reddit.com", "https://www.np.reddit.com")
# A subreddit reference such as "/r/AskReddit".
_SUBREDDIT_RE = re.compile(r"/r/[A-Za-z0-9_]+")


def _extract_subreddit(body):
    """Return the first "/r/<name>" at or after the first np.reddit.com link.

    Returns None when ``body`` has no np.reddit.com link or no subreddit
    reference follows it.
    """
    link_starts = [body.find(prefix) for prefix in _NP_LINK_PREFIXES]
    link_starts = [start for start in link_starts if start != -1]
    if not link_starts:
        return None
    match = _SUBREDDIT_RE.search(body, min(link_starts))
    if match is None:
        return None
    return match.group(0)


def _find_record(records, key, value):
    """Return the first dict in ``records`` whose ``key`` equals ``value``, else None."""
    for record in records:
        if record[key] == value:
            return record
    return None


def get_data():
    """Collect the top qualifying /r/circlebroke posts of the year.

    Walks the subreddit's top-of-the-year listing once and stops as soon as
    the module-level ``valid_submission_count`` reaches 100 or the listing
    is exhausted.  A post qualifies when it is a self post, its author
    still exists, and its text contains an np.reddit.com link followed by
    a "/r/<name>" reference.  Each qualifying post updates the module-level
    ``authors``, ``subreddits`` and ``dates`` lists.
    """
    global valid_submission_count

    target = 100

    print("Getting info from /r/circlebroke...")
    # limit=None lets the listing keep paging until we stop consuming it,
    # so every post is seen exactly once and nothing is double-counted.
    top_posts = client.get_subreddit("circlebroke").get_top_from_year(limit=None)
    for submission in top_posts:
        if valid_submission_count >= target:
            break
        # Skip link posts and posts whose author has been deleted, so
        # every counted post has an author to attribute it to.
        if not submission.is_self or submission.author is None:
            continue
        body = submission.selftext
        sub = _extract_subreddit(body)
        if sub is None:
            continue

        valid_submission_count += 1
        author = submission.author.name
        score = submission.score
        date = datetime.date.fromtimestamp(submission.created)

        # Documents user information.
        author_record = _find_record(authors, "username", author)
        if author_record is None:
            authors.append({
                "username": author,
                "karma": score,
                "character_count": len(body),
                "submission_count": 1,
            })
        else:
            author_record["karma"] += score
            author_record["character_count"] += len(body)
            author_record["submission_count"] += 1

        # Documents subreddit info.
        subreddit_record = _find_record(subreddits, "subreddit", sub)
        if subreddit_record is None:
            subreddits.append({"subreddit": sub, "count": 1, "karma": score})
        else:
            subreddit_record["count"] += 1
            subreddit_record["karma"] += score

        # Documents dates.
        date_record = _find_record(dates, "date", date)
        if date_record is None:
            dates.append({"date": date, "count": 1})
        else:
            date_record["count"] += 1

    print("Done getting info!")
# Script entry point: the whole report runs as soon as this module is loaded.
main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement