Advertisement
Guest User

/r/Circlebroke Data Script

a guest
Sep 26th, 2015
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.90 KB | None | 0 0
  1. from __future__ import division
  2. import praw
  3. import operator
  4. import datetime
  5. import types
  6.  
  7. client = praw.Reddit(user_agent="Super Cool Circlebroke thing by /u/SuperCyan")
  8. client.set_oauth_app_info("--Redacted--")
  9.  
  10. #Raw data storage
  11. submissions = [] #Stores Submission objects
  12. authors = [] #Stores usernames, total karma, and character count
  13. subreddits = [] #Stores thread links from Submission bodies
  14. dates = []
  15. valid_submission_count = 0 #Going to be used to get the top 100 posts of the year that meet the given criteria
  16.  
  17. #Processed data storage
  18. most_verbose_posters = []
  19. best_posters_by_karma = []
  20. best_posters_by_count = []
  21. top_subreddits_by_count = []
  22. top_subreddits_by_karma = []
  23. top_subreddits_by_average_karma = []
  24. most_active_dates = []
  25. average_characters = 0
  26. average_karma = 0
  27. average_user_posts = 0
  28. total_karma = 0
  29.  
  30. def main():
  31. get_data()
  32. process_general_information()
  33. process_user_data()
  34. process_subreddit_data()
  35. process_dates()
  36.  
  37.  
  38. print "CIRCLEBROKE STATS \n \n"
  39. print "GENERAL \n -------"
  40. print "Total Karma: " + str(round(total_karma))
  41. print "Average Karma per post: " + str(round(average_karma))
  42. print "Average number of characters per post: " + str(round(average_characters))
  43. print "Average number of posts per user: " + str(round(average_user_posts))
  44. print "USERS \n------"
  45. print "Top 5 Best Posters by karma"
  46.  
  47. for i in range(5):
  48. user = best_posters_by_karma[i]
  49. print str(i) + ". " + user["username"] + ": " + str(user["karma"]) + "(Average karma: " + str(round(user["average_karma"])) + ") (" + str(round((user["karma"]/total_karma)*100,2)) +" % of all karma)"
  50.  
  51. print
  52. print "Top 5 best posters by submission count"
  53. for i in range(5):
  54. user = best_posters_by_count[i]
  55. print str(i) + ". " + user["username"] + ": " + str(round(user["submission_count"]))
  56.  
  57. print
  58. print "Most Verbose posters"
  59. for i in range(5):
  60. user = most_verbose_posters[i]
  61. print str(i) + ". " + user["username"] + ": " + str(round(user["average_character_count"]))
  62.  
  63. print "\n"
  64. print "SUBREDDITS \n ----------"
  65. print "Top 5 Subreddits by post count"
  66. for i in range(5):
  67. sub = top_subreddits_by_count[i]
  68. print str(i) + ". " + sub["subreddit"] + ": " + str(round(sub["count"]))
  69.  
  70. print
  71. print "Top 5 Subreddits by average karma"
  72. for i in range(5):
  73. sub = top_subreddits_by_karma[i]
  74. print str(i) + ". " + sub["subreddit"] + ": " + str(round(sub["average_karma"])) + "(" + str(round((sub["karma"]/total_karma)*100,2)) + "% of all karma)"
  75.  
  76. print
  77. print "Top 5 Subreddits by total karma"
  78. for i in range(5):
  79. sub = top_subreddits_by_karma[i]
  80. print str(i) + ". " + sub["subreddit"] + ": " + str(round(sub["karma"])) + "(" + str(round((sub["karma"]/total_karma)*100,2)) + "% of all karma)"
  81.  
  82. print
  83. print "DATE \n ----"
  84. for i in range(5):
  85. date = most_active_dates[i]
  86. print str(i) + ". " + str(date["date"]) + ": " + str(date["count"])
  87.  
  88. def process_general_information():
  89. global average_characters, average_karma, total_karma, average_user_posts
  90. #Gets the average number of characters per post
  91. total_characters = 0
  92. for author in authors:
  93. total_characters += author["character_count"]
  94. average_characters = total_characters / valid_submission_count
  95.  
  96. #Gets the average karma per post
  97. total_karma = 0
  98. for author in authors:
  99. total_karma += author["karma"]
  100. average_karma = total_karma / valid_submission_count
  101.  
  102. average_user_posts = valid_submission_count / len(authors)
  103. def process_dates():
  104. """
  105. Gets the most active dates
  106. """
  107.  
  108. print "Processing dates..."
  109. dates.sort(key=operator.itemgetter("count"), reverse=True)
  110. #Gets the top
  111. for i in range(5):
  112. most_active_dates.append(dates[i])
  113.  
  114. print "Dates processed!"
  115.  
  116. def process_subreddit_data():
  117. """
  118. Gets the top 5 subreddits by post count and total karma
  119. """
  120.  
  121.  
  122. print "Processing subreddit data... (" + str(len(subreddits)) + " subreddits)"
  123. #Gets the average karma for each subreddit
  124. for sub in subreddits:
  125. sub["average_karma"] = sub["karma"] / sub["count"]
  126. #Gets the top subreddits by count
  127. subreddits.sort(key=operator.itemgetter("count"), reverse=True)
  128. for i in range(5):
  129. top_subreddits_by_count.append(subreddits[i])
  130.  
  131. #Gets the top subreddits by count
  132. subreddits.sort(key=operator.itemgetter("average_karma"), reverse=True)
  133. for i in range(5):
  134. top_subreddits_by_average_karma.append(subreddits[i])
  135.  
  136. #Gets the top subreddits by total karma
  137. subreddits.sort(key=operator.itemgetter("karma"), reverse=True)
  138. for i in range(5):
  139. top_subreddits_by_karma.append(subreddits[i])
  140.  
  141. def process_user_data():
  142. """
  143. Gets the top 5 users by karma, post count, and average length of posts
  144. """
  145.  
  146. print "Processing user data... (" + str(len(authors)) + " authors)"
  147. #Gets the average post length and karma for all posters
  148. for author in authors:
  149. author["average_character_count"] = author["character_count"] / author["submission_count"]
  150. author["average_karma"] = author["karma"] / author["submission_count"]
  151.  
  152. #Gets the top 5 posters by average post length
  153. authors.sort(key=operator.itemgetter("average_character_count"), reverse=True)
  154. for i in range(5):
  155. most_verbose_posters.append(authors[i])
  156.  
  157. #Gets the top 5 posters by karma
  158. authors.sort(key=operator.itemgetter("karma"), reverse=True)
  159. for i in range(5):
  160. best_posters_by_karma.append(authors[i])
  161.  
  162. #Gets the top 5 posters by submission count
  163. authors.sort(key=operator.itemgetter("submission_count"), reverse=True)
  164. for i in range(5):
  165. best_posters_by_count.append(authors[i])
  166.  
  167. print "User data processed!"
  168.  
  169. def get_data():
  170. global valid_submission_count
  171. """
  172. Gets Reddit data from /r/circlebroke
  173. """
  174. firstRun = True
  175. limit = 200
  176. submission_stream = client.get_subreddit("circlebroke").get_top_from_year(limit=limit) #I'd use a stream here, but a couple tests yielded weird results
  177. current_submission_index = 0 #Keeps track of where in the raw post list we're working with
  178.  
  179. #Grabs data from /r/circlebroke
  180. print "Getting info from /r/circlebroke..."
  181. while valid_submission_count < 100:
  182. for submission in submission_stream:
  183. submission_stream = client.get_subreddit("circlebroke").get_top_from_year(limit=limit)
  184. current_submission_index += 1
  185. #Adds submission to list if it's a self text and contains a link to within Reddit
  186. if submission.is_self and (submission.selftext.find("https://np.reddit.com") != -1 or submission.selftext.find("https://www.np.reddit.com") != -1) and submission.selftext.find("/r/") != -1:
  187. valid_submission_count += 1
  188.  
  189. body = submission.selftext;
  190. date = datetime.date.fromtimestamp(submission.created)
  191. sub_index = body.find("/r/", body.find("https://np."))
  192. sub = body[sub_index: body.find("/",sub_index+3)]
  193. if type(submission.author) == types.NoneType:
  194. pass
  195. else:
  196. author = submission.author.name
  197.  
  198. if firstRun:
  199. subreddits.append({"subreddit": sub, "count":1, "karma":submission.score})
  200. dates.append({"date":date, "count":1})
  201. authors.append({'username':author, 'karma':submission.score, "character_count":len(body), "submission_count":1})
  202. firstRun = False
  203. else:
  204. #Documents user information
  205. found = False
  206.  
  207. #Updates user information if they're already stored
  208. for authorDict in authors:
  209. if authorDict["username"] == author:
  210. authorDict["karma"] += submission.score
  211. authorDict["character_count"] += len(body)
  212. authorDict["submission_count"] += 1
  213. found = True
  214. #Adds a new entry for the user if they're new
  215. if not found:
  216. authors.append({'username':author, 'karma':submission.score, "character_count":len(body), "submission_count":1})
  217.  
  218. #Documents subreddit info
  219. found = False
  220.  
  221. #Updates subreddit information if it's already stored
  222. for subreddit in subreddits:
  223. if sub == subreddit["subreddit"]:
  224. subreddit["count"] += 1
  225. subreddit["karma"] += submission.score
  226. found = True
  227. #Adds a new entry for the subreddit if it's new
  228. if not found:
  229. subreddits.append({"subreddit": sub, "count":1, "karma":submission.score})
  230.  
  231. #Documents dates
  232. found = False
  233. #Updates date info if it's already stored
  234. for dateDict in dates:
  235. if dateDict == date:
  236. dateDict["count"] += 1
  237. #Adds a new entry for the date if it's new
  238. if not found:
  239. dates.append({"date":date, "count":1})
  240.  
  241. #Sets up to grab however many more posts that are needed
  242. limit = 100 - valid_submission_count
  243. print "Done getting info!"
  244. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement