Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import praw
- import yaml
- import re
- import copy
- import string
- from praw.models import MoreComments
class commentor:
    """Running totals for a single reddit user across all scanned threads."""

    # Class-level defaults; the first update of a counter creates the
    # matching instance attribute, so instances never share totals.
    username = ''
    points = 0          # summed comment karma
    words = 0           # total words written
    comments = 0        # total comments posted
    top_comments = 0    # comments made directly on a submission
    reply_comments = 0  # comments made in reply to another comment
    replies = 0         # replies received on this user's comments
    threads = 0         # distinct threads the user commented in
    holy_shits = 0      # occurrences of the tracked phrase

    def __init__(self, user):
        """Create an empty tally for the account named *user*."""
        self.username = user

    def _bump(self, field, amount=1):
        """Add *amount* to the counter attribute called *field*."""
        setattr(self, field, getattr(self, field) + amount)

    def add_comment(self):
        """Record one more comment."""
        self._bump('comments')

    def add_top_comment(self):
        """Record one more top-level comment."""
        self._bump('top_comments')

    def add_reply(self):
        """Record one more comment written as a reply."""
        self._bump('reply_comments')

    def add_karma(self, karma):
        """Add *karma* to the user's point total."""
        self._bump('points', karma)

    def add_replies(self, replies):
        """Add *replies* to the count of replies the user received."""
        self._bump('replies', replies)

    def add_words(self, words):
        """Add *words* to the user's word total."""
        self._bump('words', words)

    def add_thread(self):
        """Record participation in one more thread."""
        self._bump('threads')

    def add_holy_shits(self, num):
        """Add *num* to the user's tracked-phrase count."""
        self._bump('holy_shits', num)
class ranked:
    """Track the winner and runner-up for one named statistic.

    Only scores strictly greater than the current values are recorded, and
    both slots start at 0, so entries with a score of 0 or less are never
    stored.
    """

    # Class-level defaults; add() rebinds them per instance on first use.
    desc = ''
    max_score = 0
    second_score = 0
    max_data = None
    second_data = None

    def __init__(self, desc):
        """Create an empty ranking labelled *desc*."""
        self.desc = desc

    def add(self, data, score):
        """Offer an entry; keep it if it beats the winner or the runner-up.

        A new winner demotes the previous winner to runner-up. Ties do not
        displace an existing entry.
        """
        if score > self.max_score:
            self.second_score = self.max_score
            self.second_data = self.max_data
            self.max_score = score
            self.max_data = data
        elif score > self.second_score:
            self.second_score = score
            self.second_data = data

    def __str__(self):
        """Return a multi-line report of the winner and runner-up.

        The data values are passed through str() so that a ranking with
        fewer than two recorded entries (data still None) or with
        non-string data can be printed instead of raising TypeError.
        """
        ret = self.desc + ':\n'
        ret += ' Winner: ' + str(self.max_data) + '\n'
        ret += ' Score: ' + str(self.max_score) + '\n'
        ret += ' Runner up: ' + str(self.second_data) + '\n'
        ret += ' Score: ' + str(self.second_score)
        return ret
class word:
    """Occurrence counter for one distinct word."""

    # Default tally; increment() rebinds it on the instance.
    count = 0

    def __init__(self, word):
        """Remember the text of the word being counted."""
        self.word = word

    def increment(self):
        """Record one more occurrence of the word."""
        self.count = self.count + 1
# Load login information from config.yml.
# The file is opened in a context manager so the handle is always closed.
# safe_load is used because the config only needs plain mappings/strings;
# yaml.load without an explicit Loader is rejected by PyYAML 6+ and can
# construct arbitrary objects on older versions.
with open('config.yml', 'r') as infile:
    config = yaml.safe_load(infile)
print("Config info loaded")

# Login to reddit
reddit = praw.Reddit(client_id=config['id'],
                     client_secret=config['secret'],
                     username=config['username'],
                     password=config['password'],
                     user_agent=config['agent'])

# verify login information
print('Logged in as', reddit.user.me())

# Get index post
index_submission = reddit.submission(id='559rhx')
print('Index Title:', index_submission.title)

# Filter index for episode discussion links.
# Raw string: the pattern is unchanged, but the backslashes are no longer
# interpreted (and warned about) as invalid string escape sequences.
# With a single group, findall returns just the captured submission ids.
refilter = r'\[\*\*Episode [0-9]+\*\*\]\(https:\/\/redd.it\/(?P<id>[a-zA-Z0-9]+)\)'
submission_links = re.findall(refilter, index_submission.selftext)
print('Found', len(submission_links), 'links')

# dictionary of commentors (username -> commentor) and of counted words
# (word text -> word)
users = dict()
words = dict()

# sort submission_links (plain string ordering of the ids)
submission_links.sort()

# statistics
deleted_comments = 0
total_holy_shits = 0
most_comments = ranked('Most Comments')
most_commentors = ranked('Most Commentors in a thread')
most_karma = ranked('Most Karma gained by a user')
most_comments_from_user = ranked('User with the most comments')
most_replied_to = ranked('Person with most replies')
most_replies = ranked('Person who replied the most')
most_top_level_comments = ranked('Person with most top comments')
most_used_word = ranked('Most used word')
most_holy_shits_comment = ranked('Comment with most holy shits')
most_holy_shits_post = ranked('Post with most holy shits')
most_words_comment = ranked('Comment with the most words')
most_words_post = ranked('Post with the most words')
most_words_user = ranked('User with the most words')
holy_shit_list = list()
full_metal_count = 0

# translate key: maps every ASCII punctuation character to None so that
# str.translate deletes it
trans_key = str.maketrans('', '', string.punctuation)
# Walk every linked discussion thread and gather per-thread, per-comment,
# per-user and per-word statistics.
for submission_id in submission_links:
    # print submission info
    print(submission_id + ':')
    sub = reddit.submission(id=submission_id)
    print(' Title :', sub.title)
    print(' Date :', sub.created_utc)
    # Guard against a missing author (e.g. a deleted account), the same
    # way deleted comment authors are handled below.
    print(' Author:', sub.author.name if sub.author is not None else '[deleted]')
    print(' Number of comments:', sub.num_comments)

    # limit=0 strips the 'More comments' placeholders without fetching
    # them, so only the initially loaded comments are analysed.
    sub.comments.replace_more(limit=0)

    # Submission stats
    commentors = set()  # names seen in this thread; set for O(1) lookup
    post_holy_shits = 0
    post_words = 0

    # Gather comment stats
    for comment in sub.comments.list():
        if comment.author is None:
            print(' Found deleted comment')
            deleted_comments += 1
        else:
            # Gather username stats
            name = comment.author.name
            print(' Found comment by', name)
            if name not in users:
                users[name] = commentor(name)
            if name not in commentors:
                commentors.add(name)
                users[name].add_thread()

            # Gather word stats
            body_copy = comment.body.lower()
            # str.translate returns a new string, so the result must be
            # kept; words are counted from this lower-cased,
            # punctuation-free text so "Hype!" and "hype" are one word.
            stripped_body = body_copy.translate(trans_key)
            # split() with no argument splits on any whitespace run and
            # never yields empty strings.
            comment_words = stripped_body.split()
            post_words += len(comment_words)
            for comment_word in comment_words:
                if comment_word not in words:
                    words[comment_word] = word(comment_word)
                words[comment_word].increment()

            # Phrase counts use the lower-cased body with punctuation
            # intact, so only literal occurrences are counted.
            full_metal_count += body_copy.count('full metal')
            holy_shits = body_copy.count('holy shit')
            total_holy_shits += holy_shits
            post_holy_shits += holy_shits

            # Gather user stats
            users[name].add_comment()
            users[name].add_karma(comment.score)
            users[name].add_replies(len(comment.replies))
            users[name].add_words(len(comment_words))
            users[name].add_holy_shits(holy_shits)
            # A parent id starting with 't3_' is treated as a submission,
            # i.e. a top-level comment. Compared by value, not with `is`.
            if comment.parent_id.startswith('t3_'):
                users[name].add_top_comment()
            else:
                users[name].add_reply()

            # Gather comment stats
            url = 'https://www.reddit.com/r/anime/comments/' + sub.id + '//' + comment.id
            most_holy_shits_comment.add(url, holy_shits)
            most_words_comment.add(url, len(comment_words))

    # Gather post stats
    most_comments.add(sub.title, sub.num_comments)
    most_commentors.add(sub.title, len(commentors))
    most_words_post.add(sub.title, post_words)
    most_holy_shits_post.add(sub.title, post_holy_shits)
    holy_shit_list.append((sub.title, post_holy_shits))
# Report each user's totals and feed them into the per-user rankings.
for user in users.values():
    print(user.username + ':')
    summary_rows = (
        (' Comments:', user.comments),
        (' Top Comments:', user.top_comments),
        (' Reply Comments:', user.reply_comments),
        (' Karma:', user.points),
        (' Words:', user.words),
        (' Times Replied To:', user.replies),
        (' Threads:', user.threads),
    )
    for label, value in summary_rows:
        print(label, value)

    # Run user statistics
    user_rankings = (
        (most_karma, user.points),
        (most_comments_from_user, user.comments),
        (most_replies, user.reply_comments),
        (most_replied_to, user.replies),
        (most_top_level_comments, user.top_comments),
        (most_words_user, user.words),
    )
    for ranking, score in user_rankings:
        ranking.add(user.username, score)

# Run Word Stats: rank every counted word by its number of occurrences.
for text, entry in words.items():
    most_used_word.add(text, entry.count)
# Print statistics
print('Found Statistics:')
print('Deleted Comments:', deleted_comments)
print(most_comments)
print(most_commentors)
print(most_karma)
print(most_comments_from_user)
print(most_words_user)
print(most_replied_to)
print(most_top_level_comments)
print(most_replies)
print(most_used_word)
print(most_words_comment)
print(most_words_post)
print(most_holy_shits_comment)
print(most_holy_shits_post)

# Per-thread phrase counts, in the order the threads were processed.
print('Holy shits by post:')
for title, count in holy_shit_list:
    print(' ' + title + ':', count)
print('Holy shit mentions:', total_holy_shits)

# 'hype' may never have been seen; report 0 instead of raising KeyError.
hype_entry = words.get('hype')
print('Hype mentions:', hype_entry.count if hype_entry is not None else 0)
print(' ')
print('Person who wasted the most time gathering useless stats: /u/mvolling')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement