Advertisement
Guest User

Untitled

a guest
Feb 8th, 2017
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.78 KB | None | 0 0
  1. import nltk
  2. import sys
  3. import sqlite3
  4. import string
  5. import re
  6. import praw
  7. import time
  8. import datetime
  9. import tweepy
  10. import math
  11. import praw
  12. import threading
  13. from collections import Counter
  14.  
  15. data = sqlite3.connect('users.db')
  16. cur = data.cursor()
  17. cur.execute('CREATE TABLE IF NOT EXISTS users(username, score)')
  18. data.commit()
  19.  
  20. consumer_key = 'CAhhQzaLq5R5RUdnGe5dlQRbS'
  21. consumer_secret = 'RIA7Lh4CjHYyTjpeWefDHEbnIcFpNxcWnZ1FocVy12bTp04qWM'
  22. access_token_key = '819717298479046656-k7PVX591CMT97JoKP50LTuXFzf3cyBc'
  23. access_token_secret = 'DtWb9V8YnWTgjUtjklKOA0CS8hi2MLybxdtT5QFhpnIse'
  24. twitter_uri = 'http://howtrumpareyou.com/'
  25.  
  26. auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
  27. auth.set_access_token(access_token_key, access_token_secret)
  28. api = tweepy.API(auth)
  29.  
  30. user_agent = 'Extracts text from comments on Reddit for comparison purposes'
  31. reddit = praw.Reddit(username='ComparisonBot',password='r3dd1t2016',client_id='slOtREeDSDjALg',client_secret='w0TBMNHGcZ0j9W6rqrCrUYM08nE', user_agent=user_agent)
  32.  
  33. trump = None
  34. compare = ''
  35. contrast = ''
  36.  
  37. class user(object):
  38.  
  39.     def __init__(self, account):
  40.         self.account = account
  41.  
  42.     def get_information(self):
  43.         tweet_list = []
  44.         master = []
  45.         user_tweets = api.user_timeline(self.account, count=200)
  46.         for tweet in user_tweets:
  47.             tweet_list.append(tweet)
  48.         last = tweet_list[-1].id - 1
  49.         while len(user_tweets) > 0:
  50.             user_tweets = api.user_timeline(self.account, count=200, max_id=last)
  51.             for tweet in user_tweets:
  52.                 tweet_list.append(tweet)
  53.             last = tweet_list[-1].id - 1
  54.         for tweet in tweet_list:
  55.             tweet = str(tweet.text).replace('the','')
  56.             tweet = tweet.replace('be','')
  57.             tweet = tweet.replace('to','')
  58.             tweet = tweet.replace('of','')
  59.             tweet = tweet.replace('and','')
  60.             tweet = tweet.replace('  ',' ')
  61.             master.append(tweet)
  62.         return master
  63.  
  64.     def sort_information(self):
  65.         tweets = self.get_information()
  66.         tweet_words = nltk.word_tokenize(str(tweets))
  67.         return (tweet_words,tweets)
  68.  
  69. class comparison(object):
  70.  
  71.     def __init__(self, person):
  72.         self.person = user(person).sort_information()
  73.         self.name = person
  74.         self.response()
  75.  
  76.     def calculate_similarity(self):
  77.         argument_1 = Counter(trump[0])
  78.         argument_2 = Counter(self.person[0])
  79.         terms = set(argument_1).union(argument_2)
  80.         product = sum(argument_1.get(i,0) * argument_2.get(i,0) for i in terms)
  81.         first = math.sqrt(sum(argument_1.get(i,0)**2 for i in terms))
  82.         second = math.sqrt(sum(argument_2.get(i,0)**2 for i in terms))
  83.         solution = product/(first*second)
  84.         length_1 = sum(argument_1.values())
  85.         length_2 = sum(argument_2.values())
  86.         lengths = min(length_1,length_2) / float(max(length_1,length_2))
  87.         similarity = round(lengths*solution * 100,2)
  88.         return similarity
  89.  
  90.     def sentence_similarity(self):
  91.         sim = self.calculate_similarity()
  92.         arg = Counter(str(trump[1]))
  93.         kwarg = Counter(str(self.person[1]))
  94.         intercept = set(arg.keys()) & (set(kwarg.keys()))
  95.         num = sum([arg[i] * kwarg[i] for i in intercept])
  96.         first = sum([arg[i]**2 for i in arg.keys()])
  97.         second = sum([kwarg[i]**2 for i in kwarg.keys()])
  98.         den = math.sqrt(first) * math.sqrt(second)
  99.         if not den:
  100.             similarity = 0.0
  101.         else:
  102.             similarity = float(num)/den
  103.         similarity = round((similarity*sim),2)
  104.         return similarity
  105.  
  106.     def response(self):
  107.         similarity = self.sentence_similarity()
  108.         print('{} is {} percent like {}!'.format(self.name,similarity,compare))
  109.  
  110. class redditor_(object):
  111.  
  112.     def __init__(self,account):
  113.         self.account = account
  114.  
  115.     def get_information(self):
  116.         comment_list = []
  117.         user = reddit.redditor(str(self.account))
  118.         [comment_list.append(str(comment.body)) for comment in user.comments.new(limit=5000)]
  119.         return comment_list
  120.  
  121.     def sort_information(self):
  122.         comments = self.get_information()
  123.         comment_words = nltk.word_tokenize(str(comments))
  124.         return (comment_words, comments)
  125.  
  126. class reddit_comparison(object):
  127.  
  128.     def __init__(self, person):
  129.         self.person = redditor_(person).sort_information()
  130.         self.name = person
  131.         self.response()
  132.  
  133.     def calculate_similarity(self):
  134.         argument_1 = Counter(trump[0])
  135.         argument_2 = Counter(self.person[0])
  136.         terms = set(argument_1).union(argument_2)
  137.         product = sum(argument_1.get(i,0) * argument_2.get(i,0) for i in terms)
  138.         first = math.sqrt(sum(argument_1.get(i,0)**2 for i in terms))
  139.         second = math.sqrt(sum(argument_2.get(i,0)**2 for i in terms))
  140.         solution = product/(first*second)
  141.         length_1 = sum(argument_1.values())
  142.         length_2 = sum(argument_2.values())
  143.         lengths = min(length_1,length_2) / float(max(length_1,length_2))
  144.         similarity = round(lengths*solution * 100,2)
  145.         return similarity
  146.  
  147.     def sentence_similarity(self):
  148.         sim = self.calculate_similarity()
  149.         arg = Counter(str(trump[1]))
  150.         kwarg = Counter(str(self.person[1]))
  151.         intercept = set(arg.keys()) & (set(kwarg.keys()))
  152.         num = sum([arg[i] * kwarg[i] for i in intercept])
  153.         first = sum([arg[i]**2 for i in arg.keys()])
  154.         second = sum([kwarg[i]**2 for i in kwarg.keys()])
  155.         den = math.sqrt(first) * math.sqrt(second)
  156.         if not den:
  157.             similarity = 0.0
  158.         else:
  159.             similarity = float(num)/den
  160.         similarity = round((similarity*sim),2)
  161.         return similarity
  162.  
  163.     def response(self):
  164.         similarity = self.sentence_similarity()
  165.         print('{} is {} percent like {} as of {}!'.format(self.name,similarity,compare, time.strftime('%D')))
  166.  
  167. def main():
  168.     global trump, compare, contrast
  169.     account_type = int(input('Is this a Reddit (1) or Twitter (2) account? '))
  170.     if account_type == 2:
  171.         compare = 'realDonaldTrump'
  172.         contrast = input('Compare to: ')
  173.         trump = user(compare).sort_information()
  174.         comparison(contrast)
  175.     else:
  176.         compare = input('Reddit username 1: ')
  177.         contrast = input('Reddit username 2: ')
  178.         print('Calculating...')
  179.         trump = redditor_(compare).sort_information()
  180.         reddit_comparison(contrast)
  181.  
  182. if __name__ == '__main__':
  183.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement