Advertisement
Guest User

Untitled

a guest
Nov 15th, 2018
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.56 KB | None | 0 0
  1. ##Python 3.5
  2.  
  3. import tweepy
  4. from tweepy import OAuthHandler
  5. from collections import *
  6. from nltk.tokenize import word_tokenize
  7. from nltk.corpus import stopwords
  8. import nltk
  9. import string
  10. import numpy as np
  11. from flask import Flask, json, jsonify, request, make_response
  12. # from flask_restplus import Api, Resource, fields
  13. from flask_restful import Resource, Api, reqparse
  14.  
  15. ckey = '3yI33cFzg0V5DBSTP35oSDtJE'
  16. csecret = 'nsz1YmNo431dTJAo99LhuqWiqgfs99f3eXRigN2G59cxocb6t3'
  17. atoken = '1000946197790683137-nLPGfJXdyI5OnQf18Bl6HyGgpIsDHE'
  18. asecret = 'elMUbkeKnouymctjlwhwNRlbud9Ygixpwmr7KWxz6Qgsb'
  19. simple_pos = 'ADV'
  20.  
  21.  
  22. num_of_tweets = 12
  23. num_most_common = 12
  24. auth = OAuthHandler(ckey, csecret)
  25. auth.set_access_token(atoken, asecret)
  26. apii = tweepy.API(auth)
  27. stop_words = set(stopwords.words("english"))
  28. newStopWords = ('RT', '...','https', 'amp', '&', '@')
  29. stop_words.update(newStopWords)
  30. partsos = 'ADV', 'ADP', 'ADJ', 'DET', 'NOUN', 'PRON', 'VERB'
  31. handle_input ='realDonaldTrump'
  32.  
  33.  
  34. def tweet_list(twitname):
  35.     tweet_list = apii.user_timeline(twitname, count=num_of_tweets, tweet_mode="extended")
  36.     tweet_pos_dict = {}
  37.     big_tweet = ""
  38.     for tweet in tweet_list:
  39.         if not tweet.retweeted and ('RT @' not in tweet.full_text):
  40.                 big_tweet = big_tweet + str(tweet.full_text.translate(string.punctuation).encode('ascii', 'ignore').decode("utf-8")) + " "
  41.     return big_tweet
  42.  
  43. def counter_total(tweet_string):
  44.     tweet_string = tweet_string.split()
  45.     total_words = Counter()
  46.     c = Counter() #initialize coutner object
  47.     for word in tweet_string:
  48.         filtered_sentence = []
  49.         if word not in stop_words:
  50.             filtered_sentence.append(word.encode('ascii', 'ignore').decode("utf-8")) #create new sentence with stopwords removed
  51.         c = Counter(filtered_sentence) #creates counter object from new sentence
  52.         total_words = Counter(c) + total_words #running total counter object  
  53.     total_words = total_words.most_common(30)    
  54.     x_list = []
  55.     y_list = []
  56.     for word in total_words:
  57.         x_list.append(word[0])
  58.         y_list.append(word[1])
  59.     return x_list, y_list
  60.  
  61. def simple_pos_most_common(tweet_string, simple_pos):
  62.     co = Counter()
  63.     text = word_tokenize(tweet_string)
  64.     taggedTweets = nltk.pos_tag(text, tagset='universal')
  65.     compiledWords =''
  66.     c = Counter
  67.     output_list = []
  68.     i = 0
  69.     for i in range(0,len(taggedTweets)): #for i in rnage (index 0 through length of posTagged)
  70.         if taggedTweets[i][1] == simple_pos and taggedTweets[i][0] not in stop_words: #if at current loops position i, look at index 1 in touple (word, NLTKTAG <-)
  71.             output_list.append((taggedTweets[i][0]))
  72.             c.update(taggedTweets[i][0])
  73.             i += 1
  74.     for word in output_list:
  75.         co[word] += 1
  76.     return co.most_common(num_most_common)
  77.  
  78. def plot_stuff(sorted_dict, pos):
  79.     x_list = []
  80.     y_list = []
  81.     list_30_common = sorted_dict[pos]
  82.     for i in range(0,len(list_30_common)):
  83.         x = list_30_common[i][0]
  84.         x_list.append(x)
  85.         y = list_30_common[i][1]
  86.         y_list.append(y)
  87.     return x_list, y_list
  88.  
  89. def sort_common_by_pos(tweet_string):
  90.     tweet_pos_dict = {}
  91.     for word in partsos:
  92.         a = simple_pos_most_common(tweet_string, word)
  93.         for words in a:
  94.             if words not in stop_words and "//t" not in words:
  95.                 tweet_pos_dict[word] = a    
  96.     return tweet_pos_dict
  97.  
  98. def update_pos_output(pos, usr):
  99.     tweet_string = tweet_list(usr.encode("ascii", "ignore"))
  100.     pos_sorted_words = sort_common_by_pos(tweet_string)
  101.     pos_x, pos_y = plot_stuff(pos_sorted_words, pos)
  102.     return pos_x, pos_y
  103.  
  104. def update_total_output(usr):
  105.     tweet_string = tweet_list(usr)
  106.     total_x, total_y = counter_total(tweet_string)
  107.     return total_x, total_y
  108.  
  109.  
  110. def getPos(pos,tHandle):
  111.         data_dict[pos] = update_pos_output(pos, tHandle)
  112.         return(data_dict[pos])
  113.    
  114. app = Flask(__name__)
  115. api = Api(app)
  116. noun = 'NOUN'
  117. data_dict = {}
  118.  
  119. class pos(Resource):
  120.  
  121.     def post(self):
  122.         parser = reqparse.RequestParser()
  123.         parser.add_argument('handle')
  124.         parser.add_argument('pos')
  125.         args = parser.parse_args()
  126.         toJson=[]
  127.         toJson = [args]        
  128.         return json.dumps(toJson), 201
  129.  
  130.  
  131. api.add_resource(pos, '/')
  132. if __name__ == '__main__':
  133.     app.run(debug=True)
  134.  
  135.  
  136.     # def get(self, tPos):
  137.     #     jsonList = []
  138.     #     jsonList.append(handle_input)
  139.     #     jsonList.append(getPos(tPos, handle_input))
  140.     #     return jsonify(json.dumps(jsonList))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement