Guest User

Untitled

a guest
Jul 17th, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.96 KB | None | 0 0
import pandas as pd
import tweepy

  3. # creating authentication
  4. auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
  5. auth.set_access_token(access_token, access_token_secret)
  6.  
  7. # connecting to the Twitter API using the above authentication
  8. api = tweepy.API(auth)
  9.  
  10. cities_data = {
  11. 'Akron': {'region': 'Midwest', 'state': 'Ohio'},
  12. 'Albuquerque': {'region': 'West', 'state': 'New Mexico'},
  13. }
  14.  
  15. # let's say we want to gauge how Americans feel about the immigration
  16. # policies of the Trump Administration. So the following is our query
  17. q = '(immigration OR immigrants) AND (families OR family) AND (separate OR separation) AND trump)'
  18.  
  19. # define a function to convert a list of tweets into a pandas dataframe
  20. def toDataFrame(tweets):
  21. df = pd.DataFrame()
  22. df['tweetID'] = [tweet.id for tweet in tweets]
  23. df['tweetText'] = [tweet.text for tweet in tweets]
  24. df['tweetRetweetCt'] = [tweet.retweet_count for tweet in tweets]
  25. df['tweetFavoriteCt'] = [tweet.favorite_count for tweet in tweets]
  26. df['tweetSource'] = [tweet.source for tweet in tweets]
  27. df['tweetCreated'] = [tweet.created_at for tweet in tweets]
  28. df['userID'] = [tweet.user.id for tweet in tweets]
  29. df['userScreen'] = [tweet.user.screen_name for tweet in tweets]
  30. df['userName'] = [tweet.user.name for tweet in tweets]
  31. df['userCreateDt'] = [tweet.user.created_at for tweet in tweets]
  32. df['userDesc'] = [tweet.user.description for tweet in tweets]
  33. df['userFollowerCt'] = [tweet.user.followers_count for tweet in tweets]
  34. df['userFollowingCt'] = [tweet.user.friends_count for tweet in tweets]
  35. df['userLocation'] = [tweet.user.location for tweet in tweets]
  36. df['userTimezone'] = [tweet.user.time_zone for tweet in tweets]
  37. df['coordinates'] = [tweet.coordinates for tweet in tweets]
  38. df['geolocation'] = [tweet.geo for tweet in tweets]
  39. df['place'] = [tweet.place for tweet in tweets]
  40. return df
  41.  
  42. tweets_df = pd.DataFrame() # initialize an empty global dataframe
  43.  
  44. # loop through the dictionary of cities_data and retrieve the tweets in
  45. # each city
  46. for city, city_data in cities_data.iteritems():
  47.  
  48. # get the city's place_id
  49. city_id = api.geo_search(
  50. query='{}'.format(city),
  51. wait_on_rate_limit=True,
  52. granularity='city'
  53. )[0].id
  54. cities_data[city]['id'] = city_id
  55.  
  56. city_tweets = [] # create an empty list to contain all the tweets
  57.  
  58. # get the tweets in each city
  59. for tweet in tweepy.Cursor(api.search, q=q + '-filter:retweets', contained_within=city_id, granularity='city', wait_on_rate_limit=True, lang="en").items(10):
  60. city_tweets.append(tweet)
  61.  
  62. # convert list of tweets into a pandas dataframe of tweets
  63. city_tweets_df = toDataFrame(city_tweets)
  64.  
  65. # append each row with city name, state name, and region.
  66. city_tweets_df['city'] = city
  67. city_tweets_df['state'] = city_data['state']
  68. city_tweets_df['region'] = city_data['region']
  69.  
  70. # add city dataframe to global dataframe
  71. tweets_df = tweets_df.append(city_tweets_df, ignore_index=True)
Add Comment
Please, Sign In to add comment