Advertisement
Clarvel

Python filtered tweets

Jan 25th, 2016
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.08 KB | None | 0 0
  1. """
  2. README:
  3.  
  4. Matthew Russell, January 25, 2016
  5.  
  6. requires tweepy
  7.     (pip3 install tweepy)
  8.  
  9. Utilizing the tweepy module, this grabs all tweets from an account, filters
  10. the found tweets using a user defined function, and saves them to a CSV file.
  11.  
  12. save_file: the filename to save to
  13. target_username: the target account
  14. nitain_filter: the filter function used to filter nitain tweets
  15. """
  16.  
  17. import json
  18.  
  19. # https://github.com/tweepy/tweepy
  20. # for more info, http://docs.tweepy.org/en/latest/api.html
  21. import tweepy
  22.  
  23.  
  24. # Variables that contains the user credentials to access Twitter API
  25. # Get your own from https://apps.twitter.com/
  26. ACCESS_TOKEN = 'get your'
  27. ACCESS_SECRET = 'own damn'
  28. CONSUMER_KEY = 'access keys'
  29. CONSUMER_SECRET = 'and tokens'
  30.  
  31. save_file = 'nitain_tweets.csv'
  32. target_username = 'warframealerts'
  33.  
  34.  
  35. def nitain_filter(tweet):
  36.     """
  37.     edit this to determine how tweets are filtered
  38.     """
  39.     if 'nitain' in tweet.text.lower():
  40.         return True
  41.     return False
  42.  
  43. def get_all_tweets(screen_name, chunk_size=200):
  44.     """
  45.     returns all tweets from screen_name and that pass the filter_ check
  46.     You may also specify the number of chunks requested per tweepy call.
  47.  
  48.     Twitter only allows access to a users most recent 3240 tweets with this
  49.     method
  50.  
  51.     If chunk size is greater than 200, only 200 tweets are returned
  52.     """
  53.    
  54.     #authorize twitter, initialize tweepy
  55.     auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
  56.     auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
  57.     api = tweepy.API(auth)
  58.    
  59.     oldest_id = None  # id of the oldest tweet
  60.     new_tweets = []
  61.     downloaded_tweets = []
  62.  
  63.     # keep grabbing tweets until there are no tweets left to grab
  64.     while True:        
  65.         if oldest_id is None:
  66.             print("Requesting the first %i tweets" % (chunk_size))
  67.  
  68.             new_tweets = api.user_timeline(
  69.                 screen_name = screen_name,
  70.                 count=chunk_size
  71.             )
  72.         else:
  73.             print("Requesting %i tweets before id %s" % (chunk_size, oldest_id))
  74.  
  75.             # all subsiquent requests use the max_id param to prevent
  76.             # duplicates
  77.             new_tweets = api.user_timeline(
  78.                 screen_name = screen_name,
  79.                 count=chunk_size,
  80.                 max_id=oldest_id
  81.             )
  82.            
  83.         if not new_tweets:
  84.             print("Found all %i tweets" % len(downloaded_tweets))
  85.             break
  86.  
  87.         downloaded_tweets += new_tweets
  88.  
  89.         # update the id of the oldest tweet minus one, as this prevents
  90.         # grabbing the same or newer tweets (that have larger id vals)
  91.         oldest_id = new_tweets[-1].id - 1
  92.     return downloaded_tweets
  93.  
  94.  
  95. if __name__ == '__main__':
  96.     # pass in the username of the account you want to download
  97.     tweets = get_all_tweets(target_username)
  98.  
  99.     # filter tweets
  100.     tweets = [a for a in tweets if nitain_filter(a)]
  101.  
  102.     print("Found %i filtered tweets" % len(tweets))
  103.  
  104.     with open(save_file, 'w') as f:
  105.         """# use this instead ofr human readable
  106.         f.write('%s,%s,%s\n'%("id","created on","text"))
  107.  
  108.         for a in tweets:
  109.             f.write('%s\n'%(','.join([a.id_str, str(a.created_at), a.text])))
  110.         """
  111.  
  112.         # use this for quick date formatting
  113.         f.write('%s,%s\n'%("date","time"))
  114.  
  115.         for a in tweets:
  116.             f.write('%s\n'%(str(a.created_at).replace(' ', ',', 1)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement