- """
- README:
- Matthew Russell, January 25, 2016
- requires tweepy
- (pip3 install tweepy)
- Utilizing the tweepy module, this grabs all tweets from an account, filters
- the found tweets using a user defined function, and saves them to a CSV file.
- save_file: the filename to save to
- target_username: the target account
- nitain_filter: the filter function used to filter nitain tweets
- """
# https://github.com/tweepy/tweepy
# For more info: http://docs.tweepy.org/en/latest/api.html
import tweepy
# Credentials used to access the Twitter API.
# Get your own from https://apps.twitter.com/
ACCESS_TOKEN = 'get your'
ACCESS_SECRET = 'own damn'
CONSUMER_KEY = 'access keys'
CONSUMER_SECRET = 'and tokens'

save_file = 'nitain_tweets.csv'
target_username = 'warframealerts'
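
# Optional (an assumption, not part of the original paste): prefer credentials
# from environment variables when they are set, falling back to the hardcoded
# values above. The environment variable names are hypothetical placeholders.
import os
ACCESS_TOKEN = os.environ.get('TWITTER_ACCESS_TOKEN', ACCESS_TOKEN)
ACCESS_SECRET = os.environ.get('TWITTER_ACCESS_SECRET', ACCESS_SECRET)
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', CONSUMER_KEY)
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', CONSUMER_SECRET)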

def nitain_filter(tweet):
    """
    Edit this to determine how tweets are filtered.
    """
    return 'nitain' in tweet.text.lower()
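
# A hypothetical variant (not in the original paste): filter on several
# keywords instead of only 'nitain'. Swap this in for nitain_filter in
# __main__ if you want broader matching; the example keywords are just
# placeholders.
def keyword_filter(tweet, keywords=('nitain', 'forma', 'orokin')):
    """Return True if the tweet text contains any of the given keywords."""
    text = tweet.text.lower()
    return any(keyword in text for keyword in keywords)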

def get_all_tweets(screen_name, chunk_size=200):
    """
    Returns all available tweets from screen_name (filtering happens in
    __main__, not here).

    chunk_size sets how many tweets are requested per API call; Twitter
    returns at most 200 tweets per request, so larger values are capped
    at 200. Twitter only allows access to a user's most recent ~3,200
    tweets with this method.
    """
    # Authorize with Twitter and initialize the tweepy API client
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    api = tweepy.API(auth)

    oldest_id = None  # id of the oldest tweet seen so far
    downloaded_tweets = []

    # Keep grabbing tweets until there are none left to grab
    while True:
        if oldest_id is None:
            print("Requesting the first %i tweets" % chunk_size)
            new_tweets = api.user_timeline(
                screen_name=screen_name,
                count=chunk_size
            )
        else:
            print("Requesting %i tweets before id %s" % (chunk_size, oldest_id))
            # All subsequent requests use the max_id param to prevent duplicates
            new_tweets = api.user_timeline(
                screen_name=screen_name,
                count=chunk_size,
                max_id=oldest_id
            )
        if not new_tweets:
            print("Found all %i tweets" % len(downloaded_tweets))
            break
        downloaded_tweets += new_tweets
        # Track the id of the oldest tweet minus one; this prevents grabbing
        # the same or newer tweets (which have larger id values) again
        oldest_id = new_tweets[-1].id - 1

    return downloaded_tweets
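
# A minimal sketch of an alternative (not the author's method): tweepy also
# ships a Cursor helper that handles the max_id pagination above internally.
# Assumes the same hardcoded credentials and is subject to the same ~3,200
# tweet limit.
def get_all_tweets_cursor(screen_name, chunk_size=200):
    """Download all available tweets from screen_name using tweepy.Cursor."""
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    api = tweepy.API(auth)
    # Cursor.items() lazily walks every page of the user timeline
    return list(tweepy.Cursor(api.user_timeline,
                              screen_name=screen_name,
                              count=chunk_size).items())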

if __name__ == '__main__':
    # Pass in the username of the account you want to download
    tweets = get_all_tweets(target_username)

    # Filter the downloaded tweets
    tweets = [a for a in tweets if nitain_filter(a)]
    print("Found %i filtered tweets" % len(tweets))

    with open(save_file, 'w') as f:
        """# Use this instead for human-readable output:
        f.write('%s,%s,%s\n' % ("id", "created on", "text"))
        for a in tweets:
            f.write('%s\n' % (','.join([a.id_str, str(a.created_at), a.text])))
        """
        # Use this for quick date formatting (splits "YYYY-MM-DD HH:MM:SS"
        # into separate date and time columns)
        f.write('%s,%s\n' % ("date", "time"))
        for a in tweets:
            f.write('%s\n' % (str(a.created_at).replace(' ', ',', 1)))
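
    # A hedged alternative (not in the original paste): tweet text can contain
    # commas and newlines, which would break hand-rolled CSV like the
    # human-readable variant above. The stdlib csv module quotes such fields
    # correctly. Sketch, assuming id/date/text columns and a hypothetical
    # output filename:
    #
    # import csv
    # with open('nitain_tweets_full.csv', 'w', newline='') as out:
    #     writer = csv.writer(out)
    #     writer.writerow(['id', 'created_at', 'text'])
    #     for a in tweets:
    #         writer.writerow([a.id_str, str(a.created_at), a.text])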