Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""
Simple Python example showing how to parse JSON-formatted Twitter messages+metadata
(i.e. data produced by the Twitter status tracking API).

This script simply creates Python lists containing the messages, locations and
timezones of all tweets in a single JSON file (one JSON document per line) and
prints them.

Usage: python <this script> <tweets.json>

Author: Geert Barentsen - 4 April (#dotastro)
"""
import difflib  # not used below; kept because the original script imports it
import io
import sys

try:
    import simplejson
except ImportError:
    # The standard-library json module exposes the same loads() API.
    import json as simplejson


def parse_tweets(lines):
    """Extract text, location and timezone from JSON-encoded tweets.

    Args:
        lines: iterable of strings, each expected to hold one JSON document
            as produced by the Twitter status tracking API.

    Returns:
        A tuple ``(texts, locations, timezones)`` of three equally long lists.
        ``texts`` holds the lower-cased tweet text; ``locations`` and
        ``timezones`` hold the corresponding ``user`` fields, or ``None``
        when the field (or the whole ``user`` object) is absent.

    Lines that are not valid JSON, are not JSON objects, have no ``text``
    field, or are retweets (native or manual) are skipped.
    """
    texts = []
    locations = []
    timezones = []

    for line in lines:
        # Only the parse itself is expected to fail on malformed input.
        try:
            tweet = simplejson.loads(line)
        except ValueError:
            continue

        # A line may decode to a bare number/string/list; those are not tweets.
        if not isinstance(tweet, dict):
            continue

        # Ignore native retweets and non-status messages (no "text" field).
        if "retweeted_status" in tweet or "text" not in tweet:
            continue

        text = tweet["text"].lower()

        # Ignore 'manual' retweets, i.e. messages starting with RT.
        # A substring search would also drop tweets that merely contain
        # words such as "start " or "smart ", so only the prefix is tested.
        if text.startswith("rt "):
            continue

        # Append to all three lists together so their indices stay aligned
        # even when the user metadata is missing.
        user = tweet.get("user") or {}
        texts.append(text)
        locations.append(user.get("location"))
        timezones.append(user.get("time_zone"))

    return texts, locations, timezones


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <tweets.json>" % sys.argv[0])

    # Input argument is the filename of the JSON ascii file from the Twitter API
    filename = sys.argv[1]

    # Stream the file line by line and make sure it is closed afterwards.
    with io.open(filename, "r", encoding="utf-8") as f:
        tweets_text, tweets_location, tweets_timezone = parse_tweets(f)

    # Show result
    print(tweets_text)
    print(tweets_location)
    print(tweets_timezone)
Advertisement
Add Comment
Please, Sign In to add comment