#!/usr/bin/python
"""Compute an average sentiment score for each tweet in a file.

Usage: script <term_file> <tweet_file>

The term file is a tab-separated sentiment lexicon (e.g. AFINN-111:
``term<TAB>score`` per line). The tweet file contains one JSON-encoded
tweet object per line (as produced by the Twitter streaming API). One
score per tweet is printed to stdout.
"""
import json
import string
import sys
import traceback

# Globals: term -> integer sentiment score, populated by get_scores().
scores = {}
debug_on = False


def get_tweet_score(tweet):
    """Return the average sentiment score of the tweet's text.

    Args:
        tweet: dict decoded from one line of the tweet file; the "text"
            key holds the tweet body. Tweets without "text" (e.g. delete
            notices in the stream) score 0.0.

    Returns:
        float: mean score over terms longer than 2 characters, or 0.0
        when the tweet has no text or no scorable terms.
    """
    try:
        terms_str = tweet["text"]
    except (KeyError, TypeError):
        # Stream entries such as delete notices carry no "text" field;
        # treat them as neutral rather than crashing.
        if debug_on:
            print('Err point 1:')
            traceback.print_exc()
        return 0.0

    total_score = 0
    count = 0
    for raw_term in terms_str.split():
        # Strip surrounding punctuation and drop non-ASCII characters so
        # terms line up with the ASCII-only keys of the AFINN lexicon.
        term = raw_term.strip(string.punctuation).encode(
            'ascii', 'ignore').decode('ascii')
        if len(term) > 2:  # ignore very short words ("a", "an", "RT", ...)
            s = scores.get(term.lower(), 0)
            if debug_on:
                print('....debug - score for term:', term, ':', s)
            total_score += s
            count += 1

    # Explicit guard instead of letting ZeroDivisionError be swallowed.
    if count == 0:
        return 0.0
    return float(total_score) / count


def get_scores(term_file):
    """Populate the global ``scores`` dict from an open term file.

    Each line has the AFINN format ``term<TAB>score``; terms may contain
    spaces (e.g. "can't stand") but never a tab.
    """
    for line in term_file:
        term, score = line.split("\t")
        scores[term] = int(score)


def main():
    """Load the lexicon, then print one sentiment score per tweet."""
    with open(sys.argv[1]) as term_file:
        get_scores(term_file)
    # Stream the tweet file line by line instead of materializing all
    # tweets at once — equivalent output, constant memory.
    with open(sys.argv[2]) as tweet_file:
        for line in tweet_file:
            print(get_tweet_score(json.loads(line)))


if __name__ == '__main__':
    main()