#!/usr/bin/python
import sys, json, string, traceback

# This program takes two command line arguments: a term_file and a tweet_file.
# The term_file I used is AFINN-111 (available online).
# I used the twitterstream.py module to generate the tweet file.

# globals
scores = {}
debug_on = False
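# For reference, a sketch of the expected input formats (illustrative samples,
# not lines copied from the actual files):
#   term file line (AFINN-111 style): word<TAB>integer score, e.g. "abandon\t-2"
#   tweet file line: one JSON object per line containing a "text" field, e.g.
#     {"text": "I love this!", ...}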
def get_tweet_score(tweet):
    total_score, count = 0, 0
    try:
        # get the text of the tweet
        terms_str = tweet["text"]
        # strip punctuation and convert the text to a list of terms
        terms = [x.strip(string.punctuation).encode('ascii', 'ignore') for x in terms_str.split()]
        # look up the score for each term, or assign 0 if it is not found
        for term in terms:
            if len(term) > 2:
                try:
                    s = scores[term.lower()]
                except KeyError:
                    s = 0
                if debug_on: print '....debug - score for term:', term, ':', s
                total_score += s
                count += 1
        tweet_ave = float(total_score) / count
    except Exception:
        tweet_ave = 0
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        if debug_on: print 'Err point 1:'
        if debug_on: print ''.join('!! ' + line for line in lines)
    return float(tweet_ave)
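# Worked example (hypothetical scores, assuming "love" maps to +3 and "hate"
# to -3 in the term file; terms not listed, such as "python", score 0):
#   get_tweet_score({"text": "I love python but hate bugs"})
#   terms longer than 2 chars: love(+3), python(0), but(0), hate(-3), bugs(0)
#   -> average = (3 + 0 + 0 - 3 + 0) / 5 = 0.0
# A tweet with no "text" key (e.g. a rate-limit message from the stream)
# falls through to the outer except clause and scores 0.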
def get_scores(term_file):
    global scores
    for line in term_file:
        term, score = line.split("\t")  # tab-separated: term<TAB>score
        scores[term] = int(score)
def main():
    # get terms and their scores...
    term_file = open(sys.argv[1])
    get_scores(term_file)
    # get tweets from file...
    tweet_file = open(sys.argv[2])
    tweets = map(json.loads, tweet_file)  # creates a list of dictionaries (one per tweet)
    # get the sentiment score for each tweet...
    for tweet in tweets:
        print get_tweet_score(tweet)

if __name__ == '__main__':
    main()
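# Usage sketch (the script and file names below are placeholders, not part of
# the original paste):
#   $ python tweet_sentiment.py AFINN-111.txt output.txt
# One average sentiment score is printed per line, in the same order as the
# tweets in the tweet file.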