#!/usr/bin/python
import sys, json, string, traceback

# This program takes two command line arguments: a term_file and a tweet_file.
# The term_file I used is AFINN-111 (available online).
# I used the twitterstream.py module to generate the tweet file.

# globals
scores = {}
debug_on = False
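# For reference, a sketch of the expected input formats (illustrative samples,
# not lines copied from the actual files):
#   term file line (AFINN-111 style): word<TAB>integer score, e.g. "abandon\t-2"
#   tweet file line: one JSON object per line containing a "text" field, e.g.
#     {"text": "I love this!", ...}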
def get_tweet_score(tweet):
    total_score, count = 0, 0
    try:
        # get the text of the tweet
        terms_str = tweet["text"]
        # strip punctuation and convert the text to a list of terms
        terms = [x.strip(string.punctuation).encode('ascii', 'ignore') for x in terms_str.split()]
        # look up the score for each term, or assign 0 if it is not found
        for term in terms:
            if len(term) > 2:
                try:
                    s = scores[term.lower()]
                except KeyError:
                    s = 0
                if debug_on: print '....debug - score for term:', term, ':', s
                total_score += s
                count += 1
        tweet_ave = float(total_score) / count
    except Exception:
        tweet_ave = 0
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        if debug_on: print 'Err point 1:'
        if debug_on: print ''.join('!! ' + line for line in lines)
    return float(tweet_ave)
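# Worked example (hypothetical scores, assuming "love" maps to +3 and "hate"
# to -3 in the term file; terms not listed, such as "python", score 0):
#   get_tweet_score({"text": "I love python but hate bugs"})
#   terms longer than 2 chars: love(+3), python(0), but(0), hate(-3), bugs(0)
#   -> average = (3 + 0 + 0 - 3 + 0) / 5 = 0.0
# A tweet with no "text" key (e.g. a rate-limit message from the stream)
# falls through to the outer except clause and scores 0.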
def get_scores(term_file):
    global scores
    for line in term_file:
        term, score = line.split("\t")  # tab-separated: term<TAB>score
        scores[term] = int(score)
def main():
    # get terms and their scores...
    term_file = open(sys.argv[1])
    get_scores(term_file)
    # get tweets from file...
    tweet_file = open(sys.argv[2])
    tweets = map(json.loads, tweet_file)  # creates a list of dictionaries (one per tweet)
    # get the sentiment score for each tweet...
    for tweet in tweets:
        print get_tweet_score(tweet)

if __name__ == '__main__':
    main()
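# Usage sketch (the script and file names below are placeholders, not part of
# the original paste):
#   $ python tweet_sentiment.py AFINN-111.txt output.txt
# One average sentiment score is printed per line, in the same order as the
# tweets in the tweet file.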