Advertisement
ChrisProsser

tweet_sentiment.py

Jun 28th, 2013
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.94 KB | None | 0 0
  1. #!/usr/bin/python
  2. import sys, json, string, traceback
  3.  
  4. # this program takes two command line arguments: a term_file and a tweet_file
  5. # the term_file I have used is AFINN-111 (available online)
  6. # I used the twitterstream.py module to generate the tweet file
  7.  
  8. # globals: module-level state shared by the functions below
  9. scores = {}  # term -> integer sentiment score; filled in by get_scores()
  10. debug_on = False  # when True, get_tweet_score() prints per-term scores and error traces
  11.  
  12. def get_tweet_score(tweet):
  13.     total_score, count = 0, 0
  14.     try:
  15.         # get string of the terms (the text of the tweet)
  16.         terms_str = tweet["text"]
  17.  
  18.         # strip punctuation and convert terms to list
  19.         terms = [x.strip(string.punctuation).encode('ascii', 'ignore') for x in terms_str.split()]
  20.  
  21.         # lookup score for each term or assign 0 if not found
  22.         for term in terms:
  23.             if len(term) > 2:
  24.                 try:
  25.                     s = scores[term.lower()]
  26.                 except:
  27.                     s = 0
  28.                 if debug_on: print '....debug - score for term:', term, ':', s
  29.                 total_score += s
  30.                 count += 1
  31.  
  32.         tweet_ave = float(total_score) / count
  33.        
  34.     except:
  35.         tweet_ave = 0
  36.         exc_type, exc_value, exc_traceback = sys.exc_info()
  37.         lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
  38.         if debug_on: print 'Err point 1:'
  39.         if debug_on: print ''.join('!! ' + line for line in lines)
  40.  
  41.     return float(tweet_ave)
  42.  
  43. def get_scores(term_file):
  44.     global scores
  45.     for line in term_file:
  46.         term, score  = line.split("\t") #tab character
  47.         scores[term] = int(score)
  48.  
  49. def main():
  50.     # get terms and their scores..
  51.     term_file = open(sys.argv[1])
  52.     get_scores(term_file)
  53.  
  54.     # get tweets from file...
  55.     tweet_file = open(sys.argv[2])
  56.     tweets = map(json.loads, tweet_file) #creates a list of dictionaries (one per tweet)
  57.  
  58.     # get sentiment score for each tweet...
  59.     for tweet in tweets:
  60.         print get_tweet_score(tweet)
  61.  
  62. if __name__ == '__main__':
  63.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement