Advertisement
Guest User

Untitled

a guest
Nov 13th, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.13 KB | None | 0 0
  1. # Assignment 3 - Sentiment Analysis
  2.  
  3. # This is Kim Tan Pham | Student Number: 251068524
  4. # From Section 001 Bauer
  5. # This is my Sentiment Analysis Module
  6.  
import string  # string.punctuation is used to strip punctuation from tweet words
  8.  
  9.  
  10. ## VARIABLES ##
  11.  
  12. latMax = 49.189787                  # initializing the maximum and minimum of the latitude
  13. latMin = 24.660845                  # since the Eastern, Central, Mountain, and Pacific all have a latitude of
  14.  
  15. easternLow = -87.518395             # initializing the boundaries of regions
  16. easternHigh = -67.444574
  17. centralLow = -101.998892
  18. centralHigh = -87.518395
  19. mountainLow = -115.236428
  20. mountainHigh = -101.998892
  21. pacificLow = -125.242264
  22. pacificHigh = -115.236428
  23.  
  24.  
  25. keywordDict = {}     # initializing keywordDict
  26. emptyList = []       # initializing emptyList
  27.  
  28. eastern = 4          # initializing regions
  29. central = 3
  30. mountain = 2
  31. pacific = 1
  32. invalidRegion = -1
  33.  
  34. def timeZones(lat, long):                       # function to determine which region the tweet is from
  35.     if latMin <= lat <= latMax:                 # this statement sets boundaries for latitude
  36.         if easternLow <= long <= easternHigh:   # if statement to determine if longitude is within the Eastern region (does this for all if statements in this function)
  37.             return eastern                      # if long is within Eastern region boundaries, return eastern         (does this for all if statements in this function)
  38.  
  39.         if centralLow <= long <= centralHigh:
  40.             return central
  41.  
  42.         if mountainLow <= long <= mountainHigh:
  43.             return mountain
  44.  
  45.         if pacificLow <= long <= pacificHigh:
  46.             return pacific
  47.         else:                                  # else statement if longitude is NOT within any of the regions
  48.             return invalidRegion               # return invalidRegion instead
  49.  
  50. def happinessValue(keywordList):                        # function to determine the happiness of a word if it is in keywordList
  51.     happiness = 0                                       # initializing happiness
  52.     for keyword in keywordList:                         # for statement to add the value of the keyword in keywordList to happiness
  53.         happiness = happiness + keywordDict[keyword]    # adds value of keyword to happiness
  54.     return happiness                                    # returns the calculation from the statement above
  55.  
  56.  
  57. def compute_tweets(tweetFileInput, keywordFileInput):   # function to calculate the sentiment value of a tweet and returns a list of tuples
  58.  
  59.  
  60.     try:                                                               # try statement to test the lines within this try statement for errors
  61.         keywordFile = open(keywordFileInput, "r", encoding="utf-8")    # opens keywordFile if it exists
  62.         for w in keywordFile:              # for loop that looks at each item in keywordFile
  63.             key, value = w.split(",")      # splits the items in the file
  64.             keywordDict[key] = int(value)  # each word is assigned a value, ex: good = 7
  65.         keywordFile.close()                # closes the file
  66.     except FileNotFoundError:              # except statement when the file does not exists, this statement will run
  67.         print("An error occurred! File not found! ", "\n",emptyList)
  68.         exit()
  69.  
  70.     try:                                                           # try statement to test the lines within this try statement for errors
  71.         tweetFile = open(tweetFileInput, "r", encoding="utf-8")    # opens tweetFile if it exists
  72.  
  73.         regionDict = {}   # initializing regionDict
  74.  
  75.         for line in tweetFile:      # for statement that looks at each line in tweetFile
  76.             happyValue = 0          # initializing happyValue
  77.             line = line.lower()     # lowers and splits each line
  78.             line = line.split()
  79.             line[0] = float(line[0].strip("[").strip(","))     # strips the [ and , in the first part of the coordinate (latitude in this case)
  80.             line[1] = float(line[1].strip("]"))                # strips the ] at the end of the longitude
  81.             lat = float(line[0])         # latitude is assigned
  82.             long = float(line[1])        # longitude is assigned
  83.             region = timeZones(lat,long) # region is assigned
  84.             if region == invalidRegion:  # when region is invalidRegion, for loop will continue
  85.                 continue
  86.  
  87.             words = line[5:]    # from element 5 and beyond, the tweet begins
  88.             validList = []      # initializing validList
  89.             for w in words:     # for statement that looks at each item in words
  90.                 w = w.strip(string.punctuation)  # strips the punctuation for
  91.                 if w in keywordDict:      # if w is in keywordDict, w will get appended to validList
  92.                     validList.append(w)   # validList is a list of all the items with keywords
  93.             if region in regionDict:      # if region is in regionDict, validList will get appended to regionDict with key of region
  94.                 regionDict[region].append(validList)
  95.             else:
  96.                 regionDict[region] = []   # if region is NOT in regionDict, regionDict will be an empty list with key of region
  97.                 regionDict[region].append(validList)
  98.  
  99.         tweetFile.close()       # end of file, close loop
  100.  
  101.         regionResult = {}   # initializing regionResult
  102.         for region in regionDict:  # for statement that looks at each item in regionDict
  103.             averageHappyValue = 0                  # initializing averageHappyValue
  104.             averageSumHappyValue = 0               # initializing averageSumHappyValue
  105.             regionKeywords = regionDict[region]    # initializing regionKeywords
  106.             countOfTweets = len(regionKeywords)    # initializing countOfTweets
  107.             countOfKeywordTweets = 0               # initializing countOfKeywordTweets
  108.  
  109.             for keywordList in regionKeywords:   # for statement that looks at each item in regionKeywords
  110.                 if len(keywordList) > 0:         # if statement that will run if there is at least 1 item in keywordList,
  111.                     happyValue = happinessValue(keywordList)     # uses happinessValue function to assigns the value of that to happyValue
  112.                     countOfKeywordTweets = countOfKeywordTweets + 1   # counter for keywordtweets
  113.                     averageHappyValue = (happyValue / len(keywordList)) + averageHappyValue   # equation to determine averageHappyValue
  114.                     averageSumHappyValue = round(averageHappyValue / countOfKeywordTweets,3)  # rounds averageSumHappyValue to 3 decimal places
  115.  
  116.                     # dictionary with region as key
  117.                     regionResult[region] = {"averageSumHappyValue": averageSumHappyValue, "countOfKeywordTweets": countOfKeywordTweets, "countOfTweets": countOfTweets}
  118.  
  119.  
  120.         try:  # try statements to test the lines within this try statement for errors, if none are found then code will proceed normally
  121.             Eastern = (regionResult[eastern]["averageSumHappyValue"],regionResult[eastern]["countOfKeywordTweets"],regionResult[eastern]["countOfTweets"])
  122.         except:   # except statement if there is an error within this try statement
  123.             Eastern = (0.0, 0.0, 0.0)
  124.  
  125.         try:
  126.             Central = (regionResult[central]["averageSumHappyValue"],regionResult[central]["countOfKeywordTweets"],regionResult[central]["countOfTweets"])
  127.         except:
  128.             Central = (0.0, 0.0, 0.0)
  129.  
  130.         try:
  131.             Mountain = (regionResult[mountain]["averageSumHappyValue"],regionResult[mountain]["countOfKeywordTweets"],regionResult[mountain]["countOfTweets"])
  132.         except:
  133.             Mountain = (0.0, 0.0, 0.0)
  134.  
  135.         try:
  136.             Pacific = (regionResult[pacific]["averageSumHappyValue"],regionResult[pacific]["countOfKeywordTweets"],regionResult[pacific]["countOfTweets"])
  137.         except:
  138.             Pacific = (0.0, 0.0, 0.0)
  139.  
  140.  
  141.         return Eastern, Central, Mountain, Pacific
  142.  
  143.     except FileNotFoundError:
  144.         print("An error occurred! File not found!", "\n", emptyList)
  145.         exit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement