# Assignment 3 - Sentiment Analysis
# This is Kim Tan Pham | Student Number: 251068524
# From Section 001 Bauer
# This is my Sentiment Analysis Module
import string  # used to strip punctuation from tweet words
import sys  # used to terminate the program when an input file is missing
## VARIABLES ##

# Latitude band of the area of interest. The Eastern, Central, Mountain and
# Pacific regions all share this same latitude range; only their longitude
# ranges differ.
latMax = 49.189787
latMin = 24.660845

# Longitude boundaries of each region. Adjacent regions share a boundary
# value (e.g. easternLow == centralHigh).
easternLow = -87.518395
easternHigh = -67.444574
centralLow = -101.998892
centralHigh = -87.518395
mountainLow = -115.236428
mountainHigh = -101.998892
pacificLow = -125.242264
pacificHigh = -115.236428

# Module-level keyword -> sentiment score table, filled by compute_tweets.
keywordDict = {}
# Empty list that is printed alongside the "file not found" error messages.
emptyList = []

# Integer codes identifying each region; invalidRegion marks a coordinate
# that lies outside all four regions.
eastern = 4
central = 3
mountain = 2
pacific = 1
invalidRegion = -1
def timeZones(lat, long):
    """Return the region code for a coordinate.

    lat  -- latitude of the tweet
    long -- longitude of the tweet

    Returns one of eastern, central, mountain or pacific when the latitude
    lies within [latMin, latMax] and the longitude lies within that region's
    bounds. Returns invalidRegion in every other case, so the function never
    falls through and returns None.

    Regions are tested from east to west, so a longitude that sits exactly on
    a boundary shared by two regions is assigned to the more easterly one.
    """
    # Guard clause: outside the latitude band no region can match.
    if not (latMin <= lat <= latMax):
        return invalidRegion
    if easternLow <= long <= easternHigh:
        return eastern
    if centralLow <= long <= centralHigh:
        return central
    if mountainLow <= long <= mountainHigh:
        return mountain
    if pacificLow <= long <= pacificHigh:
        return pacific
    # Latitude was valid but the longitude matched none of the regions.
    return invalidRegion
def happinessValue(keywordList, keywordScores=None):
    """Return the total sentiment score of the given keywords.

    keywordList   -- iterable of keywords; every item must be a key of the
                     score table being used
    keywordScores -- optional mapping of keyword -> score; when omitted the
                     module-level keywordDict is used

    Returns 0 when keywordList is empty.
    Raises KeyError if a keyword is not present in the score table.
    """
    if keywordScores is None:
        keywordScores = keywordDict
    return sum(keywordScores[keyword] for keyword in keywordList)
def _load_keywords(keywordFileInput):
    """Read "keyword,value" lines from the file into a new dict and return it."""
    scores = {}
    with open(keywordFileInput, "r", encoding="utf-8") as keywordFile:
        for entry in keywordFile:
            if not entry.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            key, value = entry.split(",")
            scores[key] = int(value)  # int() ignores the trailing newline
    return scores


def _collect_region_keywords(tweetFileInput):
    """Map each region code to a list holding one keyword list per tweet."""
    regionDict = {}
    with open(tweetFileInput, "r", encoding="utf-8") as tweetFile:
        for line in tweetFile:
            tokens = line.lower().split()
            if not tokens:
                continue  # skip blank lines
            # The first two tokens are the coordinate pair "[lat," and "long]".
            lat = float(tokens[0].strip("[").strip(","))
            long = float(tokens[1].strip("]"))
            region = timeZones(lat, long)
            if region == invalidRegion:
                continue
            # The tweet text starts at the sixth token.
            stripped = (word.strip(string.punctuation) for word in tokens[5:])
            validList = [word for word in stripped if word in keywordDict]
            # Tweets without keywords are still recorded (as an empty list)
            # so that they count towards the region's total tweet count.
            regionDict.setdefault(region, []).append(validList)
    return regionDict


def _region_summary(keywordLists):
    """Return (average score, keyword-tweet count, tweet count) for a region."""
    scoreTotal = 0
    countOfKeywordTweets = 0
    for keywordList in keywordLists:
        if keywordList:
            # A tweet's score is the mean value of the keywords it contains.
            scoreTotal += happinessValue(keywordList) / len(keywordList)
            countOfKeywordTweets += 1
    if countOfKeywordTweets:
        averageSumHappyValue = round(scoreTotal / countOfKeywordTweets, 3)
    else:
        averageSumHappyValue = 0  # no keyword tweets: avoid dividing by zero
    return (averageSumHappyValue, countOfKeywordTweets, len(keywordLists))


def compute_tweets(tweetFileInput, keywordFileInput):
    """Compute per-region sentiment statistics for a file of tweets.

    tweetFileInput   -- path of the tweets file; each line starts with
                        "[lat, long]" and the tweet text begins at the sixth
                        whitespace-separated token
    keywordFileInput -- path of the keywords file with "keyword,value" lines

    Returns a tuple of four tuples, in the order Eastern, Central, Mountain,
    Pacific. Each inner tuple is
    (average happiness score rounded to 3 decimals,
     number of tweets containing at least one keyword,
     total number of tweets in the region).
    A region with no tweets at all is reported as (0.0, 0.0, 0.0).

    Side effects: the module-level keywordDict is replaced with the contents
    of the keywords file, so keywords from an earlier call do not leak into
    this one. If either file does not exist, an error message is printed and
    the program terminates via SystemExit.
    """
    try:
        loadedKeywords = _load_keywords(keywordFileInput)
    except FileNotFoundError:
        print("An error occurred! File not found! ", "\n",emptyList)
        sys.exit()
    # Update the shared table in place; happinessValue reads it by default.
    keywordDict.clear()
    keywordDict.update(loadedKeywords)

    try:
        regionDict = _collect_region_keywords(tweetFileInput)
    except FileNotFoundError:
        print("An error occurred! File not found!", "\n", emptyList)
        sys.exit()

    noTweets = (0.0, 0.0, 0.0)
    return tuple(
        _region_summary(regionDict[code]) if code in regionDict else noTweets
        for code in (eastern, central, mountain, pacific)
    )
# End of module (Pastebin page footer text removed).