Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# This file computes the sentiment of tweets within each geographical region.
- import string
# Corner points of the geographical regions of the US.
# Each point is a (latitude, longitude) pair: the first component is
# 49.189787 or 24.660845, and the second ranges from -67.444574 down to
# -125.242264, which can only be a longitude.
# Odd-numbered points lie on the northern edge (49.189787) and even-numbered
# points on the southern edge (24.660845); consecutive pairs (p1/p2, p3/p4,
# ...) share one longitude.
p1 = (49.189787, -67.444574)
p2 = (24.660845, -67.444574)
p3 = (49.189787, -87.518395)
p4 = (24.660845, -87.518395)
p5 = (49.189787, -101.998892)
p6 = (24.660845, -101.998892)
p7 = (49.189787, -115.236428)
p8 = (24.660845, -115.236428)
p9 = (49.189787, -125.242264)
p10 = (24.660845, -125.242264)

# Each region is a tuple of four corners in the order:
# (north on the first longitude, south on the first longitude,
#  north on the second longitude, south on the second longitude).
# Adjacent regions share the two corners on their common longitude.
East = (p1, p2, p3, p4)
Central = (p3, p4, p5, p6)
Mountain = (p5, p6, p7, p8)
Pacific = (p7, p8, p9, p10)
def isInRegion(coordinates, region):
    """Return True when a point lies inside a rectangular region.

    Both bounds are inclusive on every side, so a point lying exactly on an
    edge or corner is considered inside.

    Args:
        coordinates: Indexable pair. Element 0 is compared against the first
            component of the region corners and element 1 against the second.
        region: Indexable collection of four corner pairs. Only three values
            are read: ``region[0][0]`` and ``region[1][0]`` bound the first
            component (upper and lower), ``region[1][1]`` and
            ``region[3][1]`` bound the second component (upper and lower).

    Returns:
        bool: True if both components fall within their bounds, else False.
    """
    x = coordinates[0]
    y = coordinates[1]

    upper_x = region[0][0]
    lower_x = region[1][0]
    upper_y = region[1][1]
    lower_y = region[3][1]

    # The chained comparisons already produce a bool; return it directly.
    return upper_x >= x >= lower_x and upper_y >= y >= lower_y
def _load_sentiment_words(sentimentFile):
    """Read ``keyword,value`` lines into a dict of lowercase keyword -> int value."""
    sentimentWords = {}
    # The context manager closes the file even if a line fails to parse.
    with open(sentimentFile, "r", encoding="utf-8") as inf:
        for line in inf:
            # A blank line (e.g. a trailing newline at end of file) has no
            # keyword/value pair; skip it instead of failing on parts[1].
            if not line.strip():
                continue
            parts = line.lower().split(",")
            # int() tolerates the trailing newline left on the value field.
            sentimentWords[parts[0]] = int(parts[1])
    return sentimentWords


def _score_tweet(text, sentimentWords):
    """Return ``(keyword count, summed sentiment value)`` for one tweet text."""
    keywords = 0
    sentiment = 0
    # Split on any run of whitespace so tabs or doubled spaces do not leave
    # words glued together or produce empty tokens.
    for word in text.lower().split():
        # Drop punctuation around the word before the lookup.
        word = word.strip(string.punctuation)
        if word in sentimentWords:
            keywords += 1
            sentiment += sentimentWords[word]
    return keywords, sentiment


def compute_tweets(tweetFile, sentimentFile):
    """Compute the average tweet sentiment for each geographical region.

    Each line of the sentiment file is ``keyword,value`` with an integer
    value. Each line of the tweet file is split on single spaces into six
    fields: ``[<latitude>,`` then ``<longitude>]``, then three fields that
    are not read here, then the tweet text (which may itself contain spaces).

    A tweet's happiness is the sum of the values of its keywords divided by
    the number of keywords found. Tweets without keywords still count towards
    a region's total tweet count but not towards its average.

    Args:
        tweetFile: Path of the UTF-8 tweet file.
        sentimentFile: Path of the UTF-8 sentiment keyword file.

    Returns:
        list: Four tuples, in the order East, Central, Mountain, Pacific.
        Each is ``(average happiness rounded to 3 places, number of tweets
        containing keywords, total number of tweets)``. The average is 0 when
        no tweet in the region contains a keyword.

    Raises:
        SystemExit: If either file cannot be found.
    """
    try:
        sentimentWords = _load_sentiment_words(sentimentFile)
    except FileNotFoundError:
        raise SystemExit("Sorry, that file could not be opened.")

    regions = (East, Central, Mountain, Pacific)
    # One running tally per region:
    # [tweet count, tweets-with-keywords count, summed happiness].
    totals = [[0, 0, 0] for _ in regions]

    try:
        with open(tweetFile, "r", encoding="utf-8") as inf:
            for line in inf:
                # Skip blank lines rather than failing on float("").
                if not line.strip():
                    continue
                # maxsplit=5 keeps the whole tweet text together in parts[5].
                parts = line.split(" ", 5)
                latitude = float(parts[0].lstrip("[").rstrip(","))
                longitude = float(parts[1].strip("]"))

                keywords, sentiment = _score_tweet(parts[5], sentimentWords)
                happiness = sentiment / keywords if keywords > 0 else 0

                # NOTE(review): every region is tested independently, so a
                # tweet that satisfies more than one region test (a point on
                # a shared boundary) is counted in each of those regions.
                for region, tally in zip(regions, totals):
                    if isInRegion([latitude, longitude], region):
                        tally[0] += 1
                        if keywords > 0:
                            tally[1] += 1
                            tally[2] += happiness
    except FileNotFoundError:
        raise SystemExit("Sorry, that file could not be opened.")

    results = []
    for tweetCount, keywordTweetCount, totalSentiment in totals:
        average = 0
        if keywordTweetCount > 0:
            average = round(totalSentiment / keywordTweetCount, 3)
        results.append((average, keywordTweetCount, tweetCount))
    return results
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement