# Untitled

# this file computes the sentiment of tweets within each geographical region of the US
import string

# every region spans the same northern and southern limits (the first component of each point)
_NORTH = 49.189787
_SOUTH = 24.660845

# boundary points of the US regions, each a (latitude, longitude) pair;
# odd-numbered points sit on the northern limit, even-numbered points on the southern limit,
# and each successive pair lies further west than the one before it
p1, p2 = (_NORTH, -67.444574), (_SOUTH, -67.444574)
p3, p4 = (_NORTH, -87.518395), (_SOUTH, -87.518395)
p5, p6 = (_NORTH, -101.998892), (_SOUTH, -101.998892)
p7, p8 = (_NORTH, -115.236428), (_SOUTH, -115.236428)
p9, p10 = (_NORTH, -125.242264), (_SOUTH, -125.242264)

# each region is a rectangle given by four corners in the order
# (north-east, south-east, north-west, south-west); neighbouring regions share an edge
East = (p1, p2, p3, p4)
Central = (p3, p4, p5, p6)
Mountain = (p5, p6, p7, p8)
Pacific = (p7, p8, p9, p10)


# this function reports whether a coordinate pair lies inside a rectangular region (edges included)
def isInRegion(coordinates, region):
    # the first and second components of the point being tested
    first, second = coordinates[0], coordinates[1]
    # the first component must lie between the first components of corner 1 (lower) and corner 0 (upper)
    if not (region[0][0] >= first >= region[1][0]):
        # outside that band, so the point cannot be in the region
        return False
    # the second component must lie between the second components of corner 3 (lower) and corner 1 (upper)
    return region[1][1] >= second >= region[3][1]


def _load_sentiment_words(sentimentFile):
    """Read the sentiment file into a ``{keyword: value}`` dictionary.

    Every non-blank line is lowercased and split on commas; the first field
    is the keyword and the second is converted with ``int``.

    Raises:
        FileNotFoundError: if ``sentimentFile`` does not exist.
        IndexError, ValueError: if a non-blank line has no second field or
            its second field is not an integer.
    """
    sentimentWords = {}
    # the with-statement closes the file even when a line fails to parse
    with open(sentimentFile, "r", encoding="utf-8") as inf:
        for line in inf:
            # a blank line (for example a trailing newline) carries no keyword
            if not line.strip():
                continue
            parts = line.lower().split(",")
            # int() tolerates the newline that is still attached to the value
            sentimentWords[parts[0].strip()] = int(parts[1])
    return sentimentWords


def _score_tweet(text, sentimentWords):
    """Return ``(keywords, happiness)`` for the text of one tweet.

    ``keywords`` is the number of words found in ``sentimentWords`` and
    ``happiness`` is the mean sentiment value of those words, or 0 when the
    tweet contains no keyword.
    """
    keywords = 0
    sentiment = 0
    for word in text.lower().strip().split(" "):
        # punctuation is removed from both ends only, so "happy!" matches "happy"
        word = word.strip(string.punctuation)
        if word in sentimentWords:
            keywords += 1
            sentiment += sentimentWords[word]
    happiness = sentiment / keywords if keywords > 0 else 0
    return keywords, happiness


def compute_tweets(tweetFile, sentimentFile):
    """Compute the average tweet sentiment for each of the four regions.

    Each non-blank line of ``tweetFile`` is split on spaces: the first two
    fields hold the bracketed coordinate pair (``[a,`` and ``b]``), the next
    three fields are ignored here, and the rest of the line is the tweet
    text.  The coordinate pair is passed to ``isInRegion`` in file order,
    which compares the first value against the regions' 24-49 bounds (the
    latitude) and the second against their longitude bounds.

    Args:
        tweetFile: path of the tweet file.
        sentimentFile: path of the ``keyword,value`` sentiment file.

    Returns:
        A list of four tuples in the order East, Central, Mountain, Pacific.
        Each tuple is ``(average, keywordTweetCount, tweetCount)`` where
        ``average`` is the mean happiness of the tweets containing at least
        one keyword, rounded to 3 decimals (0 when there are none),
        ``keywordTweetCount`` is the number of such tweets and ``tweetCount``
        is the number of all tweets found in the region.

    Raises:
        SystemExit: with the message "Sorry, that file could not be opened."
            when either file does not exist.
        IndexError, ValueError: when a non-blank line of either file does not
            have the expected fields.
    """
    # SystemExit is raised directly instead of calling exit(), which is a helper
    # added by the site module for interactive use and is not always available
    try:
        sentimentWords = _load_sentiment_words(sentimentFile)
    except FileNotFoundError:
        raise SystemExit("Sorry, that file could not be opened.")

    # one accumulator slot per region, kept in the order of the returned list
    regions = (East, Central, Mountain, Pacific)
    tweetCounts = [0] * len(regions)
    keywordTweetCounts = [0] * len(regions)
    totalSentiments = [0] * len(regions)

    try:
        with open(tweetFile, "r", encoding="utf-8") as inf:
            for line in inf:
                # skip blank lines rather than failing on the float conversion
                if not line.strip():
                    continue
                # six parts: the two coordinates, three skipped fields, and the tweet text
                parts = line.split(" ", 5)
                x = float(parts[0].lstrip("[").rstrip(","))
                y = float(parts[1].strip("]"))
                keywords, happiness = _score_tweet(parts[5], sentimentWords)
                # every region is tested independently, so a tweet accepted by
                # isInRegion for two regions is counted in both
                for index, region in enumerate(regions):
                    if not isInRegion([x, y], region):
                        continue
                    tweetCounts[index] += 1
                    if keywords > 0:
                        keywordTweetCounts[index] += 1
                        totalSentiments[index] += happiness
    except FileNotFoundError:
        raise SystemExit("Sorry, that file could not be opened.")

    results = []
    for total, keywordCount, tweetCount in zip(totalSentiments, keywordTweetCounts, tweetCounts):
        # only tweets with at least one keyword take part in the average
        average = round(total / keywordCount, 3) if keywordCount > 0 else 0
        results.append((average, keywordCount, tweetCount))
    return results