Advertisement
Guest User

Untitled

a guest
Nov 13th, 2019
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.49 KB | None | 0 0
# This file computes the sentiment of tweets within each region.
  2. import string
  3.  
  4. # the coordinates that define the geographical regions of the US
  5. p1 = (49.189787, -67.444574)
  6. p2 = (24.660845, -67.444574)
  7. p3 = (49.189787, -87.518395)
  8. p4 = (24.660845, -87.518395)
  9. p5 = (49.189787, -101.998892)
  10. p6 = (24.660845, -101.998892)
  11. p7 = (49.189787, -115.236428)
  12. p8 = (24.660845, -115.236428)
  13. p9 = (49.189787, -125.242264)
  14. p10 = (24.660845, -125.242264)
  15.  
  16. # defining each region by its coordinates
  17. East = (p1, p2, p3, p4)
  18. Central = (p3, p4, p5, p6)
  19. Mountain = (p5, p6, p7, p8)
  20. Pacific = (p7, p8, p9, p10)
  21.  
  22.  
  23. # this function checks whether the tweet is in the regions
  24. def isInRegion(coordinates, region):
  25.     # x is the x component of the coordinate
  26.     x = coordinates[0]
  27.     # y is the y component of the coordinate
  28.     y = coordinates[1]
  29.     # using lists within lists, it checks whether x and y falls within the area of the region
  30.     if region[0][0] >= x >= region[1][0] and region[1][1] >= y >= region[3][1]:
  31.         # if the tweet is within the region, it will return true
  32.         return True
  33.     else:
  34.         # if not, it will return false
  35.         return False
  36.  
  37.  
  38. # this function computes for the sentiment of tweets in each region
  39. def compute_tweets(tweetFile, sentimentFile):
  40.  
  41.     # creating a dictionary with the sentiment words; the key being the word itself and the value being the sentiment value
  42.     sentimentWords = {}
  43.  
  44.     # initializing the tweet counts, tweets with keyword count, and the total sentiment of each region
  45.     eastTweetCount = 0
  46.     eastKeywordTweetCount = 0
  47.     eastTotalSentiment = 0
  48.  
  49.     centralTweetCount = 0
  50.     centralKeywordTweetCount = 0
  51.     centralTotalSentiment = 0
  52.  
  53.     mountTweetCount = 0
  54.     mountKeywordTweetCount = 0
  55.     mountTotalSentiment = 0
  56.  
  57.     pacificTweetCount = 0
  58.     pacificKeywordTweetCount = 0
  59.     pacificTotalSentiment = 0
  60.  
  61.     # opens the sentiment text file and reads it
  62.     try:
  63.         inf = open(sentimentFile, "r", encoding="utf-8")
  64.         # for every line in the file, the characters are in lowercase and are split into 2 parts by a comma
  65.         for line in inf:
  66.             parts = line.lower()
  67.             parts = parts.split(",")
  68.             # the first part of the line is the keyword
  69.             keyword = parts[0]
  70.             # the second part of the line is the value of the keyword
  71.             keywordVal = int(parts[1])
  72.             # using the sentiment word dictionary, it relates the keyword to its value
  73.             sentimentWords[keyword] = keywordVal
  74.         # closes the sentiment file
  75.         inf.close()
  76.     # if the file cannot be found, it raises an exception and prints that the file could not be opened
  77.     except FileNotFoundError:
  78.         exit("Sorry, that file could not be opened.")
  79.  
  80.     # opens the tweet file and reads it
  81.     try:
  82.         inf = open(tweetFile, "r", encoding="utf-8")
  83.         for line in inf:
  84.             # initializes the count of keywords and sentiment value in each tweet
  85.             keywords = 0
  86.             sentiment = 0
  87.             # splits the line into five parts: the longitude, latitude, date, time, and tweet
  88.             parts = line.split(" ", 5)
  89.             # defines the x and y values used to check if the tweet is in the regions; x is longitude and y is the latitude
  90.             x = (float(parts[0].lstrip("[").rstrip(",")))
  91.             y = (float(parts[1].strip("]")))
  92.             # defines the tweet as the fifth part of the line, changes it to lowercase and strips spaces
  93.             tweet = parts[5].lower().strip()
  94.             # defines the words in the tweet
  95.             wordList = tweet.split(" ")
  96.             for word in wordList:
  97.                 # strips the punctuation found in the word
  98.                 word = word.strip(string.punctuation)
  99.                 if word in sentimentWords:
  100.                     # if a keyword is found in the words, the count of keywords in the tweet increases by one
  101.                     keywords = keywords + 1
  102.                     # the sentiment value of the word is added to the sentiment of the tweet
  103.                     sentiment = sentiment + sentimentWords[word]
  104.             # initializes the happiness to 0 so if there are no keywords in the tweet, it will return 0 as the happiness
  105.             happiness = 0
  106.             if keywords > 0:
  107.                 happiness = sentiment / keywords
  108.             if isInRegion([x,y],East):
  109.                 eastTweetCount = eastTweetCount + 1
  110.                 if keywords > 0:
  111.                     eastKeywordTweetCount = eastKeywordTweetCount + 1
  112.                 eastTotalSentiment = eastTotalSentiment + happiness
  113.             if isInRegion([x,y], Central):
  114.                 centralTweetCount = centralTweetCount + 1
  115.                 if keywords > 0:
  116.                     centralKeywordTweetCount = centralKeywordTweetCount + 1
  117.                 centralTotalSentiment = centralTotalSentiment + happiness
  118.             if isInRegion([x,y], Mountain):
  119.                 mountTweetCount = mountTweetCount +1
  120.                 if keywords > 0:
  121.                     mountKeywordTweetCount = mountKeywordTweetCount + 1
  122.                 mountTotalSentiment = mountTotalSentiment + happiness
  123.             if isInRegion([x,y], Pacific):
  124.                 pacificTweetCount = pacificTweetCount + 1
  125.                 if keywords > 0:
  126.                     pacificKeywordTweetCount = pacificKeywordTweetCount + 1
  127.                 pacificTotalSentiment = pacificTotalSentiment + happiness
  128.         inf.close()
  129.     except FileNotFoundError:
  130.          exit("Sorry, that file could not be opened.")
  131.  
  132.     eastAverage = 0
  133.     if eastKeywordTweetCount > 0:
  134.         eastAverage = round(eastTotalSentiment / eastKeywordTweetCount, 3)
  135.     centralAverage = 0
  136.     if centralKeywordTweetCount > 0:
  137.         centralAverage = round(centralTotalSentiment / centralKeywordTweetCount, 3)
  138.     mountAverage = 0
  139.     if mountKeywordTweetCount > 0:
  140.         mountAverage = round(mountTotalSentiment / mountKeywordTweetCount, 3)
  141.     pacificAverage = 0
  142.     if pacificKeywordTweetCount > 0:
  143.         pacificAverage = round(pacificTotalSentiment / pacificKeywordTweetCount, 3)
  144.  
  145.     eastRegion = (eastAverage, eastKeywordTweetCount, eastTweetCount)
  146.     centralRegion = (centralAverage, centralKeywordTweetCount, centralTweetCount)
  147.     mountRegion = (mountAverage, mountKeywordTweetCount, mountTweetCount)
  148.     pacificRegion = (pacificAverage, pacificKeywordTweetCount, pacificTweetCount)
  149.  
  150.     return [eastRegion, centralRegion, mountRegion, pacificRegion]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement