Advertisement
Guest User

Untitled

a guest
Apr 30th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.28 KB | None | 0 0
  1. #ahamed4_Assign3.py
  2. #################################################
  3. ##############Import tools#######################
  4. from graphics import GraphicsWindow
  5. import happy_histogram
  6. import sys
  7.  
  8. ################################################
  9. #############Main Function######################
  10. ################################################
  11.  
  12. def main():
  13. tweets = input("Please input the twitter file name: ") #read file name
  14. keywordFile = input("Please input the keyword file name: ")
  15. try: #try the following code
  16. tweets = open(tweets,'r',encoding="utf-8") #read the tweets from the input file name
  17. p1 = [49.189787, -67.444574] #coordiante for point 1
  18. p2 = [24.660845, -67.444574] #coordiante for point 2
  19. p3 = [49.189787, -87.518395] #coordiante for point 3
  20. p4 = [24.660845, -87.518395] #coordiante for point 4
  21. p5 = [49.189787, -101.998892] #coordiante for point 5
  22. p6 = [24.660845, -101.998892] #coordiante for point 6
  23. p7 = [49.189787, -115.236428] #coordiante for point 7
  24. p8 = [24.660845, -115.236428] #coordiante for point 8
  25. p9 = [49.189787, -125.242264] #coordiante for point 9
  26. p10 = [24.660845, -125.242264] #coordiante for point 10
  27.  
  28. ############################################################
  29. ##############Calculating area for zones#####################
  30. eastern = area(p1,p2,p3,p4) #Calculating Eastern Area Zone
  31. central = area(p3,p4,p5,p6) #Calculating Central Area Zone
  32. mountain = area(p5,p6,p7,p8) #Calculating Mountian Area Zone
  33. pacific = area(p7,p8,p9,p10) #Calculating Pacific Area Zone
  34.  
  35. output = open("outEx.txt",'w')
  36. keywordsList = [] #creating a list to save keywords and their sentinal value
  37. keywords = open(keywordFile,"r",encoding="utf-8") #Importing keywords from keywords.txt file
  38. kw = keywords.readline() #Reading the first line of the keywords.txt
  39.  
  40. #############################################################
  41. #######################Accessing and modifiying the keyword file############
  42. while kw !="": #going through the keyword file
  43. #for kw in keywords:
  44. kw = cleanKeys(kw) #cleaing the kyword line using cleanKeys() funcction
  45. kw[1] = int(kw[1]) #converting kw[1] into intger for calcuation pourposes
  46.  
  47. keywordsList.append(kw) #appending kw to keywordsList list
  48. kw = keywords.readline() #reading the next line fo the file
  49.  
  50.  
  51. #############################################################
  52. #########################Creating area zone tweet storage list
  53. pacificCol=[] #creating a pacific zone tweet list
  54. mountinCol = [] #creating a Mountain zone tweet list
  55. centralCol = [] #creating a central zone tweet list
  56. easternCol = [] #creating a Eastern zone tweet list
  57. outLocCol = [] #creating a Out of zone zone tweet list
  58.  
  59. ###############################################################3
  60. ################saving the scors for each tweet to a list
  61. scorePacific = [] #creating a list to save tweet score for Pacific
  62. scoreMountin = [] #creating a list to save tweet score for Mountain
  63. scoreCentral = [] #creating a list to save tweet score for Central
  64. scoreEastern = [] #creating a list to save tweet score for Eastern
  65.  
  66.  
  67. #tweet = tweets.readline()
  68.  
  69. #################################################################
  70. ####################Accessing modifiying the tweet file along with score and tweet list
  71. for tweet in tweets:
  72. tweet = cleanKeywords(tweet) #cleaning the tweet line
  73. #using the returnArea function to pin point the location of the tweet
  74. tweetLoc = returnArea([tweet[0],tweet[1]],eastern,"Eastern", pacific,"Pacific", mountain,"Mountin",central,"Central")
  75. if tweetLoc[1] == "Eastern": #if tht tweet was from Eastern reagon
  76. easternCol.append(tweet[5].split()) #add the tweet to Eastern list and split them
  77. #print(tweet[5])
  78. score(keywordsList,tweet[5].split(),scoreEastern)#using the score method to calcualte the score
  79. elif tweetLoc[1] == "Mountin": #same description as in line 74
  80. mountinCol.append(tweet[5].split()) #same description as in line 75
  81. score(keywordsList,tweet[5].split(),scoreMountin) ##same description as in line 77
  82. elif tweetLoc[1] == "Central": #same description as in line 74
  83. centralCol.append(tweet[5].split()) #same description as in line 75
  84. score(keywordsList,tweet[5].split(),scoreCentral)#same description as in line 77
  85. elif tweetLoc[1] == "Pacific": #same description as in line 74
  86. pacificCol.append(tweet[5].split()) #same description as in line 75
  87. score(keywordsList,tweet[5].split(),scorePacific)#same description as in line 77
  88. else:
  89. outLocCol.append(tweet[5].split())
  90.  
  91. meanEastern = mean(scoreEastern) #find the mean of Eastern area
  92. meanCenteral = mean(scoreCentral) #find the mean of Centeral area
  93. meanMountain = mean(scoreMountin) #find the mean of Mountain area
  94. meanPacific = mean(scorePacific) #find the mean of Pacific area
  95.  
  96. print("number of tweets in Eastern zone is: ",len(easternCol)," Happiness score is: ",round(meanEastern,2))
  97. print("number of tweets in Centeral zone is: ",len(centralCol)," Happiness score is: ",round(meanCenteral,2))
  98. print("number of tweets in Mountain zone is: ",len(mountinCol)," Happiness score is: ",round(meanMountain,2))
  99. print("number of tweets in Pacific zone is: ",len(pacificCol)," Happiness score is: ",round(meanPacific,2))
  100.  
  101. hhist = happy_histogram #instanticating the happy_histogram Class
  102. #win= GraphicsWindow(640, 480) #defining a window size
  103. #canvas = win.canvas() #
  104. hhist.drawSimpleHistogram(meanEastern,meanCenteral,meanMountain,meanPacific) #invoking the drawSimpleHistogram() method for visual porpuses
  105. #win.wait() #to keep the canvas open
  106. tweets.close() #to close the tweet file
  107. keywords.close() #to close the keyword file
  108. except IOError : #creat an exception incase the user input the wrong file name
  109. print("Error: file was not found.")
  110. sys.exit() #it exit the program
  111.  
  112. except ValueError :#create an aexcpetion incase the user ad unreadable file
  113. print("Error: invalid file.")
  114. sys.exit() #it exits the program
  115. except RuntimeError as error :
  116. print("Error:", str(error))
  117. sys.exit() #it exits the program
  118.  
  119.  
  120. ##################################################################
  121. #########################functions################################
  122. ##################################################################
  123. ##################################################################
  124.  
  125.  
############################################
################# Average function
######## The following function calculates the mean of a given list
  129. def mean(numbers):
  130. return float(sum(numbers)) / len(numbers) #return the sum of the numbers divded by the length of the list
  131.  
  132.  
############################################
################## Clean Keywords function
# It takes a string
  136. def cleanKeywords(keywords):
  137. #will clean th string of words first from punctuation and then split them into a list
  138. keywords = keywords.replace("[","").replace("!","").replace("\n","").replace("]","").split(" ",5)
  139. #it removes comma from the first item on the list
  140. keywords[0] = keywords[0].replace(",", "")
  141. #convering the first 3 items on the list to float
  142. for i in range(3):
  143. keywords[i] = float(keywords[i])
  144. #it return the keywords into as a list
  145. return keywords
  146.  
#####################################################
############### Clean keys function
# Basic string-cleaning function that is meant for the keywords.txt file.
# You have to pass a string when you call the function.
  151. def cleanKeys(keywords):
  152. #it cleans the keywords form commas and then split them into a list
  153. keywords = keywords.replace(","," ").split()
  154. #it return the opperation as a list
  155. return keywords
  156.  
##############################################
######### Find the min and max of the area
# It takes in the coordinates as floats
  160. def area(p1,p2,p3,p4):
  161. #it organises the coordinates into x and y
  162. x = [p1[0],p2[0],p3[0],p4[0]]
  163. y = [p1[1],p2[1],p3[1],p4[1]]
  164. #it finds the max and min of x and y
  165. xMin = min(x)
  166. xMax = max(x)
  167. yMin = min(y)
  168. yMax = max(y)
  169. #it returns the area the would help find zone for each coordinate in a list table format
  170. return[[xMin,xMax],[yMin,yMax]]
  171.  
##############################################
################# Check area function ########
# Finds whether a coordinate falls within an area.
# It only returns a boolean based on the coordinates that are given.
  176. def checkArea(cords,area):
  177. #it checks if the cordinates falls with in a given area
  178. if cords[0]> area[0][0] and cords[0]<area[0][1]:
  179. if cords[1]> area[1][0] and cords[1]<area[1][1]:
  180. #if true, it will retun true
  181. return True
  182. else:
  183. #else it will return false
  184. return False
  185. else:
  186. #if the first condition was false, it will return false
  187. return False
  188.  
##########################################################
################# Returns the area for the given coordinates
################ It takes in the coordinates, plus the area and area name for each zone
  192. def returnArea(cords,area1,areaName1,area2,areaName2,area3,areaName3,area4,areaName4):
  193. #it checks if the coordinate falles within the area using the checkArea function
  194. #the same proccess will apply for all givin coordinate for all zone
  195. #it will return cords and areaname of for each tweets
  196. if checkArea([cords[0],cords[1]],area1) == True:
  197. return(cords,areaName1)
  198. elif checkArea([cords[0],cords[1]],area2) == True:
  199. return(cords,areaName2)
  200. elif checkArea([cords[0],cords[1]],area3) == True:
  201. return(cords,areaName3)
  202. elif checkArea([cords[0],cords[1]],area4) == True:
  203. return(cords,areaName4)
  204. else:
  205. #if it is outside of specfied zone, it will return that the area is not with in our zone
  206. return(cords,"no area in our database")
  207.  
  208.  
  209.  
######################################################
# score function
# It takes in the keywords, the tweet's words and the area score list.
# It appends the total to the area score list.
  214.  
  215. def score(keywords,tweets,area):
  216.  
  217. total=0 #variable total is set to zero. It will keep a track of the total
  218. count = 0 #it will counting occorances
  219. for key in keywords: #it loop through each keywords
  220. for tweet in tweets: #it will loop throug all the tweets
  221. if key[0].lower() == tweet.lower(): #it checks if there is a match between a tweet word and keyword and it convert them to all small caps
  222. count = count+1 #it add counts
  223. total = total+key[1] #it sum up the total
  224. if total != 0: #if total does not equal zero
  225. total = total/count #it averages out
  226. return area.append(total) #it append the area list with the tweet score
  227. else:
  228. total = 0 #else it will return and append zero to the list
  229. return area.append(total)
  230.  
  231.  
  232.  
  233. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement