Advertisement
Guest User

Untitled

a guest
May 21st, 2019
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.36 KB | None | 0 0
  1. import operator
  2. import pandas as pd
  3. import math
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. import random
  7.  
  8. # the first two lines do the same thing as read.csv
  9. routes = []
  10. with open('routes.csv', 'r') as file:
  11. # apre il file e riempie 'routes' con le righe del file
  12. routes = file.read().splitlines()
  13. # for each element in 'routes', divide the string around the ';'
  14. routes = [route.split(';') for route in routes]
  15.  
  16.  
  17. aircodes = []
  18. with open('airport-codes.csv', 'r') as file:
  19. aircodes = file.read().splitlines()
  20. aircodes = [code.split(';') for code in aircodes]
  21.  
  22.  
  23. data = pd.read_csv('countries of the world.csv', decimal=',')
  24.  
  25. #Transform aircodes in dictionary + association of airport and nation
  26.  
  27. airport_nation = {}
  28.  
  29. for r in aircodes:
  30. airport_code = r[1]
  31. country = r[3]
  32. if not airport_code in airport_nation:
  33. airport_nation[airport_code] = country
  34.  
  35. print(airport_code, country) #example of how this cell works
  36. print(airport_nation['GZA']) #example of how I get the country from the abbreviate form of the airport
  37.  
  38. #Dictionary to make list with the abbreviations of the airports of departure (first 3 letters) and of arrival (second 3 letters), frequency
  39.  
  40. coppie = {}
  41.  
  42. for r in routes:
  43. source_airport = r[2]
  44. destination_airport = r[4]
  45. aircoppia = ''.join(sorted([source_airport, destination_airport]))
  46. if aircoppia in coppie:
  47. coppie[aircoppia] += 1
  48. else:
  49. coppie[aircoppia] = 1
  50.  
  51. coppie = sorted(coppie.items(), key=operator.itemgetter(1))
  52.  
  53. print(coppie[1]) #example of how it works, it works in reverse order (the one with less frequency first)
  54. print(coppie[-1]) #most busy
  55. print(len(coppie)) #test for know number of routes
  56. print(len(routes)) #tests to find out about total journeys
  57.  
  58. #Starting from trying to make a map, the frequency of flights was found to be irregular, so using pyplot we printed a graph that reveals that return flights are much more popular than one-way flights
  59.  
  60. collegamenti = [] #collegamenti means: number of times that route appears (not considering the order)
  61. freq_collegamento = [] #summ of sections that have n connections
  62. dist = [x[1] for x in coppie]
  63. for y in set([x[1] for x in coppie]):
  64. if not y > 25: #Elimination of outliers
  65. print(y, dist.count(y))
  66. collegamenti += [y]
  67. freq_collegamento += [dist.count(y)]
  68.  
  69. plt.yscale('log')#log to detect distribution that is exponential
  70. plt.plot(collegamenti,freq_collegamento)
  71. plt.plot(collegamenti[1::2],freq_collegamento[1::2])#take peers (round trip)
  72. plt.plot(collegamenti[::2],freq_collegamento[::2])#takes the odd (one way or only return)
  73. plt.show()
  74.  
  75. # We divide the number of pairs of airports that have an even number of connections for one that has an odd connection number and we see that the even ones are 11.39 times the odd number so there are more round trips than one way
  76.  
  77. float(sum(freq_collegamento[1::2])) / sum(freq_collegamento[::2])
  78.  
  79. errors = 0
  80. nations = {}
  81.  
  82. for x in coppie:
  83. a1, a2, conteggio = x[0][:3], x[0][-3:], x[1]
  84. try:
  85. if not airport_nation[a1].lower() in nations:
  86. nations[airport_nation[a1].lower().strip()] = conteggio
  87. else:
  88. nations[airport_nation[a1].lower().strip()] += conteggio
  89.  
  90. if not airport_nation[a2].lower() in nations:
  91. nations[airport_nation[a2].lower().strip()] = conteggio
  92. else:
  93. nations[airport_nation[a2].lower().strip()] += conteggio
  94. except:
  95. errors += 1
  96.  
  97. print(airport_nation[a2].lower().strip(), nations[airport_nation[
  98. a2].lower().strip()]) # Number of times the united states have been 'visited' (departure or arrival) on various trips
  99. print('The number of errors in percentage is: ', errors / float(len(coppie)) * 100)
  100.  
  101. sorted_nations = sorted(nations.items(), key=operator.itemgetter(1)) #To print in an orderly way the naz. in which he found no errors
  102. print(sorted_nations[-10:], len(sorted_nations) )
  103.  
  104. #Count of the various data we need (gdp, number of flights) to make the graph with pyplot.
  105.  
  106. err = 0
  107. popgdp =[]
  108. n_voli = []
  109. for x in range(len(data)):
  110. try:
  111. country = data['Country'][x].lower().strip()
  112. gdp_capita = data['GDP ($ per capita)'][x]
  113. pop = data['Population'][x]
  114. service = data['Service'][x]
  115. #print country, pop*gdp_capita, pop, gdp_capita, nazioni[country]
  116.  
  117. n_voli += [nazioni[country]] #it must be put before n_voli of popgdp because if there is an error here it exits from the loop before putting in popgdp the value
  118. popgdp += [pop*gdp_capita]
  119.  
  120. except Exception as e:
  121. err += 1
  122.  
  123.  
  124. print(float(err)/len(data)) #print errors
  125.  
  126. #DISTRIBUTION OF DOUBLE LOG(popgdp, n_voli)
  127.  
  128. x = list(map(math.log, popgdp)) #comment if you whant to see the bug bug
  129. y = list(map(math.log, n_voli)) #comment if you whant to see the bug bug
  130. #x=popgdp #discomfort if you whant to see the bug bug
  131. #y=n_voli #discomfort if you whant to see the bug bug
  132. (m, b) = np.polyfit(x, y, 1)
  133.  
  134. yp = np.polyval([m, b], x)
  135. plt.plot(x, yp)
  136. plt.grid(True)
  137. plt.scatter(x,y)
  138. plt.xlabel('GDP',fontsize=10)
  139. plt.ylabel('Flights',fontsize=10)
  140.  
  141. #plt.xscale('log') #bug because it makes the wrong fit
  142. #plt.yscale('log') #bug because it makes the wrong fit
  143.  
  144. plt.show()
  145. print(m, b)
  146.  
  147. #Associazione di nome nazione e codice aeroportuale
  148.  
  149. nations_codes = open('nations_codes', 'r').read().split('\n')
  150. nations_codes_d = {}
  151. for x in nations_codes:
  152. # print x[5:].lower(), x.split(' ')[0]
  153. nations_codes_d[x[5:].lower()] = x.split(' ')[0]
  154.  
  155.  
  156. print(x[5:].lower() , nations_codes_d[x[5:].lower()])
  157. #print nations_codes_d # ---->discomfort if I want to see all the nation codes
  158.  
  159. #mappe html fatte in D3js---->JavaScript library for producing dynamic, interactive data visualizations in web browsers
  160. #Map source --https://bl.ocks.org/ChumaA/385a269db46ae56444772b62f1ae82bf --
  161.  
  162. json_voli = open('json_start', 'r').read()
  163. err = 0
  164. for x in range(len(data)):
  165. try:
  166. country = data['Country'][x].lower().strip()
  167. country_code = nations_codes_d[country]
  168. service = data['Service'][x]
  169. gdp_capita = data['GDP ($ per capita)'][x]
  170. pop = data['Population'][x]
  171.  
  172. json_voli += ''' {"key": "''' + country_code + '''", "doc_count": ''' + str(
  173. pop * gdp_capita / 1000000000000) + ''' },''' # ho cambiato i nomi e ho generato 3 pagine html con servizi gdp e voli
  174. # json_voli += ''' {"key": "'''+country_code+'''", "doc_count": '''+str(service*1000)+''' },'''
  175. # json_voli += ''' {"key": "'''+country_code+'''", "doc_count": '''+str(nations[country])+''' },'''
  176. except Exception as e:
  177. # print 'Error', e , country
  178. err += 1
  179.  
  180. json_voli = json_voli[:-1]
  181. json_voli += "]}}}"
  182. # print json_voli
  183.  
  184. # Print errors
  185. print(float(err) / len(data))
  186.  
  187. #Class that starting it tells me given an airport of departure an airport of arrival based on the probability of the airports
  188.  
  189. routes = open('routes.csv', 'r').read().split('\n')
  190. routes = map(lambda x: x.split(';'), routes)
  191.  
  192.  
  193.  
  194. coppie = {}
  195.  
  196. for r in routes:
  197. source_airport = r[2]
  198. destination_airport = r[4]
  199. aircoppia = ''.join(sorted([source_airport, destination_airport]))
  200. if aircoppia in coppie:
  201. coppie[aircoppia] += 1
  202. else:
  203. coppie[aircoppia] = 1
  204.  
  205.  
  206. class Passeggero_girovago:
  207. '''
  208. This passenger chooses a possible destination at random from those of the initial airport,
  209. assigning the frequency of the journey to the destination airport
  210. '''
  211.  
  212. def __init__(self, start):
  213. self.start = start
  214. self.steps = 0
  215.  
  216. def __repr__(self):
  217. return "ciccio si trova in: " + self.start + " dopo " + str(self.steps) + " viaggi"
  218.  
  219. @staticmethod
  220. def find_airports(aircode):
  221. cc = []
  222. for c in coppie.keys():
  223. airport1 = c[:3]
  224. airport2 = c[3:]
  225. freq_coll = coppie[c]
  226. if aircode == airport1:
  227. cc += [[airport2, freq_coll]]
  228. if aircode == airport2:
  229. cc += [[airport1, freq_coll]]
  230. return cc
  231.  
  232. @staticmethod
  233. def choose_airport(airports):
  234. max_number = sum([x[1] for x in airports])
  235. choosen = random.choice(range(max_number))
  236. y = 0
  237. number = 0
  238. while number <= choosen and y < len(airports):
  239. number = sum([x[1] for x in airports[:y]])
  240. y += 1
  241. return airports[y - 1][0]
  242.  
  243. def update(self):
  244. # print find_airports(self.start)
  245. arrival = self.choose_airport(self.find_airports(self.start))
  246. self.start = arrival
  247. self.steps += 1
  248.  
  249.  
  250. start = 'ORD'
  251. arrivo = 'sdohuedwihu' # We fix here a random name in order to make our class work
  252. ciccio = Passeggero_girovago(start)
  253. while start != arrivo:
  254. ciccio.update()
  255. print(ciccio)
  256. arrivo = ciccio.start
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement