Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import operator
- import pandas as pd
- import math
- import numpy as np
- import matplotlib.pyplot as plt
- import random
# Read the two ';'-separated tables by hand (the equivalent of a plain
# read.csv): every line of a file becomes the list of its fields.
with open('routes.csv', 'r') as routes_file:
    routes = [line.split(';') for line in routes_file.read().splitlines()]

with open('airport-codes.csv', 'r') as codes_file:
    aircodes = [line.split(';') for line in codes_file.read().splitlines()]

# Per-country statistics; this file uses ',' as the decimal separator.
data = pd.read_csv('countries of the world.csv', decimal=',')
# Turn the airport table into a lookup: airport code -> country.
# Field 1 of each row is used as the airport code and field 3 as the country;
# when a code shows up more than once, the first country seen is kept.
airport_nation = {}
for row in aircodes:
    airport_code, country = row[1], row[3]
    airport_nation.setdefault(airport_code, country)

# NOTE(review): the paste lost its indentation; the two example prints are
# assumed to run once, after the loop.
print(airport_code, country)  # last row read, as an example of the mapping
print(airport_nation['GZA'])  # country looked up from an airport code
# Count how often every airport pair is connected, ignoring direction.
# The key is the two airport codes (fields 2 and 4 of a route) sorted
# alphabetically and glued together, so A->B and B->A share one entry.
coppie = {}
for route in routes:
    source_airport, destination_airport = route[2], route[4]
    aircoppia = ''.join(sorted((source_airport, destination_airport)))
    coppie[aircoppia] = coppie.get(aircoppia, 0) + 1

# From here on 'coppie' is a list of (pair, frequency) tuples in ascending
# order of frequency.
coppie = list(coppie.items())
coppie.sort(key=operator.itemgetter(1))

print(coppie[1])    # example entry from the low-frequency end of the list
print(coppie[-1])   # busiest pair
print(len(coppie))  # number of distinct undirected pairs
print(len(routes))  # total number of route rows
# Distribution of connection counts. While trying to draw a map, the flight
# frequencies turned out to be irregular, so we plot, for every count n
# (number of times an airport pair appears, direction ignored), how many
# pairs have exactly n connections. Even counts (round trips) turn out to be
# much more common than odd ones (a one-way leg left over).
dist = [x[1] for x in coppie]

# Tally every count in one pass instead of calling dist.count() per value.
pairs_per_count = {}
for count in dist:
    pairs_per_count[count] = pairs_per_count.get(count, 0) + 1

collegamenti = []       # n: number of times a pair appears
freq_collegamento = []  # how many pairs have exactly n connections
# Iterate in ascending order explicitly: the order of a set is not guaranteed.
for y in sorted(pairs_per_count):
    if y > 25:  # drop the outliers
        continue
    print(y, pairs_per_count[y])
    collegamenti.append(y)
    freq_collegamento.append(pairs_per_count[y])

# Split by the parity of the count itself rather than by list position, so a
# missing count (a gap in 1..25) cannot swap the two series.
even = [(n, f) for n, f in zip(collegamenti, freq_collegamento) if n % 2 == 0]
odd = [(n, f) for n, f in zip(collegamenti, freq_collegamento) if n % 2 == 1]

plt.yscale('log')  # log scale: the distribution looks exponential
plt.plot(collegamenti, freq_collegamento)
plt.plot([n for n, _ in even], [f for _, f in even])  # even counts (round trips)
plt.plot([n for n, _ in odd], [f for _, f in odd])    # odd counts (unpaired leg)
plt.show()

# Ratio between the number of pairs with an even count and the number with an
# odd count (about 11.39 on the original data set, per the author's note).
even_total = sum(f for _, f in even)
odd_total = sum(f for _, f in odd)
if odd_total:
    print(float(even_total) / odd_total)
# Number of flights touching each nation (as departure or as arrival).
# 'coppie' is the list of (pair, frequency) tuples; the pair is two 3-letter
# airport codes glued together.
errors = 0   # pairs for which an airport code has no known country
nations = {}
for aircoppia, conteggio in coppie:
    a1, a2 = aircoppia[:3], aircoppia[-3:]
    try:
        for airport in (a1, a2):
            # Normalise once and use the SAME key for the lookup and for the
            # update: testing the unstripped name while storing the stripped
            # one would reset the total instead of adding to it.
            nation = airport_nation[airport].lower().strip()
            nations[nation] = nations.get(nation, 0) + conteggio
    except KeyError:
        # Unknown airport code. As before, an airport already counted for
        # this pair stays counted.
        errors += 1

# Example: total for the nation of the last (busiest) pair's second airport.
# NOTE(review): the paste lost its indentation; this print is assumed to run
# once, after the loop.
if coppie:
    last_nation = airport_nation.get(a2, '').lower().strip()
    if last_nation in nations:
        print(last_nation, nations[last_nation])

error_pct = errors / float(len(coppie)) * 100 if coppie else 0.0
print('The number of errors in percentage is: ', error_pct)

# Nations in ascending order of flights; show the ten busiest and how many
# nations were resolved in total.
sorted_nations = sorted(nations.items(), key=operator.itemgetter(1))
print(sorted_nations[-10:], len(sorted_nations))
# Collect, for every country that appears in both data sets, the total GDP
# (population * GDP per capita) and the number of flights, as two parallel
# lists for the plot below.
err = 0      # countries that could not be used
popgdp = []
n_voli = []
rows = zip(data['Country'], data['GDP ($ per capita)'], data['Population'])
for raw_country, gdp_capita, pop in rows:
    try:
        country = raw_country.lower().strip()
        # The flight counts live in 'nations'. The previous name 'nazioni'
        # did not exist, and the broad except hid the NameError, so nothing
        # was ever collected.
        flights = nations[country]
    except (AttributeError, KeyError):
        # Missing country name, or a country with no recorded flights.
        err += 1
        continue
    total_gdp = pop * gdp_capita
    if not total_gdp > 0:
        # Missing (NaN) or non-positive value: its logarithm is undefined.
        err += 1
        continue
    # Append to both lists together so they always stay aligned.
    n_voli.append(flights)
    popgdp.append(total_gdp)

# Fraction of countries that were skipped.
print(float(err) / len(data) if len(data) else 0.0)
# Log-log relation between total GDP and number of flights.
# The logarithms are taken here, on the data, and the axes stay linear:
# switching the axes to log scale with plt.xscale/plt.yscale instead would
# fit the straight line on the raw values and give the wrong fit.
x = [math.log(value) for value in popgdp]
y = [math.log(value) for value in n_voli]

# Least-squares straight line y = m * x + b through the log-log points.
coefficients = np.polyfit(x, y, 1)
m, b = coefficients
yp = np.polyval(coefficients, x)

plt.plot(x, yp)     # fitted line
plt.grid(True)
plt.scatter(x, y)   # one point per country
plt.xlabel('GDP', fontsize=10)
plt.ylabel('Flights', fontsize=10)
plt.show()

print(m, b)  # slope and intercept of the fit
# Lookup from lower-cased nation name to nation code, read from the
# 'nations_codes' file.
# NOTE(review): assumes each line is "<code> <name>" with the name starting
# at character 5 -- confirm against the actual file.
with open('nations_codes', 'r') as codes_file:
    # Skip blank lines (typically the one after the final newline), which
    # would otherwise add a meaningless '' -> '' entry.
    nations_codes = [line for line in codes_file.read().split('\n') if line]

nations_codes_d = {}
for x in nations_codes:
    nations_codes_d[x[5:].lower()] = x.split(' ')[0]

# Example: the last entry read. Print nations_codes_d to see all the codes.
# NOTE(review): the paste lost its indentation; this print is assumed to run
# once, after the loop.
if nations_codes:
    print(x[5:].lower(), nations_codes_d[x[5:].lower()])
# Build the JSON payload for the HTML maps made with D3.js (a JavaScript
# library for dynamic, interactive data visualisations in web browsers).
# Map source: https://bl.ocks.org/ChumaA/385a269db46ae56444772b62f1ae82bf
#
# 'json_start' holds the opening part of the document; one
# {"key": <nation code>, "doc_count": <value>} entry per country is appended
# and the document is then closed. The value used here is total GDP in
# units of 10^12 dollars. The author generated three pages by swapping the
# value: this one, data['Service'] * 1000, and nations[country] (flights).
with open('json_start', 'r') as start_file:
    json_voli = start_file.read()

err = 0       # countries that could not be written
entries = []
rows = zip(data['Country'], data['GDP ($ per capita)'], data['Population'])
for raw_country, gdp_capita, pop in rows:
    try:
        country = raw_country.lower().strip()
        country_code = nations_codes_d[country]
    except (AttributeError, KeyError):
        # Missing country name, or a country without a known code.
        err += 1
        continue
    doc_count = pop * gdp_capita / 1000000000000
    if math.isnan(doc_count):
        # 'nan' is not valid JSON, so skip countries with missing figures.
        err += 1
        continue
    entries.append(''' {"key": "''' + country_code + '''", "doc_count": '''
                   + str(doc_count) + ''' }''')

# Join with commas instead of appending a comma per entry and chopping the
# last character: with no entries, the chop would eat the end of the header.
json_voli += ','.join(entries)
json_voli += "]}}}"

# Fraction of countries that were skipped.
print(float(err) / len(data) if len(data) else 0.0)
# Rebuild the pair -> frequency dictionary used by the random-walk class
# below ('coppie' was turned into a sorted list earlier in the script).
with open('routes.csv', 'r') as routes_file:
    # splitlines() instead of split('\n'): no empty trailing row when the
    # file ends with a newline, and the file is closed when done.
    routes = [line.split(';') for line in routes_file.read().splitlines()]

coppie = {}
for r in routes:
    if len(r) < 5:
        # Blank or truncated line: it has no source/destination fields.
        continue
    source_airport = r[2]
    destination_airport = r[4]
    # Direction is ignored: both codes are sorted before building the key.
    aircoppia = ''.join(sorted([source_airport, destination_airport]))
    coppie[aircoppia] = coppie.get(aircoppia, 0) + 1
class Passeggero_girovago:
    '''
    Wandering passenger doing a random walk over the airport network.

    At every step the passenger flies from the current airport to one of the
    airports connected to it, picked at random with probability proportional
    to how many times that connection appears in the routes.
    '''

    def __init__(self, start):
        '''Place the passenger at airport code ``start`` with no flights taken.'''
        # Current airport code; replaced by update() after every flight.
        self.start = start
        # Number of flights taken so far.
        self.steps = 0

    def __repr__(self):
        return "ciccio si trova in: " + self.start + " dopo " + str(self.steps) + " viaggi"

    @staticmethod
    def find_airports(aircode, pairs=None):
        '''
        Return the airports connected to ``aircode``.

        ``pairs`` maps a pair key (two 3-letter airport codes glued together)
        to the frequency of that connection; when omitted, the module-level
        ``coppie`` dictionary is used.

        The result is a list of ``[other_airport, frequency]`` items, one per
        matching pair, in the iteration order of ``pairs``.
        '''
        if pairs is None:
            pairs = coppie
        cc = []
        for c, freq_coll in pairs.items():
            airport1, airport2 = c[:3], c[3:]
            if aircode == airport1:
                cc.append([airport2, freq_coll])
            if aircode == airport2:
                cc.append([airport1, freq_coll])
        return cc

    @staticmethod
    def choose_airport(airports):
        '''
        Pick one airport code from ``airports`` at random, weighted by frequency.

        ``airports`` is a list of ``[airport, frequency]`` items with
        non-negative integer frequencies, as returned by find_airports().
        Raises IndexError when there is nothing to choose from (empty list or
        all frequencies equal to zero).
        '''
        max_number = sum(weight for _, weight in airports)
        if max_number <= 0:
            raise IndexError('no reachable airport to choose from')
        choosen = random.randrange(max_number)
        # Walk the running total: airport i owns the draws in
        # [sum of previous weights, sum including its own weight).
        # The earlier loop compared against the sum of the PREVIOUS weights
        # only, which shifted every pick by one and never chose the first
        # airport.
        running = 0
        for airport, weight in airports:
            running += weight
            if choosen < running:
                return airport
        # Not reachable with non-negative weights; kept as a safe fallback.
        return airports[-1][0]

    def update(self):
        '''Fly to a randomly chosen connected airport and count the flight.'''
        arrival = self.choose_airport(self.find_airports(self.start))
        self.start = arrival
        self.steps += 1
# Random walk starting from ORD: keep flying, printing the passenger after
# every flight, until a flight lands back at the departure airport.
start = 'ORD'
ciccio = Passeggero_girovago(start)
while True:
    ciccio.update()
    print(ciccio)
    arrivo = ciccio.start  # airport reached by the latest flight
    if arrivo == start:
        break
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement