Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Plot the countries whose airports have the highest average altitude.

The script reads ``airports.dat`` from the current working directory, groups
the airport altitudes by country, averages them per country, and shows a
horizontal bar chart of the top 25 countries.

Created on Mon Jun 26 12:36:35 2017
@author: saliksyed
"""
import io
from collections import defaultdict
from operator import itemgetter

import numpy as np

# Input file and the zero-based positions of the fields this script uses.
AIRPORTS_FILE = "airports.dat"
COUNTRY_COLUMN = 3
ALTITUDE_COLUMN = 8

# How many countries end up on the chart.
TOP_N = 25


def parse_airports(lines):
    """Convert raw comma-separated airport rows into a list of dicts.

    Args:
        lines: iterable of text lines, one airport per line.

    Returns:
        A list of dicts with the keys ``"airport_country"`` (the text of
        column 3, unmodified) and ``"altitude"`` (column 8 as a float).

    Raises:
        IndexError: if a non-blank row has fewer than nine fields.
        ValueError: if the altitude field is not a number.
    """
    records = []
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of dying with an IndexError.
            continue
        # NOTE(review): a plain split does not understand quoted fields, so a
        # row whose earlier fields contain a comma would shift the columns.
        # If airports.dat has such rows, switch to the csv module - confirm
        # against the data file.
        fields = line.split(",")
        records.append({
            "airport_country": fields[COUNTRY_COLUMN],
            "altitude": float(fields[ALTITUDE_COLUMN]),
        })
    return records


def load_airports(path=AIRPORTS_FILE):
    """Read *path* and return its parsed airport records.

    The file is decoded as UTF-8 with undecodable bytes dropped, which is
    what the original per-field ``decode('utf-8', 'ignore')`` did.  Using
    ``io.open`` makes this work on both Python 2 and Python 3, and the
    ``with`` block guarantees the file handle is closed.
    """
    with io.open(path, "r", encoding="utf-8", errors="ignore") as handle:
        return parse_airports(handle)


def average_altitude_by_country(records):
    """Return a dict mapping each country to the mean altitude of its airports.

    Args:
        records: iterable of dicts as produced by ``parse_airports``.
    """
    altitudes = defaultdict(list)
    for record in records:
        altitudes[record["airport_country"]].append(record["altitude"])
    return {
        country: np.average(values) for country, values in altitudes.items()
    }


def top_countries(averages, limit=TOP_N):
    """Return the *limit* highest ``(country, average)`` pairs, highest first.

    Args:
        averages: dict mapping country name to average altitude.
        limit: maximum number of pairs to return.
    """
    # itemgetter(1) sorts by the average (second element of each pair);
    # reverse=True puts the highest average first.
    return sorted(averages.items(), key=itemgetter(1), reverse=True)[:limit]


def plot_top_countries(ranking):
    """Show a horizontal bar chart for ``(country, average)`` pairs.

    Args:
        ranking: list of ``(country, average altitude)`` pairs in the order
            they should appear from top to bottom.
    """
    # Imported here so the parsing and aggregation helpers above can be used
    # on machines that have no plotting backend installed.
    import matplotlib.pyplot as plt

    countries = [name for name, _ in ranking]
    avg_altitudes = [altitude for _, altitude in ranking]

    # ax = the axes attached to the figure that holds the visualization.
    _, ax = plt.subplots()

    # One evenly spaced slot per country.
    y_pos = np.arange(len(ranking))
    # The length of each bar is that country's average airport altitude.
    ax.barh(y_pos, avg_altitudes, align='center', color='green')
    # Label each bar with its country name.
    ax.set_yticks(y_pos)
    ax.set_yticklabels(countries)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Average altitude')
    ax.set_title('Countries with the highest average altitude of airports')
    plt.show()


def main():
    """Load the airport data, rank the countries and display the chart."""
    averages = average_altitude_by_country(load_airports())
    plot_top_countries(top_countries(averages))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement