# Untitled

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 26 12:36:35 2017

@author: saliksyed
"""

import csv
import io
import sys

import matplotlib.pyplot as plt
import numpy as np


# Plot the countries whose airports have the highest average altitude, using
# the comma-separated records stored in "airports.dat".

AIRPORTS_PATH = "airports.dat"
COUNTRY_COLUMN = 3   # zero-based index of the country field in each record
ALTITUDE_COLUMN = 8  # zero-based index of the altitude field in each record
TOP_N = 25           # number of countries shown in the chart

# The csv module wants byte strings on Python 2 and text on Python 3.
_PY2 = sys.version_info[0] == 2


def open_airport_file(path):
    """Open the airport data file at *path* in the mode ``csv.reader`` needs.

    On Python 2 the file is opened in binary mode and the country field is
    decoded later, in ``parse_airport_rows``.  On Python 3 the file is opened
    as UTF-8 text with ``newline=""``; bytes that cannot be decoded are
    dropped, mirroring the ``'ignore'`` error handler used for decoding on
    Python 2.

    The returned object is a context manager, so callers should use it in a
    ``with`` statement to make sure the file is closed.
    """
    if _PY2:
        return open(path, "rb")
    return io.open(path, "r", encoding="utf-8", errors="ignore", newline="")


def parse_airport_rows(lines):
    """Yield a ``(country, altitude)`` pair for every record in *lines*.

    *lines* is any iterable of CSV lines (an open file works).  Parsing is
    done with ``csv.reader`` rather than ``str.split(",")`` so that a quoted
    field containing a comma does not shift the columns, and so that the
    surrounding quotes are removed from the country name.

    Blank lines are skipped.

    Raises:
        IndexError: if a record has too few fields.
        ValueError: if the altitude field is not a number.
    """
    for fields in csv.reader(lines):
        if not fields:
            continue  # csv.reader returns [] for a blank line
        country = fields[COUNTRY_COLUMN]
        if _PY2:
            country = country.decode("utf-8", "ignore")
        yield country, float(fields[ALTITUDE_COLUMN])


def group_altitudes_by_country(records):
    """Return a dict mapping each country to the list of its altitudes.

    *records* is an iterable of ``(country, altitude)`` pairs; altitudes are
    kept in the order in which they are encountered.
    """
    altitudes = {}
    for country, altitude in records:
        altitudes.setdefault(country, []).append(altitude)
    return altitudes


def average_altitudes(altitudes_by_country):
    """Return a dict mapping each country to the mean of its altitudes."""
    return {
        country: np.average(values)
        for country, values in altitudes_by_country.items()
    }


def highest_averages(averages, limit=TOP_N):
    """Return the *limit* ``(country, average)`` pairs with the largest average.

    The pairs are sorted from the highest average to the lowest.
    """
    ranked = sorted(averages.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def plot_highest_averages(ranked):
    """Draw a horizontal bar chart of *ranked* and display it.

    *ranked* is a list of ``(country, average altitude)`` pairs, already in
    the order in which the bars should appear from top to bottom.
    """
    countries = [country for country, _ in ranked]
    mean_altitudes = [mean for _, mean in ranked]

    # fig is the figure containing the chart, ax the axes attached to it.
    fig, ax = plt.subplots()

    # One bar per country, evenly spaced; the bar length is the average
    # altitude of that country's airports.
    y_pos = np.arange(len(ranked))
    ax.barh(y_pos, mean_altitudes, align='center', color='green')

    # Label each bar with its country name.
    ax.set_yticks(y_pos)
    ax.set_yticklabels(countries)

    # Put the first (highest) entry at the top so labels read top-to-bottom.
    ax.invert_yaxis()

    ax.set_xlabel('Average altitude')
    ax.set_title('Countries with the highest average altitude of airports')

    plt.show()


def main():
    """Load the airport data, rank countries by average altitude and plot them."""
    with open_airport_file(AIRPORTS_PATH) as handle:
        # The generator must be consumed while the file is still open.
        altitudes = group_altitudes_by_country(parse_airport_rows(handle))
    plot_highest_averages(highest_averages(average_altitudes(altitudes)))


if __name__ == "__main__":
    main()