eyana-m

Untitled

Jun 30th, 2018
6,272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.57 KB | None | 0 0
  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. import csv
  5. from math import cos, asin, sqrt
  6. from timeit import default_timer as timer
  7. from pathlib import Path
  8. from datetime import datetime
  9.  
  10.  
  11. # ----------------------------------------------------
  12. # ---------------- Inputs: FILES  --------------------
  13. # ----------------------------------------------------
  14.  
  15. FILE_AIRPORTS  = "../Input/world-airports.csv"
  16.  
  17. FILE_ZIPCODES  = "../Input/us_postal_codes.csv"
  18.  
  19. # -------------------------------------------------------
  20. # ---------------- Inputs: Dataframes -------------------
  21. # -------------------------------------------------------
  22.  
  23.  
  24. df_airports = pd.read_csv(FILE_AIRPORTS,encoding = "ISO-8859-1")
  25. columns_to_drop = ['elevation_ft', 'scheduled_service', 'gps_code',
  26.        'home_link', 'wikipedia_link', 'keywords', 'score',
  27.        'last_updated']
  28. df_airports.drop(columns_to_drop, axis=1, inplace=True)
  29.  
  30. # filter only US
  31. df_airports_filter = df_airports[(df_airports['iso_country']=='US') & (df_airports['type'].isin(['large_airport','medium_airport','small_airport']))]
  32.  
  33. # Retrieve State Abbreviation
  34. df_airports_filter = df_airports_filter.copy()
  35. df_airports_filter.loc[:,'iso_state'] = df_airports_filter['iso_region'].str.split('-').str[1]
  36.  
  37.  
  38. df_zipcodes = pd.read_csv(FILE_ZIPCODES,encoding = "ISO-8859-1")
  39.  
  40. def getAllStates():
  41.     return df_airports_filter['iso_state'].unique()
  42.  
  43. def getAirports(state):
  44.     df = df_airports_filter[df_airports_filter['iso_state']==state]
  45.     return df.to_dict('records')
  46.  
  47. def getZipcodes(state):
  48.     df = df_zipcodes[(df_zipcodes['State Abbreviation']==state)]
  49.     return df.to_dict('records')
  50.  
  51. def getInfo(state):
  52.     print(len(getAirports(state)), "airports in", state)
  53.     print(len(getZipcodes(state)), "zipcode in", state)
  54.  
  55.  
  56. # ------------------------------------------
  57. # ----------- Calculation ------------------
  58. # ------------------------------------------
  59.  
  60. # Function: distance, Purpose: Calculation
  61. # Calculates distance between two points: zipcode lat-lon and airport lat-lon
  62. # Based on Haversine Formula (found in StackOverflow)
  63. # Uses math library
  64. def distance(lat1, lon1, lat2, lon2):
  65.     p = 0.017453292519943295  #Pi/180
  66.     a = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
  67.     return 12742 * asin(sqrt(a)) #2*R*asin..
  68.  
  69.  
  70. # Function: closest, Purpose: Calculation
  71. # Runs distance function to given airport dataset
  72. # Returns airport data with smallest distance to the given zipcode
  73. def closest(data, zipcode):
  74.     #return min(data, key=lambda p: distance(zipcode['latitude'],zipcode['longitude'],p['latitude'],p['longitude']))
  75.     dl = []
  76.     for p in data:
  77.         ap = {
  78.         'zipcode': zipcode['Zip Code'],
  79.         'country': zipcode['Country'],
  80.         'state': zipcode['State Abbreviation'],
  81.         'state_full': zipcode['State'],
  82.         'county': zipcode['County'],
  83.         'latitude-zip': zipcode['Latitude'],
  84.         'longitude-zip': zipcode['Longitude'],
  85.         'nearest-airport': p['ident'],
  86.         'latitude-air': p['latitude_deg'],
  87.         'longitude-air': p['longitude_deg'],
  88.         'distance': distance(zipcode['Latitude'],zipcode['Longitude'],p['latitude_deg'],p['longitude_deg'])
  89.         }
  90.         dl.append(ap)
  91.     dl_sorted = sorted(dl, key=lambda k: k['distance'])
  92.     writeZipsToCSV(dl_sorted,zipcode['State Abbreviation'],zipcode['Zip Code'])
  93.  
  94.  
  95.     return dl_sorted[0]
  96.  
  97.  
  98.  
  99.  
  100. def writeZipsToCSV(dl_sorted,state,zipcode):
  101.     output_folder = "Output/"+state+"/"
  102.     if not os.path.exists(output_folder):
  103.         os.makedirs(output_folder)
  104.  
  105.     with open(output_folder+str(zipcode)+"_all airports.csv","w") as csv_file:
  106.             dict_writer = csv.DictWriter(csv_file, dl_sorted[0].keys())
  107.             dict_writer.writeheader()
  108.             dict_writer.writerows(dl_sorted)
  109.             csv_file.close()
  110. # ------------------------------------------
  111. # ------- Orchestration & Output -----------
  112. # ------------------------------------------
  113.  
  114. entries = 0;
  115. i = datetime.now()
  116. timestamp = i.strftime('%Y-%m%d-')
  117. def calculateNearestAirport(state):
  118.     global entries
  119.     try:
  120.         zipcodes = getZipcodes(state)
  121.         dicts = []
  122.         print("Calculating for",state,"with", len(zipcodes), "zipcodes...")
  123.         for zc in zipcodes:
  124.             dicts.append(closest(getAirports(state), zc))
  125.  
  126.  
  127.         with open("Output/"+timestamp+state+"_nearest_airport.csv","w") as csv_file:
  128.             dict_writer = csv.DictWriter(csv_file, dicts[0].keys())
  129.             dict_writer.writeheader()
  130.             dict_writer.writerows(dicts)
  131.             csv_file.close()
  132.  
  133.     finally:
  134.         entries = entries + len(zipcodes)
  135.         print("Done calculating for ", len(zipcodes), "zipcodes of", state)
  136.  
  137.  
  138.  
  139. # ------------------------------------------
  140. # -------- Orchestration (Terminal) --------
  141. # ------------------------------------------
  142.  
  143. states_scope = getAllStates()[1:2]
  144.  
  145. perf_time = []
  146.  
  147.  
  148. try:
  149.     start = timer()
  150.     print("Calculating for the following states: ")
  151.     [print (x) for x in states_scope]
  152.     for state in states_scope:
  153.  
  154.         start_state = timer()
  155.         calculateNearestAirport(state)
  156.         end_state = timer()
  157.         diff = (end_state-start_state)
  158.         time_state={
  159.         'state': state,
  160.         'duration': round(diff/60,3)
  161.         }
  162.         perf_time.append(time_state)
  163.  
  164. finally:
  165.     end = timer()
  166.     print(round((end - start)/60,3), "minutes")
  167.     print(len(perf_time), "states")
  168.     print(entries, "zipcodes")
  169.     for k in perf_time:
  170.         print(k)
  171.  
  172. #print(len(df_airports_filter))
  173. #getInfo('NY')
Advertisement
Add Comment
Please, Sign In to add comment