Advertisement
Uno-Dan

Untitled

Dec 22nd, 2018
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.08 KB | None | 0 0
  1. ########################################################################################################################
  2. #    File: main.py
  3. #  Author: Dan Huckson, https://github.com/unodan
  4. #    Date: 2018-12-22
  5. ########################################################################################################################
  6. # This code may need to be updated if the UNECE changes the format of the page linked below.
  7. # http://www.unece.org/cefact/codesfortrade/codes_index.html
  8. # BeautifulSoup is used to scrape the download page for the current version number of the source zip file.
  9. ########################################################################################################################
  10.  
  11. from os import listdir, remove
  12. from csv import reader
  13. from json import dump
  14. from zipfile import ZipFile
  15. from bs4 import BeautifulSoup
  16. from urllib.request import urlretrieve, urlopen
  17.  
  18. with urlopen('http://www.unece.org/cefact/codesfortrade/codes_index.html') as response:
  19.     html = response.read()
  20.  
  21. soup = BeautifulSoup(html, features="html.parser")
  22. parsed_data = soup.find("div", {"id": "c21211"})
  23.  
  24. version_number = parsed_data.find_all(['td'])[3].text.split()[-1:][0]
  25. unlocode_zip_file = 'loc' + version_number.replace('-', '')[2:] + 'csv.zip'
  26. subdivision_codes_csv_file = version_number + ' SubdivisionCodes.csv'
  27. country_subdivisions = 'country-subdivisions.json'
  28.  
  29. zones = {}
  30. cities = []
  31. state_codes = []
  32. country_zones = {}
  33. encoding = 'ISO 8859-1'  # Character encoding used by the UNECE for the UN/LOCODE files.
  34.  
  35. # countries = {'CA': 'Canada', 'US': 'United States', 'AU': 'Australia', 'MX': 'Mexico', 'GB': 'United Kingdom'}
  36. countries = {'CA': 'Canada'}
  37.  
  38.  
  39. def get_unlocode_files():
  40.     # Site below is where the UN/LOCODE CSV files are located.
  41.     # http://www.unece.org/cefact/codesfortrade/codes_index.html
  42.     url = 'http://www.unece.org/fileadmin/DAM/cefact/locode/' + unlocode_zip_file
  43.     src_file_name = url.split('/')[-1]
  44.     urlretrieve(url, src_file_name)
  45.  
  46.     with ZipFile(src_file_name, 'r') as z:
  47.         z.extractall('.')
  48.  
  49.     remove(src_file_name)
  50.  
  51.     def get_files():
  52.         file_list = []
  53.  
  54.         for file_name in listdir('.'):
  55.             if 'UNLOCODE' in file_name and file_name.endswith('.csv'):
  56.                 file_list.append(file_name)
  57.         return sorted(file_list)
  58.  
  59.     return get_files()
  60.  
  61.  
  62. unlocode_files = get_unlocode_files()
  63.  
  64.  
  65. def get_zone_file(the_zone):
  66.  
  67.     for file_name in unlocode_files:
  68.         with open(file_name, encoding=encoding) as fn:
  69.             line = fn.readlines()
  70.  
  71.         last_country_code_in_file = line[-1].split(',')[1].strip('"')
  72.  
  73.         if the_zone <= last_country_code_in_file:
  74.             return file_name
  75.  
  76.  
  77. with open(subdivision_codes_csv_file, encoding=encoding) as f:
  78.     results = reader(f, delimiter=',', quotechar='"')
  79.  
  80.     for row in results:
  81.         country_code, zone_code, zone_name, zone_type = row
  82.         if zone_type == 'Outlying area':
  83.             continue
  84.  
  85.         if country_code in countries:
  86.             country_zones[zone_code] = {
  87.                 'zone': country_code,
  88.                 'code': zone_code,
  89.                 'name': zone_name,
  90.                 'type': zone_type,
  91.             }
  92.  
  93. for country_code, country_name in countries.items():
  94.  
  95.     if country_code not in zones:
  96.         zones[country_code] = {'code': country_code, 'name': country_name, 'zones': {}}
  97.  
  98.     file = get_zone_file(country_code)
  99.     with open(file, encoding=encoding) as f:
  100.         results = reader(f, delimiter=',', quotechar='"')
  101.         for row in results:
  102.             cntry_code = row[1]
  103.             zone_code = row[5].strip()
  104.             location_name = row[3]
  105.             location_code = row[2]
  106.             location_flags = row[6]
  107.             location_coordinates = row[10]
  108.  
  109.             if cntry_code == country_code:
  110.                 if '4' in location_flags and ' Apt' in location_name:
  111.                     continue
  112.  
  113.                 if zone_code not in country_zones or location_name.startswith('.'):
  114.                     continue
  115.  
  116.                 country = zones.get(cntry_code)
  117.  
  118.                 if zone_code in country['zones']:
  119.                     zone = country['zones'].get(zone_code)
  120.                 else:
  121.                     country['zones'][zone_code] = {}
  122.                     zone = country['zones'].get(zone_code)
  123.                     zone['code'] = country_zones[zone_code].get('code')
  124.                     zone['name'] = country_zones[zone_code].get('name')
  125.                     zone['type'] = country_zones[zone_code].get('type')
  126.                     zone['country'] = cntry_code
  127.                     zone['locations'] = {}
  128.  
  129.                 if location_code not in zone['locations']:
  130.                     zone['locations'][location_code] = {
  131.                         'code': location_code,
  132.                         'name': location_name,
  133.                         'flags': location_flags,
  134.                         'coordinates': location_coordinates
  135.                     }
  136.  
  137. with open(country_subdivisions, 'w', encoding=encoding) as f:
  138.     dump(zones, f, indent=2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement