Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import xml.etree.cElementTree as ET
- from collections import defaultdict
- import re
- import pprint
# Path of the OSM XML extract that the audit reads.
OSMFILE = "chicago_illinois.osm"

# Postal codes that update_name_postcode() returns unchanged, without
# consulting the correction table.
expected = [
    "60183",
    "60513",
    "60126",
    "60007",
    "60660",
    "60440",
    "60022",
    "60642",
    "60185",
]

# Hand-written corrections: raw value as found in the data -> postal code.
mapping = {
    "Wasco, IL 60183": "60183",
    "IL, 60642": "60642",
    "IL, 60126": "60126",
    "IL 60118": "60118",
    "IL 60707": "60707",
    "IL 60605-1226": "60605",
}
def audit_postal_code(error_codes, postal_codes, this_postal_code):
    """Sort one postal code into the error list or the set of valid codes.

    A value counts as valid only when it consists solely of digits and is
    exactly five characters long.

    Args:
        error_codes: list that receives every rejected value (duplicates
            are kept, in the order they are seen).
        postal_codes: set that receives every accepted value.
        this_postal_code: the raw postal code string to classify.

    Returns:
        None. Both containers are modified in place.
    """
    is_valid = this_postal_code.isdigit() and len(this_postal_code) == 5
    if is_valid:
        postal_codes.add(this_postal_code)
    else:
        # Non-digit characters or a length other than 5 both mean "needs cleaning".
        error_codes.append(this_postal_code)
def is_postal_code(elem):
    """Return True when *elem* has a ``k`` attribute equal to "addr:postcode".

    An element without a ``k`` attribute is reported as False instead of
    raising KeyError, so one malformed element cannot abort a whole audit.
    """
    return elem.attrib.get("k") == "addr:postcode"
def audit_post(osmfile):
    """Scan an OSM XML file and classify every postal code found in it.

    Every ``tag`` element below a ``node`` or ``way`` for which
    is_postal_code() is true has its ``v`` attribute passed to
    audit_postal_code().

    Args:
        osmfile: path of the OSM XML file to parse.

    Returns:
        A tuple ``(error_codes, postal_codes)``: the list of rejected values
        and the set of accepted values, as filled by audit_postal_code().
    """
    error_codes = []
    postal_codes = set()
    # The context manager closes the file even if parsing fails; binary mode
    # lets the XML parser apply the encoding declared in the document.
    with open(osmfile, "rb") as osm_file:
        # Listen for "end" events: on "start" the children of an element are
        # not guaranteed to be parsed yet, so <tag> children could be missed.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag not in ("node", "way"):
                continue
            for tag in elem.iter("tag"):
                if is_postal_code(tag):
                    audit_postal_code(error_codes, postal_codes, tag.attrib["v"])
            # The element is fully processed; drop its content to limit
            # memory growth on large extracts.
            elem.clear()
    return error_codes, postal_codes
def update_name_postcode(this_postal_code, mapping):
    """Return a cleaned-up version of a postal code that failed the audit.

    Steps, in order:
      1. If the raw value is not in the module-level ``expected`` list and
         is a key of *mapping*, return the mapped value.
      2. Otherwise keep only the part before the first '-' or, when there is
         no '-', before the first '/'.
      3. If the shortened value is not in ``expected`` and is a key of
         *mapping*, return the mapped value; otherwise return the shortened
         value.

    Args:
        this_postal_code: the raw postal code string.
        mapping: dict of raw value -> corrected postal code.

    Returns:
        The corrected postal code string, or the (possibly shortened) input
        when no correction applies.
    """
    # Corrections are keyed on the raw text (e.g. "IL 60605-1226"), so they
    # must be looked up before the suffix is stripped; otherwise such keys
    # can never match.
    if this_postal_code not in expected and this_postal_code in mapping:
        return mapping[this_postal_code]

    # Keep only the part before the separator; '-' takes precedence over '/'.
    for separator in ("-", "/"):
        if separator in this_postal_code:
            this_postal_code = this_postal_code.split(separator)[0]
            break

    if this_postal_code not in expected and this_postal_code in mapping:
        this_postal_code = mapping[this_postal_code]
    return this_postal_code
# Audit the extract, then show how each rejected value would be rewritten.
errors, clean = audit_post(OSMFILE)
for name in errors:
    better_name = update_name_postcode(name, mapping)
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s => %s" % (name, better_name))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement