# Source: Pastebin.com

import xml.dom.minidom as m
from pprint import pprint
import re

# Path of the KML file to read. Raw string, so the backslash of the Windows
# path is never interpreted as the start of an escape sequence.
kml = r"d:\state_pts_MI.kml"

# Splits one "key: value" line of a <description> node into (key, value).
splitre = re.compile(r"([\w\s]+):(.*)")

# Description field holding the inmate count that the report prints and sums.
POPULATION_KEY = "2010 Correctional Population"


def _node_text(node):
    """Return the text and CDATA content found directly under *node*.

    Gives an empty string for an element without text children (for example
    ``<name/>``), a case in which ``node.firstChild.data`` would raise.
    """
    return "".join(
        child.data
        for child in node.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )


def _parse_description(description):
    """Return the ``key: value`` pairs of one <description> element as a dict.

    The element is expected to hold one ``<div>key: value</div>`` per pair.
    Keys and values are stripped of surrounding whitespace. Fragments that do
    not match ``splitre`` (blank fragments, an empty ``<description/>``, text
    without a colon) are skipped instead of aborting the whole run.
    """
    # Reduce the serialized element to "key: value</div>key: value</div>..."
    # so that splitting on the closing tag yields one fragment per pair.
    markup = description.toxml()
    for tag in ("<description>", "</description>", "<div>"):
        markup = markup.replace(tag, "")

    pairs = {}
    for line in markup.split("</div>"):
        found = splitre.match(line)
        if found is None:
            continue
        key, value = found.groups()
        # [\w\s]+ also captures leading newlines/indentation, so strip the key
        # to keep later lookups by field name working.
        pairs[key.strip()] = value.strip()
    return pairs


def parse_placemarks(placemarks):
    """Build a ``{jail name: fields}`` dict from <Placemark> elements.

    One placemark is one jail. Each ``fields`` dict holds ``"lon"`` and
    ``"lat"`` as floats plus every key/value pair (as strings) found in the
    placemark's <description>. A placemark whose <name> is missing or empty
    is stored as ``"unnamed1"``, ``"unnamed2"``, ...  A later placemark with
    the same name replaces the earlier entry.

    Raises:
        IndexError: if a placemark has no <coordinates> or <description>
            element, or fewer than two comma-separated coordinates.
        ValueError: if a coordinate is not a valid float.
    """
    jails = {}
    unnamed_counter = 0

    for placemark in placemarks:
        # Name of the jail from its <name> node, or a generated placeholder.
        names = placemark.getElementsByTagName("name")
        jail_name = _node_text(names[0]) if names else ""
        if not jail_name:
            unnamed_counter += 1
            jail_name = "unnamed" + str(unnamed_counter)

        # KML stores coordinates as "lon,lat[,alt]". Convert to float so the
        # report can choose the number of decimals (e.g. %.3f or %.6f).
        coordinates = placemark.getElementsByTagName("coordinates")[0]
        coords = _node_text(coordinates).split(",")
        fields = {"lon": float(coords[0]), "lat": float(coords[1])}

        # Exactly one description per jail is expected.
        description = placemark.getElementsByTagName("description")[0]
        fields.update(_parse_description(description))

        jails[jail_name] = fields

    return jails


# Reading the file and printing the report only happen when the file is run
# as a script; the names below stay module globals in that case.
if __name__ == "__main__":
    dom = m.parse(kml)
    placemarks = dom.getElementsByTagName("Placemark")
    r = parse_placemarks(placemarks)

    # display the results
    total_inmates = 0
    for jailname, fields in r.items():
        inmates = int(fields[POPULATION_KEY])
        print("%s is @ %.3f,%.3f and has %d inmates." % (jailname, fields["lon"], fields["lat"], inmates))
        total_inmates += inmates

    print("Total inmates: %d" % total_inmates)