import xml.dom.minidom as m
from pprint import pprint
import re
import uuid
# Path of the KML file to summarise. Raw string: the backslash is a literal
# Windows path separator, not the start of an escape sequence.
kml = r"d:\state_pts_MI.kml"

# Splits one "key: value" fragment of a <description> node into its two parts.
# The key may contain word characters and whitespace; the value is the rest of
# the line after the first colon.
splitre = re.compile(r"([\w\s]+):(.*)")

# Description field that holds the inmate count used by the report.
POPULATION_KEY = '2010 Correctional Population'


def _jail_name(placemark):
    """Return the text of the placemark's first <name>, or a fresh UUID URN.

    The UUID fallback is used both when there is no <name> element and when
    the element is empty (no child node to read text from).
    """
    names = placemark.getElementsByTagName("name")
    if names and names[0].firstChild is not None:
        return names[0].firstChild.data
    return uuid.uuid1().urn


def _description_fields(placemark):
    """Return the key/value pairs found in the placemark's first <description>.

    The description is serialised back to XML, the <description> and <div>
    tags are removed, and the remainder is cut at each "</div>" so that every
    fragment carries one "key: value" pair. Fragments that are blank or do not
    match ``splitre`` are skipped. Keys and values are stripped of surrounding
    whitespace. A placemark without a <description> yields an empty dict.
    """
    descriptions = placemark.getElementsByTagName("description")
    if not descriptions:
        return {}
    text = descriptions[0].toxml()
    for tag in ("<description>", "</description>", "<div>"):
        text = text.replace(tag, "")
    fields = {}
    for fragment in text.split("</div>"):
        if not fragment.strip():
            continue
        found = splitre.match(fragment)
        if found is None:
            # Not a "key: value" fragment; nothing to record.
            continue
        key, value = found.groups()
        # The key pattern also matches leading newlines/indentation, so strip
        # it; otherwise lookups by the plain field name would miss.
        fields[key.strip()] = value.strip()
    return fields


def extract_jails(dom):
    """Map each jail name to the fields parsed from its description.

    One <Placemark> element is one jail.

    Args:
        dom: a parsed minidom document (or element) containing <Placemark>
            elements.

    Returns:
        dict of jail name -> dict of description key -> value (both strings).
        If two placemarks share a name, the later one replaces the earlier.
    """
    jails = {}
    for placemark in dom.getElementsByTagName("Placemark"):
        jails[_jail_name(placemark)] = _description_fields(placemark)
    return jails


def print_report(jails):
    """Print one line per jail plus the grand total, and return the total.

    Args:
        jails: mapping as returned by ``extract_jails``.

    Returns:
        The sum of all jails' POPULATION_KEY values as an int.

    Raises:
        KeyError: a jail has no POPULATION_KEY field.
        ValueError: a jail's POPULATION_KEY value is not an integer literal.
    """
    total_inmates = 0
    for jail_name, fields in jails.items():
        population = fields[POPULATION_KEY]
        print(jail_name + " has " + population + " inmates.")
        total_inmates += int(population)
    print("Total inmates: %d" % total_inmates)
    return total_inmates


def main():
    """Parse the KML file at ``kml`` and print the inmate report."""
    print_report(extract_jails(m.parse(kml)))


if __name__ == "__main__":
    main()