"""Report the 2010 correctional population of every jail in a KML export.

Each ``<Placemark>`` in the KML file is one jail.  Its ``<description>``
holds one ``key: value`` pair per line; the pairs are collected into a dict
per jail and the ``2010 Correctional Population`` entries are printed and
summed.
"""
import re
import sys
import uuid
import xml.dom.minidom as m
from pprint import pprint  # unused here; handy for dumping the parsed dict

# Raw string: "\s" in a normal literal is an invalid escape sequence.
kml = r"d:\state_pts_MI.kml"

POPULATION_KEY = "2010 Correctional Population"

# this is how we split the key/value pairs inside the description nodes
splitre = re.compile(r"([\w\s]+):(.*)")

# NOTE(review): the separator literals of the original replace()/split()
# chain were not readable in the reviewed copy of this script.  Splitting on
# newlines and HTML <br> tags is an assumption -- confirm against the KML.
_LINE_BREAK_RE = re.compile(r"<br\s*/?>|\r?\n", re.IGNORECASE)


def _node_text(node):
    """Return the text and CDATA content found below *node*.

    ``<br>`` child elements become newlines; any other child element
    contributes its own text inline.  Working on node data instead of
    ``toxml()`` output means there are no wrapper tags or XML entities left
    to strip by hand.
    """
    parts = []
    for child in node.childNodes:
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
            parts.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE:
            if child.tagName.lower() == "br":
                parts.append("\n")
            else:
                parts.append(_node_text(child))
    return "".join(parts)


def parse_description(text):
    """Parse ``key: value`` lines out of a description string.

    Args:
        text: Description content, lines separated by newlines and/or
            ``<br>`` tags.

    Returns:
        dict mapping each stripped key to its stripped value.  Blank lines
        and lines that do not look like ``key: value`` are skipped instead
        of raising.
    """
    fields = {}
    for line in _LINE_BREAK_RE.split(text):
        line = line.strip()
        if not line:
            continue
        found = splitre.match(line)
        if found is None:
            # No "key:" prefix -- calling .groups() on None would crash.
            continue
        key, value = found.groups()
        fields[key.strip()] = value.strip()
    return fields


def jail_name_of(placemark):
    """Return the placemark's ``<name>`` text, or a fresh UUID URN.

    The UUID fallback is used when there is no ``<name>`` element or when
    the element is empty (``firstChild`` is ``None``).
    """
    names = placemark.getElementsByTagName("name")
    first = names[0].firstChild if names.length > 0 else None
    if first is not None and first.nodeType in (
        first.TEXT_NODE,
        first.CDATA_SECTION_NODE,
    ):
        return first.data
    return uuid.uuid1().urn


def collect_jails(placemarks):
    """Build ``{jail name: {key: value}}`` from ``<Placemark>`` elements.

    A placemark without a ``<description>`` yields an empty dict.  As in
    the original script, placemarks sharing a name overwrite each other.
    """
    jails = {}
    for placemark in placemarks:
        descriptions = placemark.getElementsByTagName("description")
        if descriptions.length > 0:
            fields = parse_description(_node_text(descriptions[0]))
        else:
            fields = {}
        jails[jail_name_of(placemark)] = fields
    return jails


def report(jails):
    """Print one line per jail plus the grand total; return the total.

    Jails without a population entry, or with a non-numeric one, are
    reported on stderr and left out of the total rather than aborting the
    run.  Thousands separators (``1,234``) are accepted.
    """
    total_inmates = 0
    for jailname, fields in jails.items():
        population = fields.get(POPULATION_KEY)
        if population is None:
            print(
                "%s has no '%s' entry; skipped." % (jailname, POPULATION_KEY),
                file=sys.stderr,
            )
            continue
        print(jailname + " has " + population + " inmates.")
        try:
            total_inmates += int(population.replace(",", ""))
        except ValueError:
            print(
                "%s: cannot read %r as a number; not counted."
                % (jailname, population),
                file=sys.stderr,
            )
    print("Total inmates: %d" % total_inmates)
    return total_inmates


def main(path=kml):
    """Parse the KML file at *path* and print the inmate report."""
    dom = m.parse(path)
    r = collect_jails(dom.getElementsByTagName("Placemark"))
    # pprint(r)  # uncomment to inspect everything that was parsed
    return report(r)


if __name__ == "__main__":
    main()