"""Summarise the jail placemarks stored in a KML file.

Every ``Placemark`` element is treated as one jail.  Its name, its
longitude/latitude and the ``key: value`` lines found in its
``description`` are collected into a dict, and a one-line summary per jail
plus the total inmate count is printed.
"""
import re
import xml.dom.minidom as m
from pprint import pprint  # noqa: F401  (kept: handy for ad-hoc debugging)

# Raw string: "\s" is not a valid escape sequence in a normal literal.
kml = r"d:\state_pts_MI.kml"

# Splits one description line into (key, value) at its first colon.
splitre = re.compile(r"([\w\s]+):(.*)")

# Description attribute that holds the inmate count used by the report.
POPULATION_KEY = "2010 Correctional Population"

# NOTE(review): the previous revision cut the description into lines with a
# chain of ``.replace(...)``/``.split(...)`` calls on ``toxml()`` output, but
# the tag literals passed to those calls were no longer recoverable (they had
# degraded to empty strings and bare line breaks).  Presumably they were the
# ``<description>`` wrapper and paragraph/line-break tags -- confirm against
# the real KML.  Until then, <p>/<br> (as elements or as embedded markup) and
# newlines are all accepted as line separators.
_break_re = re.compile(r"<\s*/?\s*(?:p|br)\b[^>]*>", re.IGNORECASE)
_tag_re = re.compile(r"<[^>]+>")
_BREAK_ELEMENTS = frozenset(("p", "br"))


def _text_with_breaks(node):
    """Return the text below *node*, turning <p>/<br> elements into newlines."""
    parts = []
    for child in node.childNodes:
        if child.nodeType in (m.Node.TEXT_NODE, m.Node.CDATA_SECTION_NODE):
            parts.append(child.data)
        elif child.nodeType == m.Node.ELEMENT_NODE:
            inner = _text_with_breaks(child)
            if child.tagName.lower() in _BREAK_ELEMENTS:
                inner = "\n" + inner + "\n"
            parts.append(inner)
    return "".join(parts)


def _description_lines(desc_node):
    """Return the non-empty, stripped text lines of a description node."""
    text = _text_with_breaks(desc_node)
    # Markup that arrived as plain text (CDATA or escaped HTML) is still
    # present here: break tags become newlines, any other tag is dropped.
    text = _break_re.sub("\n", text)
    text = _tag_re.sub("", text)
    return [line.strip() for line in text.splitlines() if line.strip()]


def parse_jails(dom):
    """Collect one info dict per ``Placemark`` of a parsed KML document.

    Args:
        dom: Document (or element) as returned by ``xml.dom.minidom.parse``.

    Returns:
        Dict mapping jail name to a dict with the float entries ``"lon"`` and
        ``"lat"`` plus one string entry per ``key: value`` description line.
        Placemarks without a usable ``name`` are called ``unnamed1``,
        ``unnamed2``, ...  A later placemark replaces an earlier one that has
        the same name.

    Raises:
        IndexError: A placemark has no ``coordinates`` element, or fewer than
            two comma-separated coordinate fields.
        ValueError: A coordinate field is not a number.
    """
    jails = {}
    unnamed_counter = 0

    for placemark in dom.getElementsByTagName("Placemark"):
        # Name of the jail, or a generated placeholder when the node is
        # missing or empty (an empty <name/> has no firstChild).
        name_nodes = placemark.getElementsByTagName("name")
        if name_nodes and name_nodes[0].firstChild is not None:
            jail_name = name_nodes[0].firstChild.data
        else:
            unnamed_counter += 1
            jail_name = "unnamed" + str(unnamed_counter)

        # KML stores "lon,lat[,alt]"; converting to float lets the report
        # choose the number of decimals with %.3f-style formats.
        coord_node = placemark.getElementsByTagName("coordinates")[0]
        fields = coord_node.firstChild.data.split(",")
        info = {"lon": float(fields[0]), "lat": float(fields[1])}

        desc_nodes = placemark.getElementsByTagName("description")
        if desc_nodes:
            for line in _description_lines(desc_nodes[0]):
                matched = splitre.match(line)
                if matched is None:
                    # Not a "key: value" line; nothing to record.
                    continue
                key, value = matched.groups()
                info[key.strip()] = value.strip()

        jails[jail_name] = info

    return jails


def report(jails):
    """Print one summary line per jail and the total; return the total.

    Args:
        jails: Mapping as returned by :func:`parse_jails`.

    Returns:
        Sum of the inmate counts of all jails.

    Raises:
        KeyError: A jail has no ``"2010 Correctional Population"`` entry.
        ValueError: That entry is not an integer literal.
    """
    total_inmates = 0
    for jail_name, info in jails.items():
        inmates = int(info[POPULATION_KEY])
        print("%s is @ %.3f,%.3f and has %d inmates."
              % (jail_name, info["lon"], info["lat"], inmates))
        total_inmates += inmates
    print("Total inmates: %d" % total_inmates)
    return total_inmates


def main():
    """Parse the configured KML file and print the report."""
    report(parse_jails(m.parse(kml)))


if __name__ == "__main__":
    main()