Advertisement
rsmoorthy

Python: XML to Python Dict

Aug 22nd, 2013
961
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.73 KB | None | 0 0
  1. import sys
  2. from xml.dom import minidom
  3.  
  4. # suit our needs
  5.  
  6. class XML2Dict(object):
  7.     """ Converts an XML object to Python Dictionary
  8.        Based on http://stackoverflow.com/questions/3292973/xml-to-from-a-python-dictionary
  9.        with additional updates done
  10.    """
  11.     def __init__(self):
  12.         # By default it creates a list only if there is more than one item for a given key
  13.         # But if you want some keys to be always list, specify in keys_always_list
  14.         self.keys_always_list = ["action", "test", "command", "condition", "match_intent", "app"]
  15.  
  16.         # Merge keys like this
  17.         # If { "actions" : { "action": [... ] } }
  18.         # Change to { "actions" : [ ... ] }
  19.         # Note here "actions" have the singular "action"
  20.         self.merge_keys = ["action", "test", "match_intent", "app" ]
  21.  
  22.     def __dappend(self, dictionary, key, item):
  23.         """Append item to dictionary at key.  Only create a list if there is more than one item for the given key.
  24.        dictionary[key]=item if key doesn't exist.
  25.        dictionary[key].append(item) if key exists."""
  26.  
  27.         if key == "#attributes":
  28.             for k,v in item.iteritems():
  29.                 dictionary.setdefault(k, v)
  30.             return
  31.  
  32.         if key in dictionary.keys() or key in self.keys_always_list:
  33.             if key not in dictionary.keys():
  34.                 dictionary[key] = []
  35.             if not isinstance(dictionary[key], list):
  36.                 lst=[]
  37.                 lst.append(dictionary[key])
  38.                 lst.append(item)
  39.                 dictionary[key]=lst
  40.             else:
  41.                 dictionary[key].append(item)
  42.         else:
  43.             dictionary.setdefault(key, item)
  44.  
  45.     def __node_attributes(self, node):
  46.         """Return an attribute dictionary """
  47.         if node.hasAttributes():
  48.             return dict([(str(attr), str(node.attributes[attr].value)) for attr in node.attributes.keys()])
  49.         else:
  50.             return None
  51.  
  52.     def __attr_str(self, node):
  53.         return "%s-attrs" % str(node.nodeName)
  54.  
  55.     def __hasAttributes(self, node):
  56.         if node.nodeType == node.ELEMENT_NODE:
  57.             if node.hasAttributes():
  58.                 return True
  59.         return False
  60.  
  61.     def __with_attributes(self, node, values):
  62.         if self.__hasAttributes(node):
  63.             if isinstance(values, dict):
  64.                 self.__dappend(values, '#attributes', self.__node_attributes(node))
  65.                 return { str(node.nodeName): values }
  66.             elif isinstance(values, str):
  67.                 return { str(node.nodeName): values,
  68.                          self.__attr_str(node): self.__node_attributes(node)}
  69.         else:
  70.             return { str(node.nodeName): values }
  71.  
  72.     def xml2dict(self, node):
  73.         """Given an xml dom node tree,
  74.        return a python dictionary corresponding to the tree structure of the XML.
  75.        This parser does not make lists unless they are needed.  For example:
  76.  
  77.        '<list><item>1</item><item>2</item></list>' becomes:
  78.        { 'list' : { 'item' : ['1', '2'] } }
  79.        BUT
  80.        '<list><item>1</item></list>' would be:
  81.        { 'list' : { 'item' : '1' } }
  82.  
  83.        By default it creates a list only if there is more than one item for a given key.
  84.        But if you want some keys to be always list, specify in keys_always_list
  85.  
  86.        This is a shortcut for a particular problem and probably not a good long-term design.
  87.        """
  88.         if not node.hasChildNodes():
  89.             if node.nodeType == node.TEXT_NODE:
  90.                 if node.data.strip() != '':
  91.                     return str(node.data.strip())
  92.                 else:
  93.                     return None
  94.             else:
  95.                 ret =  self.__with_attributes(node, None)
  96.                 return ret
  97.         else:
  98.             #recursively create the list of child nodes
  99.             childlist = []
  100.             for child in node.childNodes:
  101.                 xc = self.xml2dict(child)
  102.                 if xc != None and child.nodeType != child.COMMENT_NODE:
  103.                     childlist.append(xc)
  104.             if len(childlist)==1 and isinstance(childlist[0], str):
  105.                 return self.__with_attributes(node, childlist[0])
  106.             else:
  107.                 #if False not in [isinstance(child, dict) for child in childlist]:
  108.                 new_dict={}
  109.                 for child in childlist:
  110.                     if isinstance(child, dict):
  111.                         for k in child:
  112.                             self.__dappend(new_dict, k, child[k])
  113.                     elif isinstance(child, str):
  114.                         self.__dappend(new_dict, '#text', child)
  115.                     else:
  116.                         print "ERROR"
  117.                 ret =  self.__with_attributes(node, new_dict)
  118.  
  119.                 # Merge keys like this
  120.                 # If { "actions" : { "action": [... ] } }
  121.                 # Change to { "actions" : [ ... ] }
  122.                 # Note here "actions" have the singular "action"
  123.                 # Also: the singular "action" should be the only element within the dict and should
  124.                 #       be a list
  125.                 if len(ret) == 1 and isinstance(ret.values()[0], dict):
  126.                     k,v = ret.items()[0]
  127.                     if isinstance(v, dict):
  128.                         k1,v1 = v.items()[0]
  129.                         if isinstance(v1, list) and k1 in self.merge_keys and k1 + "s" == node.nodeName:
  130.                             ret = { str(node.nodeName): v1 }
  131.                 return ret
  132.  
  133.     def load(fname):
  134.         return xmldom2dict(minidom.parse(fname))
  135.  
  136.  
  137.  
  138. if __name__=='__main__':
  139.     data = minidom.parse(sys.argv[1])
  140.  
  141.     xd = XML2Dict()
  142.     d = xd.xml2dict(data)
  143.  
  144.     import pprint
  145.     pprint.pprint(d)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement