Advertisement
Guest User

Python parse XML ElementTree with namespace and attributes

a guest
Jan 9th, 2018
437
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.93 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3.  
  4. from sys import argv
  5. import xml.etree.ElementTree as ET
  6.  
  7. NS = 'x' # default namespace key # (any string is OK)
  8.  
  9. class XMLParser(object):
  10.     def __init__(self):
  11.         self.ns = {}     # namespace dict
  12.         self.root = None # XML's root element
  13.    
  14.     # extracts the namespace (usually from the root element)
  15.     def get_namespace(self, tag):
  16.         return tag.split('}')[0][1:]
  17.    
  18.     # loads the XML file (here: from string)
  19.     def load_xml(self, xmlstring):
  20.         root = ET.fromstring(xmlstring)
  21.         self.root = root
  22.         self.ns[NS] = self.get_namespace(root.tag)
  23.         return True
  24.    
  25.     # transforms XPath without namespaces to XPath with namespace
  26.     # AND detects if last element is an attribute
  27.     def ns_xpath(self, xpath):
  28.         tags = xpath.split('/')
  29.         if tags[-1].startswith('@'):
  30.             attrib = tags.pop()[1:]
  31.         else:
  32.             attrib = None
  33.         nsxpath = '/'.join(['%s:%s' % (NS, tag) for tag in tags])
  34.         return nsxpath, attrib
  35.    
  36.     # `find` and `findall` method in one place honoring attributes in XPath
  37.     def xfind(self, xpath, e=None, findall=False):
  38.         if not e:
  39.             e = self.root
  40.         if not findall:
  41.             f = e.find
  42.         else:
  43.             f = e.findall
  44.         nsxpath, attrib = self.ns_xpath(xpath)
  45.         e = f(nsxpath, self.ns)
  46.         if attrib:
  47.             return e.get(attrib)
  48.         return e
  49.  
  50. def main(xmlstring):
  51.     p = XMLParser()
  52.     p.load_xml(xmlstring)
  53.     xpaths = {
  54.         'Element a:': 'a',
  55.         'Element b:': 'a/b',
  56.         'Attribute c:': 'a/b/@c'
  57.         }
  58.     for key, xpath in xpaths.items():
  59.         print key, xpath, p.xfind(xpath)
  60.  
  61. if __name__ == "__main__":
  62.     xmlstring = """<root xmlns="http://www.example.com">
  63.        <a>
  64.            <b c="Hello, world!">
  65.            </b>
  66.        </a>
  67.    </root>"""
  68.     main(xmlstring)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement