Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- from sys import argv
- import xml.etree.ElementTree as ET
# Prefix under which the document's default namespace is registered.
# Any string works; it only has to match between the path and the mapping.
NS = 'x'


class XMLParser(object):
    """Query an XML document that uses a default namespace with plain paths.

    Callers write paths without namespace prefixes (``'a/b'``); the parser
    adds the prefix registered under ``NS`` to every element step before
    handing the path to ElementTree.  A trailing ``@name`` step selects an
    attribute of the matched element(s) instead of the element itself.
    """

    # Path steps that are XPath syntax rather than element names.  They must
    # not receive a namespace prefix ('' is the gap produced by '//').
    _SPECIAL_STEPS = ('', '.', '..', '*')

    def __init__(self):
        self.ns = {}      # prefix -> namespace URI, passed to find()/findall()
        self.root = None  # root element of the loaded document

    def get_namespace(self, tag):
        """Return the namespace URI of an ElementTree tag.

        ``'{uri}local'`` yields ``'uri'``; a tag without a ``{uri}`` part
        yields ``''``.
        """
        if tag.startswith('{') and '}' in tag:
            return tag[1:tag.index('}')]
        return ''

    def load_xml(self, xmlstring):
        """Parse *xmlstring*, remember its root and its namespace.

        The namespace is taken from the root element's tag and stored in
        ``self.ns`` under the ``NS`` prefix.  Returns ``True``; parse errors
        raised by ``ET.fromstring`` propagate to the caller.
        """
        root = ET.fromstring(xmlstring)
        self.root = root
        self.ns[NS] = self.get_namespace(root.tag)
        return True

    def ns_xpath(self, xpath):
        """Translate a prefix-free path into a namespaced one.

        Returns a ``(nsxpath, attrib)`` tuple.  *attrib* is the attribute
        name when the last step starts with ``@`` (that step is removed from
        the path), otherwise ``None``.
        """
        tags = xpath.split('/')
        attrib = None
        if tags[-1].startswith('@'):
            attrib = tags.pop()[1:]

        # A loaded document whose root has no namespace must be queried with
        # bare names; prefixing would look for '{}name' and never match.
        if NS in self.ns and not self.ns[NS]:
            prefix = ''
        else:
            prefix = NS + ':'

        steps = []
        for tag in tags:
            if tag in self._SPECIAL_STEPS:
                steps.append(tag)
            else:
                steps.append(prefix + tag)

        # An attribute-only path ('@c') leaves no steps: address the context
        # element itself.
        nsxpath = '/'.join(steps) or '.'
        return nsxpath, attrib

    def xfind(self, xpath, e=None, findall=False):
        """Run ``find`` or ``findall`` for *xpath*, honouring a final ``@attr``.

        xpath   -- path without namespace prefixes, optionally ending in
                   ``@attribute``
        e       -- element to search from; defaults to the document root
        findall -- when true, return every match instead of the first one

        Without an attribute step the matched element (or list of elements)
        is returned.  With one, the attribute value of the first match is
        returned, or ``None`` when nothing matches; with ``findall`` a list
        of the values from the matches that carry the attribute.

        Raises AttributeError if no element is given and no document has
        been loaded (the search would start from ``None``).
        """
        # Compare against None explicitly: an Element without children is
        # falsy, so a truthiness test would wrongly fall back to the root.
        if e is None:
            e = self.root
        nsxpath, attrib = self.ns_xpath(xpath)

        if findall:
            found = e.findall(nsxpath, self.ns)
            if attrib:
                return [el.get(attrib) for el in found if attrib in el.attrib]
            return found

        found = e.find(nsxpath, self.ns)
        if attrib and found is not None:
            return found.get(attrib)
        return found
def main(xmlstring):
    """Parse *xmlstring* and print the result of three sample lookups.

    Each output line holds the label, the path that was queried and the
    value ``XMLParser.xfind`` returned for it.
    """
    p = XMLParser()
    p.load_xml(xmlstring)
    # A tuple of pairs rather than a dict, so the output order is fixed on
    # every interpreter version.
    xpaths = (
        ('Element a:', 'a'),
        ('Element b:', 'a/b'),
        ('Attribute c:', 'a/b/@c'),
    )
    for key, xpath in xpaths:
        # One pre-formatted argument prints the same text whether `print`
        # is the Python 2 statement or the Python 3 function.
        print('%s %s %s' % (key, xpath, p.xfind(xpath)))
# Script entry point: run the demo lookups against a small sample document.
if __name__ == "__main__":
    # The xmlns declaration on <root> puts every element below it into the
    # same default namespace.
    xmlstring = """<root xmlns="http://www.example.com">
<a>
<b c="Hello, world!">
</b>
</a>
</root>"""
    main(xmlstring)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement