- XSLT: document analysis: how to output unique paths within a document?
- <beatles>
- <beatle>
- <name>
- <first>John</first>
- <last>Lennon</last>
- </name>
- </beatle>
- <beatle>
- <name>
- <first>Paul</first>
- <last>McCartney</last>
- </name>
- </beatle>
- <beatle>
- <name>
- <first>George</first>
- <last>Harrison</last>
- </name>
- </beatle>
- <beatle>
- <name>
- <first>Ringo</first>
- <last>Starr</last>
- </name>
- </beatle>
- </beatles>
- from xml.sax.handler import ContentHandler
- from xml.sax import make_parser
- from xml.sax import SAXParseException
class ShowPaths(ContentHandler):
    """SAX content handler that prints every distinct element path once.

    A path is the '/'-joined chain of element names from the document root
    down to the current element (for example ``beatles/beatle/name``).
    Paths are reported in the order in which they are first encountered.

    Attributes set by ``startDocument``:
        unique_paths: list of distinct paths, in first-seen order.
        current_path: stack of element names that are currently open.
    """

    def startDocument(self):
        """Reset all state so the handler can be reused across documents."""
        self.unique_paths = []
        # Companion set: membership tests against the list alone would be a
        # linear scan per element, i.e. quadratic over the whole document.
        self._seen_paths = set()
        self.current_path = []

    def startElement(self, name, attrs):
        """Push ``name`` on the stack and record the resulting path if new.

        ``attrs`` is accepted to satisfy the ContentHandler interface and is
        not used.
        """
        self.current_path.append(name)
        path = "/".join(self.current_path)
        if path not in self._seen_paths:
            self._seen_paths.add(path)
            self.unique_paths.append(path)

    def endElement(self, name):
        """Pop the element that is being closed off the stack."""
        self.current_path.pop()

    def endDocument(self):
        """Print the collected paths, one per line, in first-seen order."""
        for path in self.unique_paths:
            # Parenthesised single-argument form works on Python 2 and 3.
            print(path)
if __name__ == '__main__':
    # Raw string on purpose: in an ordinary literal the "\b" in
    # "d:\beatles.xml" is the backspace escape, which corrupts the path.
    xml_path = r"d:\beatles.xml"

    handler = ShowPaths()
    saxparser = make_parser()
    saxparser.setContentHandler(handler)

    # The context manager closes the file even if parsing raises.
    with open(xml_path, "r") as in_f:
        saxparser.parse(in_f)
- beatles
- beatles/beatle
- beatles/beatle/name
- beatles/beatle/name/first
- beatles/beatle/name/last
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>
  <xsl:strip-space elements="*"/>

  <!-- Text nodes contribute nothing to the listing. -->
  <xsl:template match="text()"/>

  <!--
    For every element: write its full path (one step per ancestor-or-self
    element, rendered by the "path" mode below), end the line, then descend
    into the children.
  -->
  <xsl:template match="*">
    <xsl:apply-templates mode="path" select="ancestor-or-self::*"/>
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates/>
  </xsl:template>

  <!--
    Renders a single location step: "/name", followed by a 1-based
    positional predicate when the element has at least one sibling with
    the same name.
  -->
  <xsl:template match="*" mode="path">
    <xsl:variable name="sameNameBefore"
                  select="count(preceding-sibling::*[name() = name(current())])"/>
    <xsl:variable name="sameNameAfter"
                  select="count(following-sibling::*[name() = name(current())])"/>
    <xsl:value-of select="concat('/', name())"/>
    <xsl:if test="$sameNameBefore + $sameNameAfter &gt; 0">
      <xsl:value-of select="concat('[', $sameNameBefore + 1, ']')"/>
    </xsl:if>
  </xsl:template>
</xsl:stylesheet>
- <beatles>
- <beatle>
- <name>
- <first>John</first>
- <last>Lennon</last>
- </name>
- </beatle>
- <beatle>
- <name>
- <first>Paul</first>
- <last>McCartney</last>
- </name>
- </beatle>
- <beatle>
- <name>
- <first>George</first>
- <last>Harrison</last>
- </name>
- </beatle>
- <beatle>
- <name>
- <first>Ringo</first>
- <last>Starr</last>
- </name>
- </beatle>
- </beatles>
- /beatles
- /beatles/beatle[1]
- /beatles/beatle[1]/name
- /beatles/beatle[1]/name/first
- /beatles/beatle[1]/name/last
- /beatles/beatle[2]
- /beatles/beatle[2]/name
- /beatles/beatle[2]/name/first
- /beatles/beatle[2]/name/last
- /beatles/beatle[3]
- /beatles/beatle[3]/name
- /beatles/beatle[3]/name/first
- /beatles/beatle[3]/name/last
- /beatles/beatle[4]
- /beatles/beatle[4]/name
- /beatles/beatle[4]/name/first
- /beatles/beatle[4]/name/last
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" exclude-result-prefixes="xsl">
  <xsl:output omit-xml-declaration="yes" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!--
    Index of leaf elements (elements without element children) by name.
    Only leaves are indexed so that a same-named non-leaf element or
    processing instruction earlier in the document cannot hide a leaf.
  -->
  <xsl:key name="nodeName" match="*[not(*)]" use="name()"/>

  <!-- Wrap the result and visit every leaf element in document order. -->
  <xsl:template match="/">
    <paths>
      <xsl:apply-templates select="//*[not(*)]"/>
    </paths>
  </xsl:template>

  <!-- Default for leaves: emit nothing (repeat occurrences of a name). -->
  <xsl:template match="*[not(*)]"/>

  <!--
    First leaf with a given name (Muenchian test against the key).
    The explicit priority settles the conflict with the empty leaf template
    above; without it both patterns have the same default priority, which
    XSLT 1.0 treats as an error that a processor may or may not recover from.

    The path is built by walking ancestor-or-self directly rather than by
    applying templates to the parent: an ancestor that is not the first
    node with its name would fall through to the built-in rule, which
    re-applies templates to its children and recurses without end.

    NOTE(review): uniqueness is decided by leaf name only, so two leaves
    with the same name under different ancestor chains yield a single
    path. Confirm that this is acceptable for the documents analysed.
  -->
  <xsl:template match="*[not(*)][count(. | key('nodeName', name())[1]) = 1]" priority="1">
    <path>
      <xsl:for-each select="ancestor-or-self::*">
        <xsl:value-of select="concat('/', name())"/>
      </xsl:for-each>
    </path>
  </xsl:template>
</xsl:stylesheet>
- <paths>
- <path>/beatles/beatle/name/first</path>
- <path>/beatles/beatle/name/last</path>
- </paths>