Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Jun 21st, 2012  |  syntax: None  |  size: 4.10 KB  |  hits: 13  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. XSLT: document analysis: how to output unique paths within a document?
  2. <beatles>
  3.   <beatle>
  4.     <name>
  5.       <first>John</first>
  6.       <last>Lennon</last>
  7.     </name>
  8.   </beatle>
  9.   <beatle>
  10.     <name>
  11.       <first>Paul</first>
  12.       <last>McCartney</last>
  13.     </name>
  14.   </beatle>
  15.   <beatle>
  16.     <name>
  17.       <first>George</first>
  18.       <last>Harrison</last>
  19.     </name>
  20.   </beatle>
  21.   <beatle>
  22.     <name>
  23.       <first>Ringo</first>
  24.       <last>Starr</last>
  25.     </name>
  26.   </beatle>
  27. </beatles>
  28.        
  29. from xml.sax.handler import ContentHandler
  30. from xml.sax import make_parser
  31. from xml.sax import SAXParseException
  32.  
  33. class ShowPaths(ContentHandler):
  34.  
  35.     def startDocument(self):
  36.         self.unique_paths=[]
  37.         self.current_path=[]
  38.  
  39.  
  40.     def startElement(self,name,attrs):
  41.         self.current_path.append(name)
  42.         path="/".join(self.current_path)
  43.         if path not in self.unique_paths:
  44.             self.unique_paths.append(path)
  45.  
  46.     def endElement(self,name):
  47.         self.current_path.pop();
  48.  
  49.     def endDocument(self):
  50.         for path in self.unique_paths:
  51.             print path
  52.  
  53. if __name__=='__main__':
  54.     handler = ShowPaths()
  55.     saxparser = make_parser()
  56.     saxparser.setContentHandler(handler)
  57.     in_f=open("d:\beatles.xml","r")
  58.     saxparser.parse(in_f)  
  59.     in_f.close()
  60.        
  61. beatles
  62. beatles/beatle
  63. beatles/beatle/name
  64. beatles/beatle/name/first
  65. beatles/beatle/name/last
  66.        
  67. <xsl:stylesheet version="1.0"
  68.  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  69.  <xsl:output method="text"/> <!-- Plain-text output: the stylesheet writes one element path per line. -->
  70.  <xsl:strip-space elements="*"/>
  71.  <!-- For every element: write one step per ancestor-or-self element (mode "path"), end the line with a newline, then recurse into the children. -->
  72.  <xsl:template match="*">
  73.         <xsl:apply-templates select="ancestor-or-self::*" mode="path"/>
  74.         <xsl:text>&#xA;</xsl:text>
  75.         <xsl:apply-templates/>
  76.  </xsl:template>
  77.  <!-- Writes a single location step: "/" plus the element name, followed by a positional predicate when same-named siblings exist. -->
  78.  <xsl:template match="*" mode="path">
  79.   <xsl:value-of select="concat('/',name())"/>
  80.  <!-- Count the siblings before and after the current element that share its name. -->
  81.   <xsl:variable name="vnumPrecSiblings" select=
  82.         "count(preceding-sibling::*[name()=name(current())])"/>
  83.   <xsl:variable name="vnumFollSiblings" select=
  84.         "count(following-sibling::*[name()=name(current())])"/>
  85.  <!-- Only when at least one same-named sibling exists, append [n], where n is the number of same-named preceding siblings plus one. -->
  86.   <xsl:if test="$vnumPrecSiblings or $vnumFollSiblings">
  87.    <xsl:value-of select=
  88.      "concat('[', $vnumPrecSiblings +1, ']')"/>
  89.   </xsl:if>
  90.  </xsl:template>
  91.  <!-- Suppress text nodes so that only the paths are written. -->
  92.  <xsl:template match="text()"/>
  93. </xsl:stylesheet>
  94.        
  95. <beatles>
  96.     <beatle>
  97.         <name>
  98.             <first>John</first>
  99.             <last>Lennon</last>
  100.         </name>
  101.     </beatle>
  102.     <beatle>
  103.         <name>
  104.             <first>Paul</first>
  105.             <last>McCartney</last>
  106.         </name>
  107.     </beatle>
  108.     <beatle>
  109.         <name>
  110.             <first>George</first>
  111.             <last>Harrison</last>
  112.         </name>
  113.     </beatle>
  114.     <beatle>
  115.         <name>
  116.             <first>Ringo</first>
  117.             <last>Starr</last>
  118.         </name>
  119.     </beatle>
  120. </beatles>
  121.        
  122. /beatles
  123. /beatles/beatle[1]
  124. /beatles/beatle[1]/name
  125. /beatles/beatle[1]/name/first
  126. /beatles/beatle[1]/name/last
  127. /beatles/beatle[2]
  128. /beatles/beatle[2]/name
  129. /beatles/beatle[2]/name/first
  130. /beatles/beatle[2]/name/last
  131. /beatles/beatle[3]
  132. /beatles/beatle[3]/name
  133. /beatles/beatle[3]/name/first
  134. /beatles/beatle[3]/name/last
  135. /beatles/beatle[4]
  136. /beatles/beatle[4]/name
  137. /beatles/beatle[4]/name/first
  138. /beatles/beatle[4]/name/last
  139.        
  140. <?xml version="1.0" encoding="UTF-8"?>
  141. <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" exclude-result-prefixes="xsl">
  142. <xsl:output omit-xml-declaration="yes" indent="yes"/>
  143. <xsl:strip-space elements="*"/>
  144. <xsl:key name="nodeName" match="node()" use="name()"/> <!-- Index every node by its name; the keyed template below uses this to find the first node with each name. -->
  145.  <!-- Elements without element children produce nothing by default. NOTE(review): this pattern overlaps with the keyed template below; which rule applies to a given leaf depends on XSLT conflict resolution, so confirm with the target processor. -->
  146. <xsl:template match="//*[not(*)]"/>
  147.  <!-- Entry point: wrap the result in a paths element and process every element that has no element children. -->
  148. <xsl:template match="/">
  149.   <paths>
  150.     <xsl:apply-templates select="//*[not(*)]"/>
  151.   </paths>
  152. </xsl:template>
  153.  <!-- Matches a node only when it is the first node the key returns for its name. A node without element children is wrapped in a path element; in both branches the parent is processed first and then "/" plus the node's own name is written. NOTE(review): grouping is by name alone, so same-named elements under different parents appear to be reported only once; confirm this is intended. -->
  154. <xsl:template match="node()[count(. | key('nodeName', name())[1]) = 1]" >
  155.   <xsl:choose>
  156.     <xsl:when test="not(child::*)">
  157.       <path>
  158.         <xsl:apply-templates select="parent::*"/>
  159.         <xsl:value-of select="concat('/', name())"/>
  160.       </path>
  161.     </xsl:when>
  162.     <xsl:otherwise>
  163.       <xsl:apply-templates select="parent::*"/>
  164.       <xsl:value-of select="concat('/', name())"/>
  165.     </xsl:otherwise>
  166.   </xsl:choose>
  167. </xsl:template>
  168. </xsl:stylesheet>
  169.        
  170. <paths>
  171.   <path>/beatles/beatle/name/first</path>
  172.   <path>/beatles/beatle/name/last</path>
  173. </paths>