Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- import xml.sax
class SBLGNTParser(xml.sax.handler.ContentHandler):
    """SAX content handler for the SBL GNT XML file.

    The handler remembers which of the tracked elements has been opened and
    forwards the character data seen while a flag is raised to
    ``parent.add_token``.
    """

    # Element name -> name of the state flag raised when that element opens.
    _FLAG_FOR_ELEMENT = {
        "book": "in_book",
        "title": "in_book_title",
        "verse-number": "in_verse",
        "w": "in_word",
        "suffix": "in_suffix",
    }

    def __init__(self, parent):
        """Create a handler that reports text to *parent*.

        parent -- object providing an ``add_token(data)`` method.
        """
        # Initialise the base handler's own state first.  The explicit
        # base-class call (rather than super()) also works where
        # ContentHandler is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.init_meta_data()
        self.parent = parent

    def init_meta_data(self):
        """Lower every "currently inside element X" flag."""
        self.in_book = False
        self.in_book_title = False
        # in_paragraph is only ever reset here; nothing in this class raises
        # or reads it.
        self.in_paragraph = False
        self.in_verse = False
        self.in_word = False
        self.in_suffix = False

    def startElement(self, name, attrs):
        """Raise the flag that belongs to the element *name*, if it has one.

        Elements without an entry in ``_FLAG_FOR_ELEMENT`` are ignored;
        *attrs* is not used.
        """
        flag = self._FLAG_FOR_ELEMENT.get(name)
        if flag is not None:
            setattr(self, flag, True)

    def characters(self, data):
        """Forward *data* to the parent while a tracked flag is raised.

        Text seen while no flag is raised is dropped.
        """
        if self.in_word or self.in_suffix:
            # Token text (words and their suffixes).
            self.parent.add_token(data)
        elif self.in_book or self.in_book_title or self.in_verse:
            # Structural text (book, title, verse number).  It currently goes
            # to the same parent callback as token text.
            self.parent.add_token(data)

    def endElement(self, name):
        """Lower all flags when any element closes.

        NOTE(review): the reset is not limited to the element named by
        *name*, so closing a child element also clears the flags of its
        still-open ancestors (e.g. ``in_book``).  Kept as is because it
        determines which text is emitted -- confirm this is intended.
        """
        self.init_meta_data()
class Command(object):
    """Parse the SBL GNT XML file and print every piece of text reported.

    The instance acts as the ``parent`` of an ``SBLGNTParser`` handler, which
    calls back into ``add_token``.
    """

    # File parsed when handle() is called without an explicit source.
    DEFAULT_SOURCE = "/Users/sminatha/Downloads/SBLGNTxml/sblgnt.xml"

    def __init__(self):
        """Build the SAX parser and attach an SBLGNTParser handler to it."""
        self.test = 1
        self.handler = SBLGNTParser(self)
        self.parser = xml.sax.make_parser()
        self.parser.setContentHandler(self.handler)

    def handle(self, source=None):
        """Run the parser over *source*.

        source -- passed straight to the parser's ``parse`` method; when
                  omitted, ``DEFAULT_SOURCE`` is used.
        """
        if source is None:
            source = self.DEFAULT_SOURCE
        self.parser.parse(source)

    def add_token(self, data):
        """Print one piece of text delivered by the handler."""
        # Call form with a single argument: same output under the Python 2
        # print statement and the Python 3 print function.
        print(data)
if __name__ == "__main__":
    # Script entry point: build the command and parse its default input.
    command = Command()
    command.handle()
Add Comment
Please, Sign In to add comment