Guest User

Untitled

a guest
Jun 21st, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  1. #! /usr/bin/env python
  2.  
  3. import xml.sax
  4.  
  5. class SBLGNTParser(xml.sax.handler.ContentHandler):
  6. """Class to parse the SBL GNT XML file"""
  7.  
  8. def __init__(self, parent):
  9. self.init_meta_data()
  10. self.parent = parent
  11.  
  12. def init_meta_data(self):
  13. self.in_book = 0
  14. self.in_book_title = 0
  15. self.in_paragraph = 0
  16. self.in_verse = 0
  17. self.in_word = 0
  18. self.in_suffix = 0
  19.  
  20. def startElement(self, name, attrs):
  21. if name == "book":
  22. self.in_book = 1
  23.  
  24. elif name == "title":
  25. self.in_book_title = 1
  26.  
  27. elif name == "verse-number":
  28. self.in_verse = 1
  29.  
  30. elif name == "w":
  31. self.in_word = 1
  32.  
  33. elif name == "suffix":
  34. self.in_suffix = 1
  35.  
  36. def characters(self, data):
  37. if self.in_word or self.in_suffix:
  38. # Here handle tokens
  39. self.parent.add_token(data)
  40.  
  41.  
  42. elif self.in_book or self.in_book_title or self.in_verse:
  43. # Here handle structs
  44. self.parent.add_token(data)
  45.  
  46.  
  47. def endElement(self, name):
  48. self.init_meta_data()
  49.  
  50.  
  51. class Command():
  52.  
  53. def __init__(self):
  54. self.test = 1
  55. self.handler = SBLGNTParser(self)
  56. self.parser = xml.sax.make_parser()
  57. self.parser.setContentHandler(self.handler)
  58.  
  59. def handle(self):
  60. self.parser.parse("/Users/sminatha/Downloads/SBLGNTxml/sblgnt.xml")
  61.  
  62. def add_token(self, data):
  63. print data
  64.  
  65.  
  66. if __name__ == "__main__":
  67. test = Command()
  68. test.handle()
Add Comment
Please, Sign In to add comment