Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
class XMLTag:
    """A parsed XML element: tag name, attributes, and ordered content.

    ``data`` holds the element's content in insertion order. Each item is
    either a ``str`` (character data) or a nested ``XMLTag``.
    """

    def __init__(self, name, fields=None, data=None):
        """Create a tag.

        Args:
            name: Element name.
            fields: Attribute mapping; a falsy value becomes a new empty dict.
            data: Initial content. Used as-is (not copied) when it is a
                list; any other value is ignored and the tag starts empty.
        """
        self.name = name
        self.fields = fields or {}
        self.data = data if isinstance(data, list) else []

    def addData(self, newData):
        """Append an item, merging a string into a trailing string item.

        Consecutive strings are concatenated so that character data which
        arrives in several pieces ends up as a single entry.
        """
        if (self.data and isinstance(newData, str)
                and isinstance(self.data[-1], str)):
            self.data[-1] += newData
        else:
            self.data.append(newData)

    def clearStrings(self):
        """Drop every string item from ``data``, keeping all other items."""
        self.data = [item for item in self.data if not isinstance(item, str)]

    def find(self, tagName, which=0):
        """Return a direct child tag whose name matches ``tagName``.

        The comparison is case-insensitive and only looks at direct
        children, not deeper descendants.

        Args:
            tagName: Name of the child tag to look for.
            which: Zero-based index among the matching children.

        Returns:
            The ``which``-th matching child. If there are fewer matches than
            that (or ``which`` is negative), the last matching child is
            returned instead; ``None`` when nothing matches at all.
        """
        wanted = tagName.lower()
        last = None
        matches = 0
        for child in self.data:
            if not isinstance(child, XMLTag) or child.name.lower() != wanted:
                continue
            last = child
            if matches == which:
                return child
            matches += 1
        return last

    def __repr__(self):
        """Return ``ClassName(name, fields, data)`` built from the reprs."""
        parts = (self.name, self.fields, self.data)
        return "{}({})".format(
            type(self).__name__, ", ".join(repr(part) for part in parts)
        )
class XMLCollector:
    """Build an ``XMLTag`` tree from start / end / character-data callbacks.

    Everything collected hangs off a synthetic tag named ``"root"``.
    ``level`` counts how many elements are currently open, i.e. how deep
    below that root the next piece of content will be attached.
    """

    def __init__(self):
        """Start with an empty synthetic root and no open elements."""
        self.level = 0
        self.__data = XMLTag("root")

    def _current(self):
        """Return the innermost open tag (the root when nothing is open)."""
        node = self.__data
        # Anything added after a tag is opened goes inside it until it is
        # closed, so the open tag is always the last item of its parent.
        for _ in range(self.level):
            node = node.data[-1]
        return node

    def startTag(self, name, fields):
        """Attach a new ``XMLTag`` to the innermost open tag.

        Args:
            name: Element name. A name ending in ``/`` is added without
                being opened, so later content does not go inside it.
            fields: Attribute mapping handed to ``XMLTag``.
        """
        self._current().addData(XMLTag(name, fields))
        if not name.endswith("/"):
            self.level += 1

    def tagData(self, data):
        """Attach ``data`` to the innermost open tag."""
        self._current().addData(data)

    def endTag(self, name):
        """Close the innermost open tag.

        Names ending in ``/`` were never opened by ``startTag``, so they are
        skipped here as well; otherwise ``level`` would drift out of step.
        """
        if not name.endswith("/"):
            self.level -= 1

    @property
    def data(self):
        """Content list of the synthetic root tag."""
        return self.__data.data

    @property
    def name(self):
        """Name of the synthetic root tag."""
        return self.__data.name

    @property
    def fields(self):
        """Attribute mapping of the synthetic root tag."""
        return self.__data.fields
if __name__ == "__main__":
    # Parse format.xml and print every top-level <post> as a quoted block.
    from xml.parsers import expat

    xmlTags = XMLCollector()
    eXMLParse = expat.ParserCreate()
    eXMLParse.StartElementHandler = xmlTags.startTag
    eXMLParse.EndElementHandler = xmlTags.endTag
    eXMLParse.CharacterDataHandler = xmlTags.tagData

    # The context manager closes the file even if parsing raises.
    with open("format.xml", "rb") as xmlFile:
        eXMLParse.ParseFile(xmlFile)

    for post in xmlTags.data:
        if not (isinstance(post, XMLTag) and post.name == "post"):
            continue
        print("- {} -".format(post.fields['topic']))
        print(" Posted by: {} (id: {})".format(post.fields['author'], post.fields['id']))
        contents = post.find("contents")
        if contents is None:
            print("> [none]")
            continue
        # Only character data is printable; nested tags inside <contents>
        # would make a plain join raise TypeError.
        text = "".join(part for part in contents.data if isinstance(part, str))
        # Keep the line breaks and quote every line; replacing "\n" with
        # "> " alone would run all lines together.
        print("> " + text.replace("\n", "\n> "))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement