Advertisement
Guest User

Untitled

a guest
Aug 1st, 2015
238
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.18 KB | None | 0 0
  1. import sys
  2. from xml.etree import ElementTree
  3. import os
  4.  
  5. def run(filepath):
  6.  
  7. #Prepare basic OS path stuff
  8. slice_n_splice_path = filepath.replace("original", "slice_n_splice")
  9. if not os.path.isdir(slice_n_splice_path):
  10. os.makedirs(slice_n_splice_path)
  11.  
  12. for file in os.listdir(filepath):
  13.  
  14. #Start crunching XML
  15. bookdata = ElementTree.parse(filepath+file).getroot()
  16.  
  17. #Make sure we look only for "title" elements inside of "bookinfo"
  18. if bookdata.getiterator('bookinfo'):
  19. bookxml = open(slice_n_splice_path + (os.path.basename(file)) + '.spliced.xml', 'w+')
  20.  
  21.  
  22. for booktitleelement in bookdata.getiterator('title'):
  23.  
  24. #Construct XML header and root element
  25. bookxml.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<book>\n")
  26.  
  27. #Grab title and bookinfo elements from book file
  28. bookxml.write(ElementTree.tostring(booktitleelement))
  29. for bookinfoelement in bookdata.getiterator('bookinfo'):
  30. bookxml.write(ElementTree.tostring(bookinfoelement))
  31.  
  32. #Iterate through chapterref elements and grab individual chapter XML files
  33. for chapterxmlelement in bookdata.getiterator('chapterref'):
  34. chapterxml = chapterxmlelement.attrib['href']
  35. firstchapter = None
  36. chapterdata = ElementTree.parse(filepath+'/'+chapterxml).getroot()
  37. if firstchapter is None:
  38. firstchapter = chapterdata
  39. else:
  40. firstchapter.append(chapterdata)
  41.  
  42. bookxml.write(ElementTree.tostring(firstchapter))
  43. bookxml.write('\n')
  44.  
  45. #Add chapterrefs back in
  46. for bookchapterrefelement in bookdata.getiterator('chapterref'):
  47. bookxml.write(ElementTree.tostring(bookchapterrefelement))
  48.  
  49. #Close root tag
  50. bookxml.write("</book>")
  51.  
  52. bookxml.close()
  53.  
  54. #If no "bookinfo" element, then don't create new slice-n-splice file
  55. else:
  56. pass
  57.  
  58. if __name__ == "__main__":
  59. run(sys.argv[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement