# Untitled

import sys
from xml.etree import ElementTree
import os

def _serialize(element):
    """Return ``element`` serialized as ASCII-only text.

    ``ElementTree.tostring`` with its default encoding produces an ASCII byte
    string (``bytes`` on Python 3) in which non-ASCII characters are written
    as numeric character references. Decoding it as ASCII is therefore
    lossless, and the result can be written to a text-mode file on both
    Python 2 and Python 3.
    """
    return ElementTree.tostring(element).decode("ascii")


def run(filepath):
    """Write a spliced XML file for every book file found in ``filepath``.

    Every regular file directly inside ``filepath`` is parsed as XML. Files
    whose tree contains at least one ``bookinfo`` element are treated as
    books; for each of them ``<name>.spliced.xml`` is written to the output
    directory, containing inside a ``<book>`` root, in this order:

    1. the first ``title`` element of the book (document order),
    2. every ``bookinfo`` element,
    3. the root element of each chapter file named by a ``chapterref``
       element's ``href`` attribute (resolved relative to ``filepath``),
       each followed by a newline,
    4. every ``chapterref`` element.

    The output directory is ``filepath`` with every occurrence of
    ``"original"`` replaced by ``"slice_n_splice"``; it is created if missing.

    Files without a ``bookinfo`` element produce no output. A book that has
    ``bookinfo`` but no ``title`` still yields an (empty) output file, which
    is what earlier versions of this function left behind.

    Args:
        filepath: Directory holding the book and chapter XML files, with or
            without a trailing path separator.

    Raises:
        xml.etree.ElementTree.ParseError: If a file in ``filepath`` or a
            referenced chapter file is not well-formed XML.
        KeyError: If a ``chapterref`` element has no ``href`` attribute.
    """
    #Prepare basic OS path stuff
    slice_n_splice_path = filepath.replace("original", "slice_n_splice")
    if not os.path.isdir(slice_n_splice_path):
        os.makedirs(slice_n_splice_path)

    for filename in os.listdir(filepath):
        source_path = os.path.join(filepath, filename)

        #Sub-directories cannot be parsed as XML, so skip them
        if not os.path.isfile(source_path):
            continue

        #Start crunching XML
        bookdata = ElementTree.parse(source_path).getroot()

        #Materialise the matches: iter() returns a generator, which is always
        #truthy and can only be walked once
        bookinfo_elements = list(bookdata.iter('bookinfo'))

        #If no "bookinfo" element, then don't create new slice-n-splice file
        if not bookinfo_elements:
            continue

        chapterref_elements = list(bookdata.iter('chapterref'))

        #Only the first title is used: one output file holds one document
        title_element = next(bookdata.iter('title'), None)

        output_path = os.path.join(slice_n_splice_path,
                                   filename + '.spliced.xml')
        with open(output_path, 'w') as bookxml:
            if title_element is None:
                continue

            #Construct XML header and root element
            bookxml.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<book>\n")

            #Grab title and bookinfo elements from book file
            bookxml.write(_serialize(title_element))
            for bookinfo_element in bookinfo_elements:
                bookxml.write(_serialize(bookinfo_element))

            #Iterate through chapterref elements and grab individual chapter XML files
            for chapterref_element in chapterref_elements:
                chapter_path = os.path.join(filepath,
                                            chapterref_element.attrib['href'])
                chapterdata = ElementTree.parse(chapter_path).getroot()
                bookxml.write(_serialize(chapterdata))
                bookxml.write('\n')

            #Add chapterrefs back in
            for chapterref_element in chapterref_elements:
                bookxml.write(_serialize(chapterref_element))

            #Close root tag
            bookxml.write("</book>")

if __name__ == "__main__":
    # The directory to process is the first command-line argument. Without
    # it, exit with a usage message (non-zero status) instead of an
    # IndexError traceback. Any additional arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <directory-of-original-xml-files>" % sys.argv[0])
    run(sys.argv[1])