Advertisement
gabalese

makeKindleTOC()

Apr 14th, 2012
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.25 KB | None | 0 0
  1. #! /usr/bin/env python3
  2. # file: optimize.py
  3. # Part of a larger helper script that minimizes manual intervention on ePubs exported from IDCS5.5
  4. # Feel free to use, improve, insult or whatever. No credit required.
  5.  
  6. try:
  7.     from lxml import etree as ET
  8. except ImportError:
  9.     print("REQUIRED: lxml library. Don't really know if the standard library xml.etree is equivalent")
  10.     # Portability is not a matter of concern to me.
  11.     # If you like to experiment, etree should be imported like:
  12.     # import xml.etree.ElementTree as ET
  13.     sys.exit(1)
  14.  
  15. # Namespaces shortcuts
  16.  
  17. NS_opf = "http://www.idpf.org/2007/opf"
  18. NS_dc =  "http://purl.org/dc/elements/1.1/"
  19.  
  20.    
  21. def makeKindleTOC():
  22.     """Parses the toc.ncx file to build an elementary html toc"""
  23.    
  24.     opf = ET.parse("OEBPS/content.opf").getroot()
  25.     reference = ET.SubElement(opf[3], "reference", attrib={"href":"toc.html", "type":"toc", "title":"toc.html"})
  26.     opf[1].append(ET.Element("item",attrib={"href":"toc.html","id":"toc.html","media-type":"application/xhtml+xml"}))
  27.    
  28.     # We don't use a linked toc for general purpose ePub, so the toc.html won't be added to the spine.
  29.     # The same item could be .append'ed to opf[2] if needed
  30.    
  31.     ncx = ET.parse("OEBPS/toc.ncx").getroot()
  32.    
  33.     list_a = []
  34.     list_b = []
  35.     text = ""
  36.     text += """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
  37. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
  38. <html xmlns="http://www.w3.org/1999/xhtml">
  39.     <head>
  40.         <title>TOC</title>
  41.         <link href="template.css" rel="stylesheet" type="text/css"/>
  42.     </head>
  43.     <body>""" # Yes, it's ugly. But it does work.
  44.    
  45.     cover = opf.xpath("//d:reference[@title='Cover'] | //d:reference[@title='cover']",namespaces={"d":NS_opf})[0].get("href")
  46.    
  47.     text += '<p><a href="%s">%s</a>' % (cover,"Cover") + "</p>" + "\n"
  48.    
  49.     for navpoint in ncx[2]:
  50.         list_a.append(navpoint[0][0].text)
  51.         list_b.append(navpoint[1].get("src"))
  52.        
  53.     for a,b in zip(list_a, list_b):
  54.         text += '<p><a href="%s">%s</a>' % (b,a) + "</p>" + "\n"
  55.    
  56.     text += "</body>" + "\n"
  57.     text += "</html>" + "\n"   
  58.    
  59.     toc_html = open("OEBPS/toc.html","w")
  60.     toc_html.write(text)
  61.     toc_html.close()
  62.    
  63.     opf = ET.ElementTree(opf)
  64.     opf.write("OEBPS/content.opf")
  65.    
  66. if __name__ == "__main__":
  67.     makeKindleTOC()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement