Advertisement
Guest User

Untitled

a guest
Sep 22nd, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  1. #! /usr/bin/env python
  2. import sys
  3. import getopt
  4. import os, os.path
  5. import re
  6. from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
  7.  
  8. def sanitize_html( html ):
  9.     '''
  10.    Probably lots of opporunities for others here, too
  11.    '''
  12.     return re.sub(r'<meta name="Adept.resource" [^>]*>','',html)
  13.  
  14. def main(args):
  15.     (opts, optargs) = getopt.getopt( args[1:], 'f' )
  16.     overwrite_ok = False
  17.     for o, oa in opts:
  18.         if '-f' == o:
  19.             overwrite_ok = True
  20.     epubfn = optargs[0]
  21.  
  22.     zf = ZipFile(epubfn)
  23.  
  24.     newdn = os.path.dirname( epubfn )
  25.     newfn =  '_clean_%s' % os.path.basename(epubfn)
  26.     epubfn2 = os.path.join( newdn, newfn )
  27.     del newdn, newfn
  28.  
  29.     if os.path.exists( epubfn2 ) and not overwrite_ok:
  30.         print "I will not overwrite %s" % epubfn2
  31.         return 1
  32.     zf2 = ZipFile( epubfn2 ,'w')
  33.  
  34.     html_re = re.compile(r'\.x?html?$')
  35.     interesting_ext = ['.ncx','.opf','.xml']
  36.     for entry in zf.infolist():
  37.         name = entry.filename
  38.         name_lc = name.lower()
  39.         for ext in interesting_ext:
  40.             if name_lc.endswith( ext ):
  41.                 print "Confirm(%s):" % name, zf.read(name)
  42.                 print ''
  43.         if not html_re.search(name.lower()):
  44.             if 'mimetype' == name:
  45.                 entry.compress_type = ZIP_STORED
  46.             else:
  47.                 entry.compress_type = ZIP_DEFLATED
  48.             zf2.writestr( entry, zf.read(name) )
  49.             continue
  50.         print "Sanitizing entry",name
  51.         html = zf.read(name)
  52.         html = sanitize_html( html )
  53.         zf2.writestr( entry, html )
  54.     zf2.close()
  55.     zf.close()
  56.     return 0
  57.    
  58. if __name__ == '__main__': sys.exit(main(sys.argv))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement