Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- import sys
- import getopt
- import os, os.path
- import re
- from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED
def sanitize_html(html):
    '''
    Return *html* with every ``<meta name="Adept.resource" ...>`` tag removed.

    *html* may be text or bytes; the result has the same type as the input.
    ``ZipFile.read`` returns bytes on Python 3, and a text pattern cannot be
    applied to bytes there, so the pattern type is chosen to match the input.

    Probably lots of opportunities for other clean-ups here, too.
    '''
    if isinstance(html, bytes):
        # On Python 2 ``bytes`` is ``str``, so this branch is also the
        # ordinary byte-string path there.
        return re.sub(br'<meta name="Adept.resource" [^>]*>', b'', html)
    return re.sub(r'<meta name="Adept.resource" [^>]*>', '', html)
def _native_str(value):
    '''Return *value* as the interpreter's native ``str`` type for printing.'''
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        # Python 3 bytes (on Python 2, bytes is str and was returned above).
        return value.decode('utf-8', 'replace')
    # Python 2 unicode: encode so it can be mixed with byte strings safely.
    return value.encode('utf-8')


def _copy_sanitized(zf, zf2):
    '''Copy every entry of archive *zf* into *zf2*, sanitizing (X)HTML ones.'''
    html_re = re.compile(r'\.x?html?$')
    interesting_ext = ('.ncx', '.opf', '.xml')
    for entry in zf.infolist():
        name = entry.filename
        name_lc = name.lower()
        data = zf.read(name)

        # Dump the metadata-like files so the user can inspect them.
        if name_lc.endswith(interesting_ext):
            print("Confirm(%s): %s" % (_native_str(name), _native_str(data)))
            print('')

        if html_re.search(name_lc):
            # HTML entries keep the compression type recorded in the source.
            print("Sanitizing entry %s" % name)
            data = sanitize_html(data)
        elif 'mimetype' == name:
            entry.compress_type = ZIP_STORED
        else:
            entry.compress_type = ZIP_DEFLATED
        zf2.writestr(entry, data)


def main(args):
    '''
    Write a sanitized copy of an EPUB next to the original.

    *args* is an argv-style list: program name, an optional ``-f`` flag and
    the path of the EPUB.  The copy is named ``_clean_<original name>`` and
    placed in the same directory as the input.

    Returns the process exit status:
      0 -- the copy was written,
      1 -- the output file already exists and ``-f`` was not given,
      2 -- bad command line (unknown option or no input file).
    '''
    usage = "usage: %s [-f] EPUB\n" % (args[0] if args else 'script')
    try:
        opts, optargs = getopt.getopt(args[1:], 'f')
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n%s" % (err, usage))
        return 2
    if not optargs:
        # Previously this fell through to an IndexError on optargs[0].
        sys.stderr.write(usage)
        return 2
    overwrite_ok = any('-f' == opt for opt, _ in opts)

    epubfn = optargs[0]
    epubfn2 = os.path.join(os.path.dirname(epubfn),
                           '_clean_%s' % os.path.basename(epubfn))

    zf = ZipFile(epubfn)
    try:
        if os.path.exists(epubfn2) and not overwrite_ok:
            print("I will not overwrite %s" % epubfn2)
            return 1
        zf2 = ZipFile(epubfn2, 'w')
        try:
            _copy_sanitized(zf, zf2)
        finally:
            # Always close the output, even if copying fails part-way.
            zf2.close()
    finally:
        # Also covers the early "will not overwrite" return above.
        zf.close()
    return 0
if __name__ == '__main__':
    # Run the tool and hand main()'s return value to the shell as exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement