Advertisement
Guest User

jpeg2epub

a guest
Jan 20th, 2020
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.03 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. # jpeg2epub: copyright (C) 2013, RUAMEL bvba, A. van der Neut
  4.  
  5. import os
  6. import sys
  7. from io import open
  8. from textwrap import dedent
  9. from cStringIO import StringIO
  10. import zipfile
  11. import uuid
  12. import datetime
  13.  
  14.  
  15. class Jpeg2Epub(object):
  16. """simple epub creator for series of JPEG image files
  17.  
  18. creates the file epub file in memory
  19. """
  20. version = 1 # class version when used a s library
  21.  
  22. def __init__(self, title, file_name=None, creator=None, title_sort=None,
  23. series=None, series_idx=None, verbose=0):
  24. self._output_name = file_name if file_name else \
  25. title.replace(' ', '_') + '.epub'
  26. self._files = None
  27. self._zip = None # the in memory zip file
  28. self._zip_data = None
  29. self._content = []
  30. self._count = 0
  31. self._series = series
  32. self._series_idx = series_idx
  33. self.d = dict(
  34. title=title,
  35. title_sort=title_sort if title_sort else title,
  36. creator=creator if creator else 'Unknown',
  37. opf_name="c.opf",
  38. toc_name="toc.ncx",
  39. ncx_ns='http://www.daisy.org/z3986/2005/ncx/',
  40. opf_ns='http://www.idpf.org/2007/opf',
  41. xsi_ns='http://www.w3.org/2001/XMLSchema-instance',
  42. dcterms_ns='http://purl.org/dc/terms/',
  43. dc_ns='http://purl.org/dc/elements/1.1/',
  44. cal_ns='http://calibre.kovidgoyal.net/2009/metadata',
  45. cont_urn='urn:oasis:names:tc:opendocument:xmlns:container',
  46. mt='application/oebps-package+xml', # media-type
  47. style_sheet='stylesheet.css',
  48. uuid=None,
  49. nav_point=None,
  50. nav_uuid=None,
  51. )
  52.  
  53. def __enter__(self):
  54. return self
  55.  
  56. def __exit__(self, typ, value, traceback):
  57. if value is None:
  58. if isinstance(self._zip_data, basestring):
  59. return
  60. self._write_toc()
  61. self._write_content()
  62. self._zip.close()
  63. self._zip = None
  64. self.d['nav_point'] = None
  65. with open(self._output_name, 'wb') as ofp:
  66. ofp.write(self._zip_data.getvalue())
  67. # minimal test: listing contents of EPUB
  68. # os.system('unzip -lv ' + self._output_name)
  69. return True
  70. return False
  71.  
  72. def add_image_file(self, file_name):
  73. self._add_image_file(file_name)
  74. self._count += 1
  75.  
  76. def _write_toc(self):
  77. self._add_from_bytes(self.d['toc_name'], dedent("""\
  78. <?xml version='1.0' encoding='utf-8'?>
  79. <ncx xmlns="{ncx_ns}" version="2005-1" xml:lang="eng">
  80. <head>
  81. <meta content="{uuid}" name="dtb:uid"/>
  82. <meta content="2" name="dtb:depth"/>
  83. <meta content="ruamel.jpeg2epub (0.1)" name="dtb:generator"/>
  84. <meta content="0" name="dtb:totalPageCount"/>
  85. <meta content="0" name="dtb:maxPageNumber"/>
  86. </head>
  87. <docTitle>
  88. <text>xx</text>
  89. </docTitle>
  90. <navMap>
  91. <navPoint id="{nav_uuid}" playOrder="1">
  92. <navLabel>
  93. <text>Start</text>
  94. </navLabel>
  95. <content src="{nav_point}"/>
  96. </navPoint>
  97. </navMap>
  98. </ncx>
  99. """).format(**self.d))
  100. self._content.append((self.d['toc_name'], 'ncx',
  101. 'application/x-dtbncx+xml'))
  102.  
  103. def _write_content(self):
  104. d = self.d.copy()
  105. manifest = []
  106. spine = []
  107. d['manifest'] = ''
  108. d['spine'] = ''
  109. for f in self._content:
  110. manifest.append(
  111. '<item href="{}" id="{}" media-type="{}"/>'.format(*f))
  112. if f[1].startswith('html'):
  113. spine.append('<itemref idref="{}"/>'.format(f[1]))
  114. d['manifest'] = '\n '.join(manifest)
  115. d['spine'] = '\n '.join(spine)
  116. d['ts'] = datetime.datetime.utcnow().isoformat() + '+00:00'
  117. d['series'] = ''
  118. if self._series:
  119. d['series'] = \
  120. u'\n' \
  121. '<meta name="calibre:series" content="{}"/>' \
  122. '<meta name="calibre:series_index" content="{}"/>'.format(
  123. self._series, self._series_idx)
  124. self._add_from_bytes(self.d["opf_name"], dedent(u"""\
  125. <?xml version='1.0' encoding='utf-8'?>
  126. <package xmlns="{opf_ns}" unique-identifier="uuid_id" version="2.0">
  127. <metadata xmlns:xsi="{xsi_ns}" xmlns:opf="{opf_ns}"
  128. xmlns:dcterms="{dcterms_ns}"
  129. xmlns:calibre="{cal_ns}"
  130. xmlns:dc="{dc_ns}">
  131. <dc:language>en</dc:language>
  132. <dc:creator>{creator}</dc:creator>
  133. <meta name="calibre:timestamp" content="{ts}"/>
  134. <meta name="calibre:title_sort" content="{title_sort}"/>
  135. <meta name="cover" content="cover"/>
  136. <dc:date>0101-01-01T00:00:00+00:00</dc:date>
  137. <dc:title>{title}</dc:title>{series}
  138. <dc:identifier id="uuid_id" opf:scheme="uuid">{uuid}
  139. </dc:identifier>
  140. <dc:identifier opf:scheme="calibre">{uuid}</dc:identifier>
  141. </metadata>
  142. <manifest>
  143. {manifest}
  144. </manifest>
  145. <spine toc="ncx">
  146. {spine}
  147. </spine>
  148. </package>
  149. """).format(**d).encode('utf-8'))
  150.  
  151. def _add_html(self, title):
  152. file_name = self._name(False)
  153. d = self.d.copy()
  154. d['title'] = title
  155. d['img_name'] = self._name()
  156. self._add_from_bytes(file_name, dedent(u"""\
  157. <?xml version='1.0' encoding='utf-8'?>
  158. <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
  159. <head>
  160. <title>{title}</title>
  161. <meta http-equiv="Content-Type" content="text/html; \
  162. charset=utf-8"/>
  163. <link href="{style_sheet}" rel="stylesheet" type="text/css"/>
  164. </head>
  165. <body class="album">
  166. <div>
  167. <img src="{img_name}" class="albumimg" alt="{title}"/>
  168. </div>
  169. </body>
  170. </html>
  171. """).format(**d).encode('utf-8'))
  172. self._content.append((file_name, 'html{}'.format(self._count),
  173. 'application/xhtml+xml'))
  174. if self.d['nav_point'] is None:
  175. self.d['nav_point'] = file_name
  176. self._write_style_sheet()
  177.  
  178. def _write_style_sheet(self):
  179. file_name = self.d['style_sheet']
  180. self._add_from_bytes(file_name, dedent("""\
  181. .album {
  182. display: block;
  183. font-size: 1em;
  184. padding: 0;
  185. margin: 0;
  186. }
  187. .albumimg {
  188. height: auto;
  189. max-height: 100%;
  190. max-width: 100%;
  191. width: auto
  192. }
  193. """))
  194. self._content.append((file_name, 'css', 'text/css'))
  195.  
  196. def _name(self, image=True):
  197. """no leading zero's necessary in zip internal filenames"""
  198. return '{}.{}'.format(self._count, 'jpg' if image else 'xhtml')
  199.  
  200. def _add_image_file(self, file_name, width=None, height=None,
  201. strip=None, max_strip_pixel=None, z=None):
  202. z = z if z else self.zip # initializes if not done yet
  203. self._add_html(file_name)
  204. # you can compress JPEGs, but with little result (1-8%) and
  205. # more complex/slow decompression (zip then jpeg)
  206. # Gain 2.836 Mb -> 2.798 Mb ( ~ 1% difference )
  207. if width:
  208. im = EpubImage(file_name)
  209. z.writestr(self._name(), im.read(), zipfile.ZIP_STORED)
  210. else:
  211. z.write(file_name, self._name())
  212. self._content.append((self._name(), 'img{}'.format(self._count),
  213. 'image/jpeg'))
  214.  
  215. @property
  216. def zip(self):
  217. if self._zip is not None:
  218. return self._zip
  219. self._zip_data = StringIO()
  220. # create zip with default compression
  221. #self._zip_data = '/var/tmp/epubtmp/yy.zip'
  222. self._zip = zipfile.ZipFile(self._zip_data, "a",
  223. zipfile.ZIP_DEFLATED, False)
  224. self.d['uuid'] = uuid.uuid4()
  225. self.d['nav_uuid'] = uuid.uuid4()
  226. self._add_mimetype()
  227. self._add_container()
  228. return self._zip
  229.  
  230. def _add_from_bytes(self, file_name, data, no_compression=False):
  231. self._zip.writestr(
  232. file_name, data,
  233. compress_type=zipfile.ZIP_STORED if no_compression else None)
  234.  
  235. def _add_mimetype(self):
  236. self._add_from_bytes('mimetype', dedent("""\
  237. application/epub+zip
  238. """).rstrip(), no_compression=True)
  239.  
  240. def _add_container(self):
  241. self._add_from_bytes('META-INF/container.xml', dedent("""\
  242. <?xml version="1.0"?>
  243. <container version="1.0" xmlns="{cont_urn}">
  244. <rootfiles>
  245. <rootfile full-path="{opf_name}" media-type="{mt}"/>
  246. </rootfiles>
  247. </container>
  248. """).rstrip().format(**self.d))
  249.  
  250.  
  251. def main():
  252. import argparse
  253. parser = argparse.ArgumentParser()
  254. parser.add_argument("--title", "-t", required=True)
  255. parser.add_argument("--title-sort", help="alternative title for sorting")
  256. parser.add_argument(
  257. "--output", "-o",
  258. help="epub name if not specified, derived from title",
  259. )
  260. parser.add_argument("--series", help="series name")
  261. parser.add_argument("--index", help="series index")
  262. parser.add_argument("--creator", help="Creator/Author")
  263. parser.add_argument("file_names", nargs="+")
  264. args = parser.parse_args()
  265. with Jpeg2Epub(args.title, title_sort=args.title_sort,
  266. file_name=args.output,
  267. series=args.series, series_idx=args.index,
  268. creator=args.creator, verbose=0) as j2e:
  269. for file_name in args.file_names:
  270. j2e.add_image_file(file_name)
  271.  
  272.  
# Run the command line interface only when executed as a script,
# not when the module is imported as a library.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement