Guest User

brutusbum

a guest
Dec 1st, 2009
3,458
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2. # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
  3. #
  4. # This is a python script. You need a Python interpreter to run it.
  5. # For example, ActiveState Python, which exists for windows.
  6. # Changelog
  7. #  0.01 - Initial version
  8. #  0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
  9. #  0.03 - Fix incorrect variable usage at one place.
  10. #  0.03b - enhancement by DeBockle (version 259 support)
  11. # Custom version 0.03 - no change to eReader support, only usability changes
  12. #   - start of pep-8 indentation (spaces not tab), fix trailing blanks
  13. #   - version variable, only one place to change
  14. #   - added main routine, now callable as a library/module,
  15. #     means tools can add optional support for ereader2html
  16. #   - outdir is no longer a mandatory parameter (defaults based on input name if missing)
  17. #   - time taken output to stdout
  18. #   - Psyco support - reduces runtime by a factor of (over) 3!
  19. #     E.g. (~600Kb file) 90 secs down to 24 secs
  20. #       - newstyle classes
  21. #       - changed map call to list comprehension
  22. #         may not work with python 2.3
  23. #         without Psyco this reduces runtime to 90%
  24. #         E.g. 90 secs down to 77 secs
  25. #         Psyco with map calls takes longer, do not run with map in Psyco JIT!
  26. #       - izip calls used instead of zip (if available), further reduction
  27. #         in run time (factor of 4.5).
  28. #         E.g. (~600Kb file) 90 secs down to 20 secs
  29. #   - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
  30. #  0.04 - Footnote support, PML output, correct charset in html, support more PML tags
  31. #   - Feature change, dump out PML file
  32. #   - Added support for footnote tags. NOTE footnote ids appear to be bad (not usable)
  33. #       in some pdb files :-( due to the same id being used multiple times
  34. #   - Added correct charset encoding (pml is based on cp1252)
  35. #   - Added logging support.
  36. #  
  37. #   TODO run this through a profiler - the speed increases so far came from
  38. #   applying "quick known fixes"; added a (commented out) cProfile call
  39. #  0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc
  40. #  0.06 - Merge of 0.04 and 0.05. Improved HTML output
  41. #         Placed images in subfolder, so that it's possible to just
  42. #         drop the book.pml file onto DropBook to make an unencrypted
  43. #         copy of the eReader file.
  44. #         Using that with Calibre works a lot better than the HTML
  45. #         conversion in this code.
  46. #  0.07 - Further Improved type 272 support for sidebars with all earlier fixes
  47. #  0.08 - fixed typos, removed extraneous things
  48. #  0.09 - tried to greatly improve html conversion especially with \t tags
  49.  
  50. __version__='0.09'
  51.  
  52. # Import Psyco if available
  53. try:
  54.     # Dumb speed hack 1
  55.     # http://psyco.sourceforge.net
  56.     import psyco
  57.     psyco.full()
  58.     pass
  59. except ImportError:
  60.     pass
  61. try:
  62.     # Dumb speed hack 2
  63.     # All map() calls converted to list comprehension (some use zip)
  64.     # override zip with izip - saves memory and in rough testing
  65.     # appears to be faster zip() is only used in the converted map() calls
  66.     from itertools import izip as zip
  67. except ImportError:
  68.     pass
  69.  
  70. import struct, binascii, zlib, os, sys, os.path, urllib
  71. try:
  72.     from hashlib import sha1
  73. except ImportError:
  74.     # older Python release
  75.     import sha
  76.     sha1 = lambda s: sha.new(s)
  77. import cgi
  78. import logging
  79.  
  80. logging.basicConfig()
  81. #logging.basicConfig(level=logging.DEBUG)
  82.  
  83.  
  84. ECB =   0
  85. CBC =   1
  86. class Des(object):
  87.     __pc1 = [56, 48, 40, 32, 24, 16,  8,  0, 57, 49, 41, 33, 25, 17,
  88.           9,  1, 58, 50, 42, 34, 26, 18, 10,  2, 59, 51, 43, 35,
  89.          62, 54, 46, 38, 30, 22, 14,  6, 61, 53, 45, 37, 29, 21,
  90.          13,  5, 60, 52, 44, 36, 28, 20, 12,  4, 27, 19, 11,  3]
  91.     __left_rotations = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1]
  92.     __pc2 = [13, 16, 10, 23,  0,  4,2, 27, 14,  5, 20,  9,
  93.         22, 18, 11,  3, 25,  7, 15,  6, 26, 19, 12,  1,
  94.         40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47,
  95.         43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31]
  96.     __ip = [57, 49, 41, 33, 25, 17, 9,  1,  59, 51, 43, 35, 27, 19, 11, 3,
  97.         61, 53, 45, 37, 29, 21, 13, 5,  63, 55, 47, 39, 31, 23, 15, 7,
  98.         56, 48, 40, 32, 24, 16, 8,  0,  58, 50, 42, 34, 26, 18, 10, 2,
  99.         60, 52, 44, 36, 28, 20, 12, 4,  62, 54, 46, 38, 30, 22, 14, 6]
  100.     __expansion_table = [31,  0,  1,  2,  3,  4, 3,  4,  5,  6,  7,  8,
  101.          7,  8,  9, 10, 11, 12,11, 12, 13, 14, 15, 16,
  102.         15, 16, 17, 18, 19, 20,19, 20, 21, 22, 23, 24,
  103.         23, 24, 25, 26, 27, 28,27, 28, 29, 30, 31,  0]
  104.     __sbox = [[14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7,
  105.          0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8,
  106.          4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0,
  107.          15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13],
  108.         [15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10,
  109.          3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5,
  110.          0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15,
  111.          13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9],
  112.         [10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8,
  113.          13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1,
  114.          13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7,
  115.          1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12],
  116.         [7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15,
  117.          13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9,
  118.          10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4,
  119.          3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14],
  120.         [2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9,
  121.          14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6,
  122.          4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14,
  123.          11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3],
  124.         [12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11,
  125.          10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8,
  126.          9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6,
  127.          4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13],
  128.         [4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1,
  129.          13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6,
  130.          1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2,
  131.          6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12],
  132.         [13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7,
  133.          1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2,
  134.          7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8,
  135.          2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11],]
  136.     __p = [15, 6, 19, 20, 28, 11,27, 16, 0, 14, 22, 25,
  137.         4, 17, 30, 9, 1, 7,23,13, 31, 26, 2, 8,18, 12, 29, 5, 21, 10,3, 24]
  138.     __fp = [39,  7, 47, 15, 55, 23, 63, 31,38,  6, 46, 14, 54, 22, 62, 30,
  139.         37,  5, 45, 13, 53, 21, 61, 29,36,  4, 44, 12, 52, 20, 60, 28,
  140.         35,  3, 43, 11, 51, 19, 59, 27,34,  2, 42, 10, 50, 18, 58, 26,
  141.         33,  1, 41,  9, 49, 17, 57, 25,32,  0, 40,  8, 48, 16, 56, 24]
  142.     # Type of crypting being done
  143.     ENCRYPT =   0x00
  144.     DECRYPT =   0x01
  145.     def __init__(self, key, mode=ECB, IV=None):
  146.         if len(key) != 8:
  147.             raise ValueError("Invalid DES key size. Key must be exactly 8 bytes long.")
  148.         self.block_size = 8
  149.         self.key_size = 8
  150.         self.__padding = ''
  151.         self.setMode(mode)
  152.         if IV:
  153.             self.setIV(IV)
  154.         self.L = []
  155.         self.R = []
  156.         self.Kn = [ [0] * 48 ] * 16 # 16 48-bit keys (K1 - K16)
  157.         self.final = []
  158.         self.setKey(key)
  159.     def getKey(self):
  160.         return self.__key
  161.     def setKey(self, key):
  162.         self.__key = key
  163.         self.__create_sub_keys()
  164.     def getMode(self):
  165.         return self.__mode
  166.     def setMode(self, mode):
  167.         self.__mode = mode
  168.     def getIV(self):
  169.         return self.__iv
  170.     def setIV(self, IV):
  171.         if not IV or len(IV) != self.block_size:
  172.             raise ValueError("Invalid Initial Value (IV), must be a multiple of " + str(self.block_size) + " bytes")
  173.         self.__iv = IV
  174.     def getPadding(self):
  175.         return self.__padding
  176.     def __String_to_BitList(self, data):
  177.         l = len(data) * 8
  178.         result = [0] * l
  179.         pos = 0
  180.         for c in data:
  181.             i = 7
  182.             ch = ord(c)
  183.             while i >= 0:
  184.                 if ch & (1 << i) != 0:
  185.                     result[pos] = 1
  186.                 else:
  187.                     result[pos] = 0
  188.                 pos += 1
  189.                 i -= 1
  190.         return result
  191.     def __BitList_to_String(self, data):
  192.         result = ''
  193.         pos = 0
  194.         c = 0
  195.         while pos < len(data):
  196.             c += data[pos] << (7 - (pos % 8))
  197.             if (pos % 8) == 7:
  198.                 result += chr(c)
  199.                 c = 0
  200.             pos += 1
  201.         return result
  202.     def __permutate(self, table, block):
  203.         return [block[x] for x in table]
  204.     def __create_sub_keys(self):
  205.         key = self.__permutate(Des.__pc1, self.__String_to_BitList(self.getKey()))
  206.         i = 0
  207.         self.L = key[:28]
  208.         self.R = key[28:]
  209.         while i < 16:
  210.             j = 0
  211.             while j < Des.__left_rotations[i]:
  212.                 self.L.append(self.L[0])
  213.                 del self.L[0]
  214.                 self.R.append(self.R[0])
  215.                 del self.R[0]
  216.                 j += 1
  217.             self.Kn[i] = self.__permutate(Des.__pc2, self.L + self.R)
  218.             i += 1
  219.     def __des_crypt(self, block, crypt_type):
  220.         block = self.__permutate(Des.__ip, block)
  221.         self.L = block[:32]
  222.         self.R = block[32:]
  223.         if crypt_type == Des.ENCRYPT:
  224.             iteration = 0
  225.             iteration_adjustment = 1
  226.         else:
  227.             iteration = 15
  228.             iteration_adjustment = -1
  229.         i = 0
  230.         while i < 16:
  231.             tempR = self.R[:]
  232.             self.R = self.__permutate(Des.__expansion_table, self.R)
  233.             self.R = [x ^ y for x,y in zip(self.R, self.Kn[iteration])]
  234.             B = [self.R[:6], self.R[6:12], self.R[12:18], self.R[18:24], self.R[24:30], self.R[30:36], self.R[36:42], self.R[42:]]
  235.             j = 0
  236.             Bn = [0] * 32
  237.             pos = 0
  238.             while j < 8:
  239.                 m = (B[j][0] << 1) + B[j][5]
  240.                 n = (B[j][1] << 3) + (B[j][2] << 2) + (B[j][3] << 1) + B[j][4]
  241.                 v = Des.__sbox[j][(m << 4) + n]
  242.                 Bn[pos] = (v & 8) >> 3
  243.                 Bn[pos + 1] = (v & 4) >> 2
  244.                 Bn[pos + 2] = (v & 2) >> 1
  245.                 Bn[pos + 3] = v & 1
  246.                 pos += 4
  247.                 j += 1
  248.             self.R = self.__permutate(Des.__p, Bn)
  249.             self.R = [x ^ y for x, y in zip(self.R, self.L)]
  250.             self.L = tempR
  251.             i += 1
  252.             iteration += iteration_adjustment
  253.         self.final = self.__permutate(Des.__fp, self.R + self.L)
  254.         return self.final
  255.     def crypt(self, data, crypt_type):
  256.         if not data:
  257.             return ''
  258.         if len(data) % self.block_size != 0:
  259.             if crypt_type == Des.DECRYPT: # Decryption must work on 8 byte blocks
  260.                 raise ValueError("Invalid data length, data must be a multiple of " + str(self.block_size) + " bytes\n.")
  261.             if not self.getPadding():
  262.                 raise ValueError("Invalid data length, data must be a multiple of " + str(self.block_size) + " bytes\n. Try setting the optional padding character")
  263.             else:
  264.                 data += (self.block_size - (len(data) % self.block_size)) * self.getPadding()
  265.         if self.getMode() == CBC:
  266.             if self.getIV():
  267.                 iv = self.__String_to_BitList(self.getIV())
  268.             else:
  269.                 raise ValueError("For CBC mode, you must supply the Initial Value (IV) for ciphering")
  270.         i = 0
  271.         dict = {}
  272.         result = []
  273.         while i < len(data):
  274.             block = self.__String_to_BitList(data[i:i+8])
  275.             if self.getMode() == CBC:
  276.                 if crypt_type == Des.ENCRYPT:
  277.                     block = [x ^ y for x, y in zip(block, iv)]
  278.                 processed_block = self.__des_crypt(block, crypt_type)
  279.                 if crypt_type == Des.DECRYPT:
  280.                     processed_block = [x ^ y for x, y in zip(processed_block, iv)]
  281.                     iv = block
  282.                 else:
  283.                     iv = processed_block
  284.             else:
  285.                 processed_block = self.__des_crypt(block, crypt_type)
  286.             result.append(self.__BitList_to_String(processed_block))
  287.             i += 8
  288.         if crypt_type == Des.DECRYPT and self.getPadding():
  289.             s = result[-1]
  290.             while s[-1] == self.getPadding():
  291.                 s = s[:-1]
  292.             result[-1] = s
  293.         return ''.join(result)
  294.     def encrypt(self, data, pad=''):
  295.         self.__padding = pad
  296.         return self.crypt(data, Des.ENCRYPT)
  297.     def decrypt(self, data, pad=''):
  298.         self.__padding = pad
  299.         return self.crypt(data, Des.DECRYPT)
  300.  
  301. class Sectionizer(object):
  302.     def __init__(self, filename, ident):
  303.         self.contents = file(filename, 'rb').read()
  304.         self.header = self.contents[0:72]
  305.         self.num_sections, = struct.unpack('>H', self.contents[76:78])
  306.         if self.header[0x3C:0x3C+8] != ident:
  307.             raise ValueError('Invalid file format')
  308.         self.sections = []
  309.         for i in xrange(self.num_sections):
  310.             offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
  311.             flags, val = a1, a2<<16|a3<<8|a4
  312.             self.sections.append( (offset, flags, val) )
  313.     def loadSection(self, section):
  314.         if section + 1 == self.num_sections:
  315.             end_off = len(self.contents)
  316.         else:
  317.             end_off = self.sections[section + 1][0]
  318.         off = self.sections[section][0]
  319.         return self.contents[off:end_off]
  320.  
  321. def sanitizeFileName(s):
  322.     r = ''
  323.     for c in s.lower():
  324.         if c in "abcdefghijklmnopqrstuvwxyz0123456789_.-":
  325.             r += c
  326.     return r
  327.  
  328. def fixKey(key):
  329.     def fixByte(b):
  330.         return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
  331.     return  "".join([chr(fixByte(ord(a))) for a in key])
  332.  
  333. def deXOR(text, sp, table):
  334.     r=''
  335.     j = sp
  336.     for i in xrange(len(text)):
  337.         r += chr(ord(table[j]) ^ ord(text[i]))
  338.         j = j + 1
  339.         if j == len(table):
  340.             j = 0
  341.     return r
  342.  
  343. class EreaderProcessor(object):
  344.     def __init__(self, section_reader, username, creditcard):
  345.         self.section_reader = section_reader
  346.         data = section_reader(0)
  347.         version,  = struct.unpack('>H', data[0:2])
  348.         self.version = version
  349.         logging.info('eReader file format version %s', version)
  350.         if version != 272 and version != 260 and version != 259:
  351.             raise ValueError('incorrect eReader version %d (error 1)' % version)
  352.         data = section_reader(1)
  353.         self.data = data
  354.         des = Des(fixKey(data[0:8]))
  355.         cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:]))
  356.         if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
  357.             raise ValueError('incorrect eReader version (error 2)')
  358.         input = des.decrypt(data[-cookie_size:])
  359.         def unshuff(data, shuf):
  360.             r = [''] * len(data)
  361.             j = 0
  362.             for i in xrange(len(data)):
  363.                 j = (j + shuf) % len(data)
  364.                 r[j] = data[i]
  365.             assert  len("".join(r)) == len(data)
  366.             return "".join(r)
  367.         r = unshuff(input[0:-8], cookie_shuf)
  368.  
  369.         def fixUsername(s):
  370.             r = ''
  371.             for c in s.lower():
  372.                 if (c >= 'a' and c <= 'z' or c >= '0' and c <= '9'):
  373.                     r += c
  374.             return r
  375.  
  376.         user_key = struct.pack('>LL', binascii.crc32(fixUsername(username)) & 0xffffffff, binascii.crc32(creditcard[-8:])& 0xffffffff)
  377.         drm_sub_version = struct.unpack('>H', r[0:2])[0]
  378.         self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
  379.         self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
  380.         self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
  381.         if self.version == 272:
  382.             self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
  383.             self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
  384.             self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
  385.             self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
  386.             self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0]
  387.             self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0]
  388.             self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
  389.             self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
  390.             self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
  391.             self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
  392.             self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
  393.             self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
  394.  
  395.             # **before** data record 1 was decrypted and unshuffled, it contained data
  396.             # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
  397.             self.xortable_offset  = struct.unpack('>H', r[40:40+2])[0]
  398.             self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
  399.             self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
  400.         else:
  401.             self.num_chapter_pages = 0
  402.             self.num_link_pages = 0
  403.             self.num_bookinfo_pages = 0
  404.             self.num_footnote_pages = 0
  405.             self.num_xtextsize_pages = 0
  406.             self.num_sidebar_pages = 0
  407.             self.first_chapter_page = -1
  408.             self.first_link_page = -1
  409.             self.first_bookinfo_page = -1
  410.             self.first_footnote_page = -1
  411.             self.first_xtextsize_page = -1
  412.             self.first_sidebar_page = -1
  413.  
  414.         logging.debug('self.num_text_pages %d', self.num_text_pages)
  415.         logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
  416.         logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page)
  417.         self.flags = struct.unpack('>L', r[4:8])[0]
  418.         reqd_flags = (1<<9) | (1<<7) | (1<<10)
  419.         if (self.flags & reqd_flags) != reqd_flags:
  420.             print "Flags: 0x%X" % self.flags
  421.             raise ValueError('incompatible eReader file')
  422.         des = Des(fixKey(user_key))
  423.         if version == 259:
  424.             if drm_sub_version != 7:
  425.                 raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
  426.             encrypted_key_sha = r[44:44+20]
  427.             encrypted_key = r[64:64+8]
  428.         elif version == 260:
  429.             if drm_sub_version != 13:
  430.                 raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
  431.             encrypted_key = r[44:44+8]
  432.             encrypted_key_sha = r[52:52+20]
  433.         elif version == 272:
  434.             encrypted_key = r[172:172+8]
  435.             encrypted_key_sha = r[56:56+20]
  436.         self.content_key = des.decrypt(encrypted_key)
  437.         if sha1(self.content_key).digest() != encrypted_key_sha:
  438.             raise ValueError('Incorrect Name and/or Credit Card')
  439.  
  440.     def getNumImages(self):
  441.         return self.num_image_pages
  442.  
  443.     def getImage(self, i):
  444.         sect = self.section_reader(self.first_image_page + i)
  445.         name = sect[4:4+32].strip('\0')
  446.         data = sect[62:]
  447.         return sanitizeFileName(name), data
  448.  
  449.     def getChapterNamePMLOffsetData(self):
  450.         cv = ''
  451.         if self.num_chapter_pages > 0:
  452.             for i in xrange(self.num_chapter_pages):
  453.                 chaps = self.section_reader(self.first_chapter_page + i)
  454.                 j = i % self.xortable_size
  455.                 offname = deXOR(chaps, j, self.xortable)
  456.                 offset = struct.unpack('>L', offname[0:4])[0]
  457.                 name = offname[4:].strip('\0')
  458.                 cv += '%d,%s\n' % (offset, name)
  459.         return cv
  460.  
  461.     def getLinkNamePMLOffsetData(self):
  462.         lv = ''
  463.         if self.num_link_pages > 0:
  464.             for i in xrange(self.num_link_pages):
  465.                 links = self.section_reader(self.first_link_page + i)
  466.                 j = i % self.xortable_size
  467.                 offname = deXOR(links, j, self.xortable)
  468.                 offset = struct.unpack('>L', offname[0:4])[0]
  469.                 name = offname[4:].strip('\0')
  470.                 lv += '%d,%s\n' % (offset, name)
  471.         return lv
  472.  
  473.     def getExpandedTextSizesData(self):
  474.          ts = ''
  475.          if self.num_xtextsize_pages > 0:
  476.              tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable)
  477.              for i in xrange(self.num_text_pages):
  478.                  xsize = struct.unpack('>H', tsize[0:2])[0]
  479.                  ts += "%d\n" % xsize
  480.                  tsize = tsize[2:]
  481.          return ts
  482.  
  483.     def getBookInfo(self):
  484.         bkinfo = ''
  485.         if self.num_bookinfo_pages > 0:
  486.             info = self.section_reader(self.first_bookinfo_page)
  487.             bkinfo = deXOR(info, 0, self.xortable)
  488.         return bkinfo
  489.  
  490.     def getText(self):
  491.         des = Des(fixKey(self.content_key))
  492.         r = ''
  493.         for i in xrange(self.num_text_pages):
  494.             logging.debug('get page %d', i)
  495.             r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
  496.              
  497.         # now handle footnotes pages
  498.         if self.num_footnote_pages > 0:
  499.             # the record 0 of the footnote section must pass through the Xor Table to make it useful
  500.             sect = self.section_reader(self.first_footnote_page)
  501.             fnote_ids = deXOR(sect, 0, self.xortable)
  502.             # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
  503.             des = Des(fixKey(self.content_key))
  504.             r += '\\w="100%"'
  505.             r += '\\pFootnotes:\n\n'
  506.             for i in xrange(1,self.num_footnote_pages):
  507.                 logging.debug('get footnotepage %d', i)
  508.                 id_len = ord(fnote_ids[2])
  509.                 id = fnote_ids[3:3+id_len]
  510.                 fmarker='\\t\\Q="footnote-%s"' % id
  511.                 r+=fmarker
  512.                 r += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
  513.                 r += '\\t\n\n'
  514.                 fnote_ids = fnote_ids[id_len+4:]
  515.  
  516. #               according to ereader pml spec we should be outputing the following xml for each footnote - but then we would have to handle
  517. #               parsing it back in to convert it since that xml is not valid xhtml
  518. #                fmarker = '<footnote id="footnote-%s">\n' % id
  519. #                fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
  520. #                fmarker += '\n</footnote>\n'
  521. #                r += fmarker
  522.  
  523.  
  524.         # now handle sidebar pages
  525.         if self.num_sidebar_pages > 0:
  526.             # the record 0 of the sidebar section must pass through the Xor Table to make it useful
  527.             sect = self.section_reader(self.first_sidebar_page)
  528.             sbar_ids = deXOR(sect, 0, self.xortable)
  529.             # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated
  530.             des = Des(fixKey(self.content_key))
  531.             r += '\\w="100%"'
  532.             r += '\\pSidebars:\n\n'
  533.             for i in xrange(1,self.num_sidebar_pages):
  534.                 id_len = ord(sbar_ids[2])
  535.                 id = sbar_ids[3:3+id_len]
  536.                 smarker='\\t\\Q="sidebar-%s"' % id
  537.                 r+=smarker
  538.                 r += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i)))
  539.                 r += '\\t\n\n'
  540.                 sbar_ids = sbar_ids[id_len+4:]
  541.  
  542. #               according to ereader pml spec we should be outputing the following xml for each sidebar - but then we would have to handle
  543. #               parsing it back in to convert it since that xml is not valid xhtml
  544. #                smarker = '<sidebar id="sidebar-%s">\n' % id
  545. #                smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
  546. #                smarker += '\n</sidebar>\n'
  547. #                r += smarker
  548.  
  549.         return r
  550.  
  551. class PmlConverter(object):
  552.     def __init__(self, s, bkinfo):
  553.         def findPrevStartofLine(src,p,n):
  554.             # find last end of previous line in substring from p to n
  555.             b1 = src.rfind('\n',p,n)
  556.             b2 = src.rfind('\\c',p,n)
  557.             b3 = src.rfind('\\r',p,n)
  558.             b4 = src.rfind('\\x',p,n)
  559.             b5 = src.rfind('\\p',p,n)
  560.             b = max(b1, b2, b3, b4, b5)
  561.             if b == -1:
  562.                 return n
  563.             if b == b1:
  564.                 return b + 1
  565.             return b + 2
  566.         def markHangingIndents(src):
  567.             r = ''
  568.             p = 0
  569.             while True:
  570.                 if p > len(src):
  571.                     return r
  572.                 n = src.find('\\t', p)
  573.                 if n == -1:
  574.                     r += src[p:]
  575.                     return r
  576.                 pc = findPrevStartofLine(src,p,n)
  577.                 if pc == n :
  578.                     # \t tag is at start of line so indent block will work
  579.                     end = src.find('\\t',n+2)
  580.                     if end == -1:
  581.                         end = n
  582.                     r += src[p:end+2]
  583.                     p = end + 2
  584.                 else :
  585.                     # \t tag not at start of line so hanging indent case
  586.                     # recode \t to pseudo \h tags and move it to start of this line
  587.                     # and recode its close as well
  588.                     r += src[p:pc] + '\\h' + src[pc:n]
  589.                     end = src.find('\\t',n+2)
  590.                     if end == -1:
  591.                         end = n+2
  592.                     r += src[n+2:end] + '\\h'
  593.                     p = end + 2
  594.         self.s = markHangingIndents(s)
  595.         # file(os.path.join("./pseudo.pml"), 'wb').write(self.s)
  596.         self.pos = 0
  597.         self.bkinfo = bkinfo
  598.     def nextOptAttr(self):
  599.         p = self.pos
  600.         if self.s[p:p+2] != '="':
  601.             return None
  602.         r = ''
  603.         p += 2
  604.         while self.s[p] != '"':
  605.             r += self.s[p]
  606.             p += 1
  607.         self.pos = p + 1
  608.         return r
  609.     def next(self):
  610.         p = self.pos
  611.         if p >= len(self.s):
  612.             return None
  613.         if self.s[p] != '\\':
  614.             res = self.s.find('\\', p)
  615.             if res == -1:
  616.                 res = len(self.s)
  617.             self.pos = res
  618.             return self.s[p : res], None, None
  619.         c = self.s[p+1]
  620.         # add in support for new pseudo tag \\h
  621.         if c in 'pxcriuovthnsblBk-lI\\d':
  622.             self.pos = p + 2
  623.             return None, c, None
  624.         if c in 'TwmqQ':
  625.             self.pos = p + 2
  626.             return None, c, self.nextOptAttr()
  627.         if c == 'a':
  628.             self.pos = p + 5
  629.             return None, c, int(self.s[p+2:p+5])
  630.         if c == 'U':
  631.             self.pos = p + 6
  632.             return None, c, int(self.s[p+2:p+6], 16)
  633.         c = self.s[p+1:p+1+2]
  634.         if c in ('X0','X1','X2','X3','X4','Sp','Sb'):
  635.             self.pos = p + 3
  636.             return None, c, None
  637.         if c in ('C0','C1','C2','C3','C4','Fn','Sd'):
  638.             self.pos = p + 3
  639.             return None, c, self.nextOptAttr()
  640.         print "unknown escape code %s" % c
  641.         self.pos = p + 1
  642.         return None, None, None
  643.     def LinePrinter(link):
  644.         return '<hr width="%s" />\n' % link
  645.     def LinkPrinter(link):
  646.         return '<a href="%s">' % link
  647.     def FootnoteLinkPrinter(link):
  648.         return '<a href="#footnote-%s">' % link
  649.     def SidebarLinkPrinter(link):
  650.         return '<a href="#sidebar-%s">' % link
  651.     def NotSupported(link):
  652.         raise NotImplemented()
  653.     def IndentPercent(link):
  654.         return '<span style="padding-left: %s%%;"></span>' %link
  655.     def NormalFont(link):
  656.         print "Nonfatal Error: NormalFont not implemented."
  657.         return '<!-- NormalFont %s -->' %link
  658.     def StdFont(link):
  659.         print "Nonfatal Error: StdFont not implemented."
  660.         return '<!-- StdFont: %s -->' %link
  661.    
  662.     # See http://wiki.mobileread.com/wiki/PML#Palm_Markup_Language
  663.     html_tags = {
  664.         'c' : ('<div class="center">', '</div>'),
  665.         'r' : ('<div class="right">', '</div>'),
  666.         'i' : ('<i>', '</i>'),
  667.         'u' : ('<span class="under">', '</span>'),
  668.         'b' : ('<strong>', '</strong>'),
  669.         'B' : ('<strong>', '</strong>'),
  670.         'o' : ('<del>', '</del>'),
  671.         'v' : ('<!-- ', ' -->'),
  672.         't' : ('<div class="indent">','</div>'),
  673.         'h' : ('<div class="hang">','</div>'), # pseudo-tag created to handle hanging indent cases
  674.         'Sb' : ('<sub>', '</sub>'),
  675.         'Sp' : ('<sup>', '</sup>'),
  676.         'X0' : ('<h1>', '</h1>'),
  677.         'X1' : ('<h2>', '</h2>'),
  678.         'X2' : ('<h3>', '</h3>'),
  679.         'X3' : ('<h4>', '</h4>'),
  680.         'X4' : ('<h5>', '</h5>'),
  681.         'l' : ('<span class="big">', '</span>'),
  682.         'q' : (LinkPrinter, '</a>'),
  683.         'Fn' : (FootnoteLinkPrinter, '</a>'),
  684.         'Sd' : (SidebarLinkPrinter, '</a>'),
  685.         'w' : (LinePrinter, ''),
  686.         #'m' : handled in if block,
  687.         #'Q' : handled in if block,
  688.         #'a' : handled in if block,
  689.         #'U' : handled in if block,
  690.         'x' : ('<h1 class="breakbefore">', '</h1>'),
  691.         #'C0' : handled in if block,
  692.         #'C1' : handled in if block,
  693.         #'C2' : handled in if block,
  694.         #'C3' : handled in if block,
  695.         #'C4' : handled in if block,
  696.         'T' : (IndentPercent, ''),
  697.         'n' : (NormalFont, ''),
  698.         's' : ('', ''),
  699.         'k' : ('<span class="small">', '</span>'),
  700.         'I' : ('<i>', '</i>'), # according to calibre - all ereader does is italicize the index entries
  701.     }
  702.     html_one_tags = {
  703.         'p' : '<p class="breakafter">&nbsp;</p>\n',
  704.         '\\': '\\',
  705.         '-' : '&shy;',
  706.     }
  707.     pml_chars = {
  708.         160 : '&nbsp;',130 : '&#8212;',131: '&#402;',132: '&#8222;',
  709.         133: '&#8230;',134: '&#8224;',135: '&#8225;',138: '&#352;',
  710.         139: '&#8249;',140: '&#338;',145: '&#8216;',146: '&#8217;',
  711.         147: '&#8220;',148: '&#8221;',149: '&#8226;',150: '&#8211;',
  712.         151: '&#8212;',153: '&#8482;',154: '&#353;',155: '&#8250;',
  713.         156: '&#339;',159: '&#376;'
  714.     }
  715.     def process(self):
  716.         final = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n'
  717.         final += '<html>\n<head>\n<meta http-equiv="content-type" content="text/html; charset=windows-1252">\n'
  718.         if len(self.bkinfo) > 0:
  719.             title, author, copyright, publisher, isbn = self.bkinfo.split('\0',4)
  720.             isbn = isbn.strip('\0')
  721.             final += '<meta name="Title" content="%s"/>\n' % title
  722.             final += '<meta name="Author" content="%s"/>\n' % author
  723.             final += '<meta name="Copyright" content="%s"/>\n' % copyright
  724.             final += '<meta name="Publisher" content="%s"/>\n' % publisher
  725.             final += '<meta name="ISBN" content="%s"/>\n' % isbn
  726.             final += '<style type="text/css">\n'
  727.             final += 'div.center { text-align:center; }\n'
  728.             final += 'div.right { text-align:right; }\n'
  729.             final += 'div.indent { margin-left: 5%; }\n'
  730.             final += 'div.hang { text-indent: -5%; margin-left: 5%; }\n'
  731.             final += 'span.big { font-size: 175%; }\n'
  732.             final += 'span.small { font-size: 50%; }\n'
  733.             final += 'span.under { text-decoration: underline; }\n'
  734.             final += '.breakbefore { page-break-before: always; }\n'
  735.             final += '.breakafter { page-break-after: always; }\n'
  736.             final += '</style>\n'
  737.         final += '</head><body>\n'
  738.         in_tags = []
  739.         def makeText(s):
  740.             s = s.replace('&', '&amp;')
  741.             #s = s.replace('"', '&quot;')
  742.             s = s.replace('<', '&lt;')
  743.             s = s.replace('>', '&gt;')
  744.             s = s.replace('\n', '<br />\n')
  745.             return s
  746.         while True:
  747.             r = self.next()
  748.             if not r:
  749.                 break
  750.             text, cmd, attr = r
  751.             if text:
  752.                 final += makeText(text)
  753.             if cmd:
  754.                 def getTag(ti, end):
  755.                     cmd, attr = ti
  756.                     r = self.html_tags[cmd][end]
  757.                     if type(r) != str:
  758.                         r = r(attr)
  759.                     return r
  760.                 if cmd in self.html_tags:
  761.                     pair = (cmd, attr)
  762.                     if cmd not in [a for (a,b) in in_tags]:
  763.                         final += getTag(pair, False)
  764.                         in_tags.append(pair)
  765.                     else:
  766.                         j = len(in_tags)
  767.                         while True:
  768.                             j = j - 1
  769.                             final += getTag(in_tags[j], True)
  770.                             if in_tags[j][0] == cmd:
  771.                                 break
  772.                         del in_tags[j]
  773.                         while j < len(in_tags):
  774.                             final += getTag(in_tags[j], False)
  775.                             j = j + 1
  776.  
  777.                 if cmd in self.html_one_tags:
  778.                     final += self.html_one_tags[cmd]
  779.                 if cmd == 'm':
  780.                     unquotedimagepath = "images/" + attr
  781.                     imagepath = urllib.quote( unquotedimagepath )
  782.                     final += '<img src="%s" alt="">' % imagepath
  783.                 if cmd == 'Q':
  784.                     final += '<span id="%s"> </span>' % attr
  785.                 if cmd == 'C0':
  786.                     final += '<!-- ContentsList "%s" -->' % attr
  787.                 if cmd == 'C1':
  788.                     final += '<!-- ContentsList " %s" -->' % attr
  789.                 if cmd == 'C2':
  790.                     final += '<!-- ContentsList "  %s" -->' % attr
  791.                 if cmd == 'C3':
  792.                     final += '<!-- ContentsList "   %s" -->' % attr
  793.                 if cmd == 'C4':
  794.                     final += '<!-- ContentsList "    %s" -->' % attr
  795.                 if cmd == 'a':
  796.                     final += self.pml_chars.get(attr, '&#%d;' % attr)
  797.                 if cmd == 'U':
  798.                     final += '&#%d;' % attr
  799.         final += '</body></html>\n'
  800.         # while True:
  801.         #    s = final.replace('<br />\n<br />\n<br />\n', '<br />\n<br />\n')
  802.         #    if s == final:
  803.         #        break
  804.         #    final = s
  805.         s = final.replace('</div><br />','</div>\n')
  806.         final = s
  807.         return final
  808.  
  809. def convertEreaderToHtml(infile, name, cc, outdir):
  810.     if not os.path.exists(outdir):
  811.         os.makedirs(outdir)
  812.     sect = Sectionizer(infile, 'PNRdPPrs')
  813.     er = EreaderProcessor(sect.loadSection, name, cc)
  814.  
  815.     if er.getNumImages() > 0:
  816.         imagedir = "images/"
  817.         imagedirpath = os.path.join(outdir,imagedir)
  818.         if not os.path.exists(imagedirpath):
  819.             os.makedirs(imagedirpath)
  820.     for i in xrange(er.getNumImages()):
  821.         name, contents = er.getImage(i)
  822.         file(os.path.join(imagedirpath, name), 'wb').write(contents)
  823.  
  824.     pml_string = er.getText()
  825.     pmlfilename = bookname + ".pml"
  826.     file(os.path.join(outdir, pmlfilename),'wb').write(pml_string)
  827.  
  828.     bkinfo = er.getBookInfo()
  829.  
  830.     pml = PmlConverter(pml_string, bkinfo)
  831.     htmlfilename = bookname + ".html"
  832.     file(os.path.join(outdir, htmlfilename),'wb').write(pml.process())
  833.  
  834.     # ts = er.getExpandedTextSizesData()
  835.     # file(os.path.join(outdir, 'xtextsizes.dat'), 'wb').write(ts)
  836.  
  837.     cv = er.getChapterNamePMLOffsetData()
  838.     file(os.path.join(outdir, 'chapters.dat'), 'wb').write(cv)
  839.    
  840.     # lv = er.getLinkNamePMLOffsetData()
  841.     # file(os.path.join(outdir, 'links.dat'), 'wb').write(lv)
  842.  
  843.  
  844. def main(argv=None):
  845.     global bookname
  846.     if argv is None:
  847.         argv = sys.argv
  848.    
  849.     print "eReader2Html v%s. Copyright (c) 2008 The Dark Reverser" % __version__
  850.  
  851.     if len(argv)!=4 and len(argv)!=5:
  852.         print "Converts DRMed eReader books to PML Source and HTML"
  853.         print "Usage:"
  854.         print "  ereader2html infile.pdb [outdir] \"your name\" credit_card_number "
  855.         print "Note:"
  856.         print "  if ommitted, outdir defaults based on 'infile.pdb'"
  857.         print "  It's enough to enter the last 8 digits of the credit card number"
  858.     else:
  859.         if len(argv)==4:
  860.             infile, name, cc = argv[1], argv[2], argv[3]
  861.             outdir = infile[:-4] + '_Source'
  862.         elif len(argv)==5:
  863.             infile, outdir, name, cc = argv[1], argv[2], argv[3], argv[4]
  864.         bookname = os.path.splitext(os.path.basename(infile))[0]
  865.  
  866.         try:
  867.             print "Processing..."
  868.             import time
  869.             start_time = time.time()
  870.             convertEreaderToHtml(infile, name, cc, outdir)
  871.             end_time = time.time()
  872.             search_time = end_time - start_time
  873.             print 'elapsed time: %.2f seconds' % (search_time, )
  874.             print 'output in %s' % outdir
  875.             print "done"
  876.         except ValueError, e:
  877.             print "Error: %s" % e
  878.  
  879. if __name__ == "__main__":
  880.     #import cProfile
  881.     #command = """sys.exit(main())"""
  882.     #cProfile.runctx( command, globals(), locals(), filename="cprofile.profile" )
  883.    
  884.     sys.exit(main())
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×