Guest User

Apprentice

a guest
Nov 5th, 2009
301
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2. # -*- coding: ascii -*-
  3. # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
  4. #
  5. # This is a python script. You need a Python interpreter to run it.
  6. # For example, ActiveState Python, which exists for windows.
  7. # Changelog
  8. #  0.01 - Initial version
  9. #  0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
  10. #  0.03 - Fix incorrect variable usage at one place.
  11. #  0.03b - enhancement by DeBockle (version 259 support)
  12. # Custom version 0.03 - no change to eReader support, only usability changes
  13. #   - start of pep-8 indentation (spaces not tab), fix trailing blanks
  14. #   - version variable, only one place to change
  15. #   - added main routine, now callable as a library/module,
  16. #     means tools can add optional support for ereader2html
  17. #   - outdir is no longer a mandatory parameter (defaults based on input name if missing)
  18. #   - time taken output to stdout
  19. #   - Psyco support - reduces runtime by a factor of (over) 3!
  20. #     E.g. (~600Kb file) 90 secs down to 24 secs
  21. #       - newstyle classes
  22. #       - changed map call to list comprehension
  23. #         may not work with python 2.3
  24. #         without Psyco this reduces runtime to 90%
  25. #         E.g. 90 secs down to 77 secs
  26. #         Psyco with map calls takes longer, do not run with map in Psyco JIT!
  27. #       - izip calls used instead of zip (if available), further reduction
  28. #         in run time (factor of 4.5).
  29. #         E.g. (~600Kb file) 90 secs down to 20 secs
  30. #   - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
  31. #  0.04 - Footnote support, PML output, correct charset in html, support more PML tags
  32. #   - Feature change, dump out PML file
  33. #   - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable)
  34. #       in some pdb files :-( due to the same id being used multiple times
  35. #   - Added correct charset encoding (pml is based on cp1252)
  36. #   - Added logging support.
  37. #  
  38. #   TODO run this through a profiler - speed increases so far was from
  39. #   applying "quick known fixes", added (commented out) cprofiler call
  40. #  0.05 - Support for more things in type 272
  41. #  0.06 - Merge of 0.04 and 0.05. Improved HTML output
  42. #         Placed images in subfolder, so that it's possible to just
  43. #         drop the book.pml file onto DropBook to make an unencrypted
  44. #         copy of the eReader file.
  45. #         Using that with Calibre works a lot better than the HTML
  46. #         conversion in this code.
  47.  
  48. __version__='0.06'
  49. DEBUG = 0
  50.  
  51.  
  52. # Import Psyco if available
  53. try:
  54.     # Dumb speed hack 1
  55.     # http://psyco.sourceforge.net
  56.     import psyco
  57.     psyco.full()
  58.     pass
  59. except ImportError:
  60.     pass
  61. try:
  62.     # Dumb speed hack 2
  63.     # All map() calls converted to list comprehension (some use zip)
  64.     # override zip with izip - saves memory and in rough testing
  65.     # appears to be faster zip() is only used in the converted map() calls
  66.     from itertools import izip as zip
  67. except ImportError:
  68.     pass
  69.  
  70. import struct, binascii, zlib, os, sys, os.path, urllib
  71. try:
  72.     from hashlib import sha1
  73. except ImportError:
  74.     # older Python release
  75.     import sha
  76.     sha1 = lambda s: sha.new(s)
  77. import cgi
  78. import logging
  79.  
  80. logging.basicConfig()
  81. #logging.basicConfig(level=logging.DEBUG)
  82.  
  83. write_pml=False
  84.  
  85. ECB = 0
  86. CBC = 1
  87. class Des(object):
  88.     __pc1 = [56, 48, 40, 32, 24, 16,  8,  0, 57, 49, 41, 33, 25, 17,
  89.           9,  1, 58, 50, 42, 34, 26, 18, 10,  2, 59, 51, 43, 35,
  90.          62, 54, 46, 38, 30, 22, 14,  6, 61, 53, 45, 37, 29, 21,
  91.          13,  5, 60, 52, 44, 36, 28, 20, 12,  4, 27, 19, 11,  3]
  92.     __left_rotations = [1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1]
  93.     __pc2 = [13, 16, 10, 23,  0,  4,2, 27, 14,  5, 20,  9,
  94.         22, 18, 11,  3, 25,  7, 15,  6, 26, 19, 12,  1,
  95.         40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47,
  96.         43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31]
  97.     __ip = [57, 49, 41, 33, 25, 17,  9,  1, 59, 51, 43, 35, 27, 19, 11, 3,
  98.         61, 53, 45, 37, 29, 21, 13,  5, 63, 55, 47, 39, 31, 23, 15, 7,
  99.         56, 48, 40, 32, 24, 16,  8,  0, 58, 50, 42, 34, 26, 18, 10, 2,
  100.         60, 52, 44, 36, 28, 20, 12,  4, 62, 54, 46, 38, 30, 22, 14, 6]
  101.     __expansion_table = [31,  0,  1,  2,  3,  4, 3,  4,  5,  6,  7,  8,
  102.          7,  8,  9, 10, 11, 12,11, 12, 13, 14, 15, 16,
  103.         15, 16, 17, 18, 19, 20,19, 20, 21, 22, 23, 24,
  104.         23, 24, 25, 26, 27, 28,27, 28, 29, 30, 31,  0]
  105.     __sbox = [[14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7,
  106.          0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8,
  107.          4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0,
  108.          15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13],
  109.         [15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10,
  110.          3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5,
  111.          0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15,
  112.          13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9],
  113.         [10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8,
  114.          13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1,
  115.          13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7,
  116.          1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12],
  117.         [7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15,
  118.          13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9,
  119.          10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4,
  120.          3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14],
  121.         [2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9,
  122.          14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6,
  123.          4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14,
  124.          11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3],
  125.         [12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11,
  126.          10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8,
  127.          9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6,
  128.          4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13],
  129.         [4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1,
  130.          13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6,
  131.          1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2,
  132.          6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12],
  133.         [13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7,
  134.          1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2,
  135.          7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8,
  136.          2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11],]
  137.     __p = [15, 6, 19, 20, 28, 11,27, 16, 0, 14, 22, 25,
  138.         4, 17, 30, 9, 1, 7,23,13, 31, 26, 2, 8,18, 12, 29, 5, 21, 10,3, 24]
  139.     __fp = [39,  7, 47, 15, 55, 23, 63, 31,38,  6, 46, 14, 54, 22, 62, 30,
  140.         37,  5, 45, 13, 53, 21, 61, 29,36,  4, 44, 12, 52, 20, 60, 28,
  141.         35,  3, 43, 11, 51, 19, 59, 27,34,  2, 42, 10, 50, 18, 58, 26,
  142.         33,  1, 41,  9, 49, 17, 57, 25,32,  0, 40,  8, 48, 16, 56, 24]
  143.     # Type of crypting being done
  144.     ENCRYPT = 0x00
  145.     DECRYPT = 0x01
  146.     def __init__(self, key, mode=ECB, IV=None):
  147.         if len(key) != 8:
  148.             raise ValueError("Invalid DES key size. Key must be exactly 8 bytes long.")
  149.         self.block_size = 8
  150.         self.key_size = 8
  151.         self.__padding = ''
  152.         self.setMode(mode)
  153.         if IV:
  154.             self.setIV(IV)
  155.         self.L = []
  156.         self.R = []
  157.         self.Kn = [ [0] * 48 ] * 16    # 16 48-bit keys (K1 - K16)
  158.         self.final = []
  159.         self.setKey(key)
  160.     def getKey(self):
  161.         return self.__key
  162.     def setKey(self, key):
  163.         self.__key = key
  164.         self.__create_sub_keys()
  165.     def getMode(self):
  166.         return self.__mode
  167.     def setMode(self, mode):
  168.         self.__mode = mode
  169.     def getIV(self):
  170.         return self.__iv
  171.     def setIV(self, IV):
  172.         if not IV or len(IV) != self.block_size:
  173.             raise ValueError("Invalid Initial Value (IV), must be a multiple of " + str(self.block_size) + " bytes")
  174.         self.__iv = IV
  175.     def getPadding(self):
  176.         return self.__padding
  177.     def __String_to_BitList(self, data):
  178.         l = len(data) * 8
  179.         result = [0] * l
  180.         pos = 0
  181.         for c in data:
  182.             i = 7
  183.             ch = ord(c)
  184.             while i >= 0:
  185.                 if ch & (1 << i) != 0:
  186.                     result[pos] = 1
  187.                 else:
  188.                     result[pos] = 0
  189.                 pos += 1
  190.                 i -= 1
  191.         return result
  192.     def __BitList_to_String(self, data):
  193.         result = ''
  194.         pos = 0
  195.         c = 0
  196.         while pos < len(data):
  197.             c += data[pos] << (7 - (pos % 8))
  198.             if (pos % 8) == 7:
  199.                 result += chr(c)
  200.                 c = 0
  201.             pos += 1
  202.         return result
  203.     def __permutate(self, table, block):
  204.         return [block[x] for x in table]
  205.     def __create_sub_keys(self):
  206.         key = self.__permutate(Des.__pc1, self.__String_to_BitList(self.getKey()))
  207.         i = 0
  208.         self.L = key[:28]
  209.         self.R = key[28:]
  210.         while i < 16:
  211.             j = 0
  212.             while j < Des.__left_rotations[i]:
  213.                 self.L.append(self.L[0])
  214.                 del self.L[0]
  215.                 self.R.append(self.R[0])
  216.                 del self.R[0]
  217.                 j += 1
  218.             self.Kn[i] = self.__permutate(Des.__pc2, self.L + self.R)
  219.             i += 1
  220.     def __des_crypt(self, block, crypt_type):
  221.         block = self.__permutate(Des.__ip, block)
  222.         self.L = block[:32]
  223.         self.R = block[32:]
  224.         if crypt_type == Des.ENCRYPT:
  225.             iteration = 0
  226.             iteration_adjustment = 1
  227.         else:
  228.             iteration = 15
  229.             iteration_adjustment = -1
  230.         i = 0
  231.         while i < 16:
  232.             tempR = self.R[:]
  233.             self.R = self.__permutate(Des.__expansion_table, self.R)
  234.             self.R = [x ^ y for x,y in zip(self.R, self.Kn[iteration])]
  235.             B = [self.R[:6], self.R[6:12], self.R[12:18], self.R[18:24], self.R[24:30], self.R[30:36], self.R[36:42], self.R[42:]]
  236.             j = 0
  237.             Bn = [0] * 32
  238.             pos = 0
  239.             while j < 8:
  240.                 m = (B[j][0] << 1) + B[j][5]
  241.                 n = (B[j][1] << 3) + (B[j][2] << 2) + (B[j][3] << 1) + B[j][4]
  242.                 v = Des.__sbox[j][(m << 4) + n]
  243.                 Bn[pos] = (v & 8) >> 3
  244.                 Bn[pos + 1] = (v & 4) >> 2
  245.                 Bn[pos + 2] = (v & 2) >> 1
  246.                 Bn[pos + 3] = v & 1
  247.                 pos += 4
  248.                 j += 1
  249.             self.R = self.__permutate(Des.__p, Bn)
  250.             self.R = [x ^ y for x, y in zip(self.R, self.L)]
  251.             self.L = tempR
  252.             i += 1
  253.             iteration += iteration_adjustment
  254.         self.final = self.__permutate(Des.__fp, self.R + self.L)
  255.         return self.final
  256.     def crypt(self, data, crypt_type):
  257.         if not data:
  258.             return ''
  259.         if len(data) % self.block_size != 0:
  260.             if crypt_type == Des.DECRYPT: # Decryption must work on 8 byte blocks
  261.                 raise ValueError("Invalid data length, data must be a multiple of " + str(self.block_size) + " bytes\n.")
  262.             if not self.getPadding():
  263.                 raise ValueError("Invalid data length, data must be a multiple of " + str(self.block_size) + " bytes\n. Try setting the optional padding character")
  264.             else:
  265.                 data += (self.block_size - (len(data) % self.block_size)) * self.getPadding()
  266.         if self.getMode() == CBC:
  267.             if self.getIV():
  268.                 iv = self.__String_to_BitList(self.getIV())
  269.             else:
  270.                 raise ValueError("For CBC mode, you must supply the Initial Value (IV) for ciphering")
  271.         i = 0
  272.         dict = {}
  273.         result = []
  274.         while i < len(data):
  275.             block = self.__String_to_BitList(data[i:i+8])
  276.             if self.getMode() == CBC:
  277.                 if crypt_type == Des.ENCRYPT:
  278.                     block = [x ^ y for x, y in zip(block, iv)]
  279.                 processed_block = self.__des_crypt(block, crypt_type)
  280.                 if crypt_type == Des.DECRYPT:
  281.                     processed_block = [x ^ y for x, y in zip(processed_block, iv)]
  282.                     iv = block
  283.                 else:
  284.                     iv = processed_block
  285.             else:
  286.                 processed_block = self.__des_crypt(block, crypt_type)
  287.             result.append(self.__BitList_to_String(processed_block))
  288.             i += 8
  289.         if crypt_type == Des.DECRYPT and self.getPadding():
  290.             s = result[-1]
  291.             while s[-1] == self.getPadding():
  292.                 s = s[:-1]
  293.             result[-1] = s
  294.         return ''.join(result)
  295.     def encrypt(self, data, pad=''):
  296.         self.__padding = pad
  297.         return self.crypt(data, Des.ENCRYPT)
  298.     def decrypt(self, data, pad=''):
  299.         self.__padding = pad
  300.         return self.crypt(data, Des.DECRYPT)
  301.  
  302. class Sectionizer(object):
  303.     def __init__(self, filename, ident):
  304.         self.contents = file(filename, 'rb').read()
  305.         self.header = self.contents[0:72]
  306.         if DEBUG:
  307.             file("header.dat", 'wb').write(self.header)
  308.  
  309.         self.num_sections, = struct.unpack('>H', self.contents[76:78])
  310.         if DEBUG:
  311.             print "number of sections: %d" % self.num_sections
  312.         if self.header[0x3C:0x3C+8] != ident:
  313.             raise ValueError('Invalid file format')
  314.         self.sections = []
  315.         for i in xrange(self.num_sections):
  316.             offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
  317.             flags, val = a1, a2<<16|a3<<8|a4
  318.             if DEBUG:
  319.                 print "section %d offset %d" % ( i, offset )
  320.             self.sections.append( (offset, flags, val) )
  321.     def loadSection(self, section):
  322.         if section + 1 == self.num_sections:
  323.             end_off = len(self.contents)
  324.         else:
  325.             end_off = self.sections[section + 1][0]
  326.         off = self.sections[section][0]
  327.         return self.contents[off:end_off]
  328.  
  329. def sanitizeFileName(s):
  330.     r = ''
  331.     for c in s.lower():
  332.         if c in "abcdefghijklmnopqrstuvwxyz0123456789_.-":
  333.             r += c
  334.     return r
  335.  
  336. def fixKey(key):
  337.     def fixByte(b):
  338.         return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
  339.     return "".join([chr(fixByte(ord(a))) for a in key])
  340.  
  341. def deXOR(text, sp, table):
  342.     r=''
  343.     j = sp
  344.     for i in xrange(len(text)):
  345.         r += chr(ord(table[j]) ^ ord(text[i]))
  346.         j = j + 1
  347.         if j == len(table):
  348.             j = 0
  349.     return r
  350.  
  351. class EreaderProcessor(object):
  352.     def __init__(self, section_reader, username, creditcard):
  353.         self.section_reader = section_reader
  354.         data = section_reader(0)
  355.         self.data0 = data
  356.         if DEBUG:
  357.             file("data0.dat", 'wb').write(self.data0)
  358.         version,  = struct.unpack('>H', data[0:2])
  359.         self.version = version
  360.         logging.info('eReader file format version %s', version)
  361.         if version != 272 and version != 260 and version != 259:
  362.             raise ValueError('incorrect eReader version %d (error 1)' % version)
  363.         data = section_reader(1)
  364.         self.data = data
  365.         # first key is used on last 8 bytes of data to get cookie_shuf and cookie_size
  366.         self.first_key = data[0:8]
  367.         des = Des(fixKey(self.first_key))
  368.         self.end_key = des.decrypt(data[-8:])
  369.         cookie_shuf, cookie_size = struct.unpack('>LL', self.end_key)
  370.         if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
  371.             raise ValueError('incorrect eReader version (error 2)')
  372.         # first key is also used to decrypt all remaining bytes in data into their shuffled version
  373.         cookie = des.decrypt(data[-cookie_size:])
  374.        
  375.         # now unshuffle it
  376.         def unshuff(data, shuf):
  377.             r = [''] * len(data)
  378.             j = 0
  379.             for i in xrange(len(data)):
  380.                 j = (j + shuf) % len(data)
  381.                 r[j] = data[i]
  382.             assert len("".join(r)) == len(data)
  383.             return "".join(r)
  384.         r = unshuff(cookie[0:-8], cookie_shuf)
  385.         if DEBUG:
  386.             file("mainrecord.dat", 'wb').write(r)
  387.        
  388.         def fixUsername(s):
  389.             r = ''
  390.             for c in s.lower():
  391.                 if (c >= 'a' and c <= 'z' or c >= '0' and c <= '9'):
  392.                     r += c
  393.             return r
  394.        
  395.         # second key is made of of user private info i.e. name and crediticard info
  396.         user_key = struct.pack('>LL', binascii.crc32(fixUsername(username)) & 0xffffffff, binascii.crc32(creditcard[-8:])& 0xffffffff)
  397.         self.user_key = user_key
  398.        
  399.         # the unshuffled data is examined to find record information
  400.         drm_sub_version = struct.unpack('>H', r[0:2])[0]
  401.         self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
  402.         self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
  403.         self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
  404.  
  405.         if version == 272:
  406.             self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
  407.             self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
  408.  
  409.             self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
  410.             self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
  411.  
  412.             self.num_bkinfo_pages = struct.unpack('>H', r[34:34+2])[0]
  413.             self.first_bkinfo_page = struct.unpack('>H', r[32:32+2])[0]
  414.  
  415.             self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
  416.             self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
  417.  
  418.             self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
  419.             self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
  420.  
  421.             self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
  422.             self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
  423.  
  424.             # **before** data record 1 was decrypted and unshuffled, it contained data
  425.             # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
  426.             self.xortable_offset  = struct.unpack('>H', r[40:40+2])[0]
  427.             self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
  428.             self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
  429.         else:
  430.             self.num_chapter_pages = 0
  431.             self.num_link_pages = 0
  432.             self.num_bkinfo_pages = 0
  433.             self.num_footnote_pages = 0
  434.             self.num_xtextsize_pages = 0
  435.             self.num_sidebar_pages = 0
  436.             self.first_chapter_page = -1
  437.             self.first_link_page = -1
  438.             self.first_bkinfo_page = -1
  439.             self.first_footnote_page = -1
  440.             self.first_xtextsize_page = -1
  441.             self.first_sidebar_page = -1
  442.  
  443.         if DEBUG:
  444.             print "num_text_pages: %d" % self.num_text_pages
  445.             print "first_text_page: %d" % 1
  446.  
  447.             print "num_chapter_pages: %d" % self.num_chapter_pages
  448.             print "first_chapter_page: %d" % self.first_chapter_page
  449.  
  450.             print "num_image_pages: %d" % self.num_image_pages
  451.             print "first_image_page: %d" % self.first_image_page
  452.  
  453.             print "num_footnote_pages: %d" % self.num_footnote_pages
  454.             print "first_footnote_page: %d" % self.first_footnote_page
  455.  
  456.             print "num_link_pages: %d" % self.num_link_pages
  457.             print "first_link_page: %d" % self.first_link_page
  458.  
  459.             print "num_bkinfo_pages: %d" % self.num_bkinfo_pages
  460.             print "first_bkinfo_page: %d" % self.first_bkinfo_page
  461.  
  462.             print "num_xtextsize_pages: %d" % self.num_xtextsize_pages
  463.             print "first_xtextsize_page: %d" % self.first_xtextsize_page
  464.  
  465.             print "num_sidebar_pages: %d" % self.num_sidebar_pages
  466.             print "first_sidebar_page: %d" % self.first_sidebar_page
  467.  
  468.         logging.debug('self.num_text_pages %d', self.num_text_pages)
  469.         logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
  470.        
  471.         self.flags = struct.unpack('>L', r[4:8])[0]
  472.         reqd_flags = (1<<9) | (1<<7) | (1<<10)
  473.         if (self.flags & reqd_flags) != reqd_flags:
  474.             print "Flags: 0x%X" % self.flags
  475.             raise ValueError('incompatible eReader file')
  476.         # the user_key is used to unpack the encrypted key which is stored in the unshuffled data
  477.         des = Des(fixKey(user_key))
  478.         if version == 259:
  479.             if drm_sub_version != 7:
  480.                 raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
  481.             encrypted_key_sha = r[44:44+20]
  482.             encrypted_key = r[64:64+8]
  483.         elif version == 260:
  484.             if drm_sub_version != 13:
  485.                 raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
  486.             encrypted_key = r[44:44+8]
  487.             encrypted_key_sha = r[52:52+20]
  488.         elif version == 272:
  489.             encrypted_key = r[172:172+8]
  490.             encrypted_key_sha = r[56:56+20]
  491.  
  492.         # the decrypted version of encrypted_key is the content_key
  493.         self.content_key = des.decrypt(encrypted_key)
  494.        
  495.         if sha1(self.content_key).digest() != encrypted_key_sha:
  496.             raise ValueError('Incorrect Name and/or Credit Card')
  497.            
  498.     def getNumImages(self):
  499.         return self.num_image_pages
  500.        
  501.     def getImage(self, i):
  502.         sect = self.section_reader(self.first_image_page + i)
  503.         name = sect[4:4+32].strip('\0')
  504.         data = sect[62:]
  505.         return sanitizeFileName(name), data
  506.        
  507.    
  508.     def getChapterNamePMLOffsetData(self):
  509.         cv = ''
  510.         if self.num_chapter_pages > 0:
  511.             # now dump chapter offsets and chapter names
  512.             # see mobile read wiki for details
  513.             for i in xrange(self.num_chapter_pages):
  514.                 chaps = self.section_reader(self.first_chapter_page + i)
  515.                 j = i % self.xortable_size
  516.                 cv += deXOR(chaps, j, self.xortable)
  517.         return cv
  518.  
  519.     def getLinkNamePMLOffsetData(self):
  520.         lv = ''
  521.         if self.num_link_pages > 0:
  522.             # now dump link offset and link names
  523.             # see mobileread wiki for details
  524.             for i in xrange(self.num_link_pages):
  525.                 links = self.section_reader(self.first_link_page + i)
  526.                 j = i % self.xortable_size
  527.                 lv += deXOR(links, j, self.xortable)
  528.         return lv
  529.  
  530.  
  531.     def getBookInfoData(self):
  532.         bi = ''
  533.         if self.num_bkinfo_pages > 0:
  534.             # now dump book information
  535.             # see mobileread wiki for details
  536.             for i in xrange(self.num_bkinfo_pages):
  537.                 binfo = self.section_reader(self.first_bkinfo_page + i)
  538.                 j = i % self.xortable_size
  539.                 bi += deXOR(binfo, j, self.xortable)
  540.         return bi
  541.  
  542.  
  543.     def getExpandedTextSizesData(self):
  544.         ts = ''
  545.         if self.num_xtextsize_pages > 0:
  546.             # now dump table of expanded sizes for each text page
  547.             # (two bytes for each text page - see mobileread wiki for details)
  548.             for i in xrange(self.num_xtextsize_pages):
  549.                 tsize = self.section_reader(self.first_xtextsize_page + i)
  550.                 j = i % self.xortable_size
  551.                 ts += deXOR(tsize, j, self.xortable)
  552.         return ts
  553.  
  554.     def getText(self):
  555.         # uses the content_key and zlib to decrypt and then inflate text sections
  556.         des = Des(fixKey(self.content_key))
  557.         r = ''
  558.         for i in xrange(self.num_text_pages):
  559.             logging.debug('get page %d', i)
  560.             r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
  561.              
  562.         # now handle footnotes pages
  563.         if self.num_footnote_pages > 0:
  564.             # the first record of the footnote section must pass through the Xor Table to make it useful
  565.             sect = self.section_reader(self.first_footnote_page)
  566.             fnote_ids = deXOR(sect, 0, self.xortable)
  567.             p = 0
  568.             # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
  569.             des = Des(fixKey(self.content_key))
  570.             r += '\\w="100%"'
  571.             r += 'Footnotes\p'
  572.             for i in xrange(1,self.num_footnote_pages):
  573.                 id_len = ord(fnote_ids[2])
  574.                 id = fnote_ids[3:3+id_len]
  575.                 fmarker='\Q="%s"' % id
  576.                 r+=fmarker
  577.                 r += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
  578.                 r += '\n'
  579.                 fnote_ids = fnote_ids[id_len+4:]
  580.  
  581.         # TO-DO - handle sidebar pages similar to footnotes pages
  582.  
  583.         return r
  584.  
  585. class PmlConverter(object):
  586.     def __init__(self, s):
  587.         self.s = s
  588.         self.pos = 0
  589.     def nextOptAttr(self):
  590.         p = self.pos
  591.         if self.s[p:p+2] != '="':
  592.             return None
  593.         r = ''
  594.         p += 2
  595.         while self.s[p] != '"':
  596.             r += self.s[p]
  597.             p += 1
  598.         self.pos = p + 1
  599.         return r
  600.     def next(self):
  601.         p = self.pos
  602.         if p >= len(self.s):
  603.             return None
  604.         if self.s[p] != '\\':
  605.             res = self.s.find('\\', p)
  606.             if res == -1:
  607.                 res = len(self.s)
  608.             self.pos = res
  609.             return self.s[p : res], None, None
  610.         c = self.s[p+1]
  611.         if c in 'pxcriuovtnsblBk-lI\\d':
  612.             self.pos = p + 2
  613.             return None, c, None
  614.         if c in 'TwmqQ':
  615.             self.pos = p + 2
  616.             return None, c, self.nextOptAttr()
  617.         if c == 'a':
  618.             self.pos = p + 5
  619.             return None, c, int(self.s[p+2:p+5])
  620.         if c == 'U':
  621.             self.pos = p + 6
  622.             return None, c, int(self.s[p+2:p+6], 16)
  623.         c = self.s[p+1:p+1+2]
  624.         if c in ('X0','X1','X2','X3','X4','Sp','Sb'):
  625.             self.pos = p + 3
  626.             return None, c, None
  627.         if c in ('C0','C1','C2','C3','C4','Fn','Sd'):
  628.             self.pos = p + 3
  629.             return None, c, self.nextOptAttr()
  630.         print "unknown escape code %s" % c
  631.         self.pos = p + 1
  632.         return None, None, None
  633.     def linkPrinter(link):
  634.         return '<a href="%s">' % link
  635.     def footnotePrinter(link):
  636.         return '<a href="#%s">' % link ## TODO may need to cgi.escape name....
  637.     def LinePrinter(link):
  638.         return '<hr width="%s">' % link
  639.     def ChapterTitle(link):
  640.         print "Nonfatal Error: ChapterTitle not implemented."
  641.         return '<!-- ChapterTitle %s -->' %link
  642.     def IndentPercent(link):
  643.         print "Nonfatal Error: IndentPercent not implemented."
  644.         return '<!-- IndentPercent: %s -->' %link
  645.     def NormalFont(link):
  646.         print "Nonfatal Error: NormalFont not implemented."
  647.         return '<!-- NormalFont %s -->' %link
  648.     def StdFont(link):
  649.         print "Nonfatal Error: StdFont not implemented."
  650.         return '<!-- StdFont: %s -->' %link
  651.     def SingleBackslash(link):
  652.         print "Nonfatal Error: SingleBackslash not implemented."
  653.         return '<!-- SingleBackslash: %s -->' %link
  654.     def SoftHyphen(link):
  655.         print "Nonfatal Error: SoftHyphen not implemented."
  656.         return '<!-- SoftHyphen: %s -->' %link
  657.     def ReferenceIndexItem(link):
  658.         print "Nonfatal Error: ReferenceIndexItem not implemented."
  659.         return '<!-- CReferenceIndexItem: %s -->' %link
  660.  
  661.    
  662.     # See http://wiki.mobileread.com/wiki/PML#Palm_Markup_Language
  663.     html_tags = {
  664.         'c' : ('<div class="center">', '</div>\n'),
  665.         'r' : ('<div class="right">', '</div>\n'),
  666.         'i' : ('<i>', '</i>'),
  667.         'u' : ('<u>', '</u>'),
  668.         'b' : ('<strong>', '</strong>'),
  669.         'B' : ('<strong>', '</strong>'),
  670.         'o' : ('<strike>', '</strike>'),
  671.         'v' : ('<!-- ', ' -->'),
  672.         't' : ('', ''),
  673.         'Sb' : ('<sub>', '</sub>'),
  674.         'Sp' : ('<sup>', '</sup>'),
  675.         'X0' : ('<h1>', '</h1>\n'),
  676.         'X1' : ('<h2>', '</h2>\n'),
  677.         'X2' : ('<h3>', '</h3>\n'),
  678.         'X3' : ('<h4>', '</h4>\n'),
  679.         'X4' : ('<h5>', '</h5>\n'),
  680.         'l' : ('<font size="+2">', '</font>'),
  681.         'q' : (linkPrinter, '</a>'),
  682.         'Fn' : (footnotePrinter, '</a>'),
  683.         'w' : (LinePrinter, ''),
  684.         #'m' : handled in if block,
  685.         #'Q' : handled in if block,
  686.         #'a' : handled in if block,
  687.         #'U' : handled in if block,
  688.         'x' : ('<h1 class="breakbefore">', '</h1>\n'),
  689.         'Cn' : (ChapterTitle, ''),
  690.         'T' : (IndentPercent, ''),
  691.         'n' : (NormalFont, ''),
  692.         #'s' : (StdFont, ''),
  693.         's' : ('', ''),
  694.         'k' : ('<span style="font-variant: small-caps;">', '</span>'), # NOTE some pdb's then go ahead and use uppercase letters - which doesn't format the way one would expect (perhaps post process the output with html dom and lower case if only upper case letters are found?)
  695.         '\\' : (SingleBackslash, ''),
  696.         '-' : (SoftHyphen, ''),
  697.         'I' : (ReferenceIndexItem, ''),
  698.         'Sd' : (footnotePrinter, '</a>'), ## untested
  699.     }
  700.     html_one_tags = {
  701.         'p' : '<p class="breakafter">&nbsp;</p>\n'
  702.     }
  703.     pml_chars = {
  704.         160 : '&nbsp;',130 : '&#8212;',131: '&#402;',132: '&#8222;',
  705.         133: '&#8230;',134: '&#8224;',135: '&#8225;',138: '&#352;',
  706.         139: '&#8249;',140: '&#338;',145: '&#8216;',146: '&#8217;',
  707.         147: '&#8220;',148: '&#8221;',149: '&#8226;',150: '&#8211;',
  708.         151: '&#8212;',153: '&#8482;',154: '&#353;',155: '&#8250;',
  709.         156: '&#339;',159: '&#376;'
  710.     }
  711.     def process(self):
  712.         final = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n<html>\n<head>\n<meta http-equiv="content-type" content="text/html; charset=windows-1252">\n<style type="text/css">\ndiv.center { text-align:center; }\ndiv.right { text-align:right; }\n.breakbefore { page-break-before: always; }\n.breakafter { page-break-after: always; }\n</style>\n</head>\n<body>\n'
  713.         in_tags = []
  714.         def makeText(s):
  715.             s = s.replace('&', '&amp;')
  716.             #s = s.replace('"', '&quot;')
  717.             s = s.replace('<', '&lt;')
  718.             s = s.replace('>', '&gt;')
  719.             s = s.replace('\n', '<br>\n')
  720.             return s
  721.         while True:
  722.             r = self.next()
  723.             if not r:
  724.                 break
  725.             text, cmd, attr = r
  726.             if text:
  727.                 final += makeText(text)
  728.             if cmd:
  729.                 def getTag(ti, end):
  730.                     cmd, attr = ti
  731.                     r = self.html_tags[cmd][end]
  732.                     if type(r) != str:
  733.                         r = r(attr)
  734.                     return r
  735.                    
  736.                 if cmd in self.html_tags:
  737.                     pair = (cmd, attr)
  738.                     if cmd not in [a for (a,b) in in_tags]:
  739.                         final += getTag(pair, False)
  740.                         in_tags.append(pair)
  741.                     else:
  742.                         j = len(in_tags)
  743.                         while True:
  744.                             j = j - 1
  745.                             final += getTag(in_tags[j], True)
  746.                             if in_tags[j][0] == cmd:
  747.                                 break
  748.                         del in_tags[j]
  749.                         while j < len(in_tags):
  750.                             final += getTag(in_tags[j], False)
  751.                             j = j + 1
  752.                            
  753.                 if cmd in self.html_one_tags:
  754.                     final += self.html_one_tags[cmd]
  755.                 if cmd == 'm':
  756.                     unquotedimagepath = bookname + "_img/" + attr
  757.                     imagepath = urllib.quote( unquotedimagepath )
  758.                     final += '<img src="%s" alt="">' % imagepath
  759.                 if cmd == 'Q':
  760.                     final += '<a name="%s" id="%s"> </a>' % (attr, attr)
  761.                 if cmd == 'a':
  762.                     final += self.pml_chars.get(attr, '&#%d;' % attr)
  763.                 if cmd == 'U':
  764.                     final += '&#%d;' % attr
  765.         final += '</body></html>\n'
  766.         while True:
  767.             s = final.replace('<br>\n<br>\n<br>\n', '<br>\n<br>\n')
  768.             if s == final:
  769.                 break
  770.             final = s
  771.         return final
  772.  
  773. def convertEreaderToHtml(infile, name, cc, outdir):
  774.     if not os.path.exists(outdir):
  775.         os.makedirs(outdir)
  776.     sect = Sectionizer(infile, 'PNRdPPrs')
  777.     er = EreaderProcessor(sect.loadSection, name, cc)
  778.  
  779.     if er.getNumImages() > 0:
  780.         imagedir = bookname + "_img"
  781.         imagedirpath = os.path.join(outdir,imagedir)
  782.         if not os.path.exists(imagedirpath):
  783.             os.makedirs(imagedirpath)
  784.         for i in xrange(er.getNumImages()):
  785.             name, contents = er.getImage(i)
  786.             file(os.path.join(imagedirpath, name), 'wb').write(contents)
  787.  
  788.     pml_string = er.getText()
  789.     pml = PmlConverter(pml_string)
  790.     pmlfilename = bookname + ".pml"
  791.     htmlfilename = bookname + ".html"
  792.     file(os.path.join(outdir, pmlfilename),'wb').write(pml_string)
  793.     file(os.path.join(outdir, htmlfilename),'wb').write(pml.process())
  794.    
  795.     ts = er.getExpandedTextSizesData()
  796.     file(os.path.join(outdir, 'xtextsizes.dat'), 'wb').write(ts)
  797.  
  798.     cv = er.getChapterNamePMLOffsetData()
  799.     file(os.path.join(outdir, 'chapters.dat'), 'wb').write(cv)
  800.  
  801.     bi = er.getBookInfoData()
  802.     file(os.path.join(outdir, 'bookinfo.dat'), 'wb').write(bi)
  803.    
  804.     lv = er.getLinkNamePMLOffsetData()
  805.     file(os.path.join(outdir, 'links.dat'), 'wb').write(lv)
  806.  
  807. def main(argv=None):
  808.     global bookname
  809.     if argv is None:
  810.         argv = sys.argv
  811.    
  812.     print "eReader2Html v%s. Copyright (c) 2008 The Dark Reverser" % __version__
  813.  
  814.     if len(argv)!=4 and len(argv)!=5:
  815.         print "Converts DRMed eReader books to PML Source and HTML"
  816.         print "Usage:"
  817.         print "  ereader2html infile.pdb [outdir] \"your name\" credit_card_number "
  818.         print "Note:"
  819.         print "  if ommitted, outdir defaults based on 'infile.pdb'"
  820.         print "  It's enough to enter the last 8 digits of the credit card number"
  821.     else:
  822.         if len(argv)==4:
  823.             infile, name, cc = argv[1], argv[2], argv[3]
  824.             outdir = infile[:-4] + '_Source'
  825.         elif len(argv)==5:
  826.             infile, outdir, name, cc = argv[1], argv[2], argv[3], argv[4]
  827.         bookname = os.path.splitext(os.path.basename(infile))[0]
  828.        
  829.         try:
  830.             print "Processing..."
  831.             import time
  832.             start_time = time.time()
  833.             convertEreaderToHtml(infile, name, cc, outdir)
  834.             end_time = time.time()
  835.             search_time = end_time - start_time
  836.             print 'elapsed time: %.2f seconds' % (search_time, )
  837.             print 'output in %s' % outdir
  838.             print "done"
  839.         except ValueError, e:
  840.             print "Error: %s" % e
  841.  
  842. if __name__ == "__main__":
  843.     #import cProfile
  844.     #command = """sys.exit(main())"""
  845.     #cProfile.runctx( command, globals(), locals(), filename="cprofile.profile" )
  846.    
  847.     sys.exit(main())
  848.    
  849.  
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×