SHARE
TWEET

DarkReverser

a guest Sep 24th, 2008 3,074 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # This is a Python script. You need a Python interpreter to run it,
  2. # for example ActiveState Python, which is available for Windows.
  3. #
  4. # Big Thanks to Igor SKOCHINSKY for providing me with all his information
  5. # and source code relating to the inner workings of this compression scheme.
  6. # Without it, I wouldn't be able to solve this as easily.
  7. #
  8. # Changelog
  9. #  0.01 - Initial version
  10. #  0.02 - Fix issue with size computing
  11. #  0.03 - Fix issue with some files
  12.  
  13.  
  14. import struct, sys
  15.  
  16. class BitReader:
  17.         def __init__(self, data):
  18.                 self.data, self.pos, self.nbits = data + "\x00\x00\x00\x00", 0, len(data) * 8
  19.         def peek(self, n):
  20.                 r, g = 0, 0
  21.                 while g < n:
  22.                         r, g = (r << 8) | ord(self.data[(self.pos+g)>>3]), g + 8 - ((self.pos+g) & 7)
  23.                 return (r >> (g - n)) & ((1 << n) - 1)
  24.         def eat(self, n):
  25.                 self.pos += n
  26.                 return self.pos <= self.nbits
  27.         def left(self):
  28.                 return self.nbits - self.pos
  29.  
  30. class HuffReader:
  31.         def __init__(self, huffs):
  32.                 self.huffs = huffs
  33.                 h = huffs[0]
  34.                 if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
  35.                         raise ValueError('invalid huff1 header')
  36.                 if huffs[1][0:4] != 'CDIC' or huffs[1][4:8] != '\x00\x00\x00\x10':
  37.                         raise ValueError('invalid huff2 header')
  38.                 self.entry_bits, = struct.unpack('>L', huffs[1][12:16])
  39.                 off1,off2 = struct.unpack('>LL', huffs[0][16:24])
  40.                 self.dict1 = struct.unpack('<256L', huffs[0][off1:off1+256*4])
  41.                 self.dict2 = struct.unpack('<64L', huffs[0][off2:off2+64*4])
  42.                 self.dicts = huffs[1:]
  43.                 self.r = ''
  44.                
  45.         def _unpack(self, bits, depth = 0):
  46.                 if depth > 32:
  47.                         raise ValueError('corrupt file')
  48.                 while bits.left():
  49.                         dw = bits.peek(32)
  50.                         v = self.dict1[dw >> 24]
  51.                         codelen = v & 0x1F
  52.                         assert codelen != 0
  53.                         code = dw >> (32 - codelen)
  54.                         r = (v >> 8)
  55.                         if not (v & 0x80):
  56.                                 while code < self.dict2[(codelen-1)*2]:
  57.                                         codelen += 1
  58.                                         code = dw >> (32 - codelen)
  59.                                 r = self.dict2[(codelen-1)*2+1]
  60.                         r -= code
  61.                         assert codelen != 0
  62.                         if not bits.eat(codelen):
  63.                                 return
  64.                         dicno = r >> self.entry_bits
  65.                         off1 = 16 + (r - (dicno << self.entry_bits)) * 2
  66.                         dic = self.dicts[dicno]
  67.                         off2 = 16 + ord(dic[off1]) * 256 + ord(dic[off1+1])
  68.                         blen = ord(dic[off2]) * 256 + ord(dic[off2+1])
  69.                         slice = dic[off2+2:off2+2+(blen&0x7fff)]
  70.                         if blen & 0x8000:
  71.                                 self.r += slice
  72.                         else:
  73.                                 self._unpack(BitReader(slice), depth + 1)
  74.  
  75.         def unpack(self, data):
  76.                 self.r = ''
  77.                 self._unpack(BitReader(data))
  78.                 return self.r
  79.  
  80. class Sectionizer:
  81.         def __init__(self, filename, ident):
  82.                 self.contents = file(filename, 'rb').read()
  83.                 self.header = self.contents[0:72]
  84.                 self.num_sections, = struct.unpack('>H', self.contents[76:78])
  85.                 if self.header[0x3C:0x3C+8] != ident:
  86.                         raise ValueError('Invalid file format')
  87.                 self.sections = []
  88.                 for i in xrange(self.num_sections):
  89.                         offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
  90.                         flags, val = a1, a2<<16|a3<<8|a4
  91.                         self.sections.append( (offset, flags, val) )
  92.         def loadSection(self, section):
  93.                 if section + 1 == self.num_sections:
  94.                         end_off = len(self.contents)
  95.                 else:
  96.                         end_off = self.sections[section + 1][0]
  97.                 off = self.sections[section][0]
  98.                 return self.contents[off:end_off]
  99.  
  100.  
  101. def getSizeOfTrailingDataEntry(ptr, size):
  102.         bitpos, result = 0, 0
  103.         while True:
  104.                 v = ord(ptr[size-1])
  105.                 result |= (v & 0x7F) << bitpos
  106.                 bitpos += 7
  107.                 size -= 1
  108.                 if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
  109.                         return result
  110.  
  111. def getSizeOfTrailingDataEntries(ptr, size, flags):
  112.         num = 0
  113.         flags >>= 1
  114.         while flags:
  115.                 if flags & 1:
  116.                         num += getSizeOfTrailingDataEntry(ptr, size - num)
  117.                 flags >>= 1            
  118.         return num
  119.  
  120. def unpackBook(input_file):
  121.         sect = Sectionizer(input_file, 'BOOKMOBI')
  122.  
  123.         header = sect.loadSection(0)
  124.  
  125.         crypto_type, = struct.unpack('>H', header[0xC:0xC+2])
  126.         if crypto_type != 0:
  127.                 raise ValueError('The book is encrypted. Run mobidedrm first')
  128.  
  129.         if header[0:2] != 'DH':
  130.                 raise ValueError('invalid compression type')
  131.  
  132.         extra_flags, = struct.unpack('>L', header[0xF0:0xF4])
  133.         records, = struct.unpack('>H', header[0x8:0x8+2])
  134.  
  135.         huffoff,huffnum = struct.unpack('>LL', header[0x70:0x78])
  136.         huffs = [sect.loadSection(i) for i in xrange(huffoff, huffoff+huffnum)]
  137.         huff = HuffReader(huffs)
  138.  
  139.         def decompressSection(nr):
  140.                 data = sect.loadSection(nr)
  141.                 trail_size = getSizeOfTrailingDataEntries(data, len(data), extra_flags)
  142.                 return huff.unpack(data[0:len(data)-trail_size])
  143.  
  144.         r = ''
  145.         for i in xrange(1, records+1):
  146.                 r += decompressSection(i)
  147.         return r
  148.  
  149. print "MobiHuff v0.03"
  150. print "  Copyright (c) 2008 The Dark Reverser <dark.reverser@googlemail.com>"
  151. if len(sys.argv)!=3:
  152.         print ""
  153.         print "Description:"
  154.         print "  Unpacks the new mobipocket huffdic compression."
  155.         print "  This program works with unencrypted files only."
  156.         print "Usage:"
  157.         print "  mobihuff.py infile.mobi outfile.html"
  158. else:  
  159.         infile = sys.argv[1]
  160.         outfile = sys.argv[2]
  161.         try:
  162.                 print "Decompressing...",
  163.                 result = unpackBook(infile)
  164.                 file(outfile, 'wb').write(result)
  165.                 print "done"
  166.         except ValueError, e:
  167.                 print
  168.                 print "Error: %s" % e
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top