Guest User

DarkReverser

a guest
Sep 24th, 2008
3,101
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# This is a Python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for Windows.
#
# Big Thanks to Igor SKOCHINSKY for providing me with all his information
# and source code relating to the inner workings of this compression scheme.
# Without it, I wouldn't be able to solve this as easily.
#
# Changelog
#  0.01 - Initial version
#  0.02 - Fix issue with size computing
#  0.03 - Fix issue with some files


  14. import struct, sys
  15.  
  16. class BitReader:
  17.     def __init__(self, data):
  18.         self.data, self.pos, self.nbits = data + "\x00\x00\x00\x00", 0, len(data) * 8
  19.     def peek(self, n):
  20.         r, g = 0, 0
  21.         while g < n:
  22.             r, g = (r << 8) | ord(self.data[(self.pos+g)>>3]), g + 8 - ((self.pos+g) & 7)
  23.         return (r >> (g - n)) & ((1 << n) - 1)
  24.     def eat(self, n):
  25.         self.pos += n
  26.         return self.pos <= self.nbits
  27.     def left(self):
  28.         return self.nbits - self.pos
  29.  
  30. class HuffReader:
  31.     def __init__(self, huffs):
  32.         self.huffs = huffs
  33.         h = huffs[0]
  34.         if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
  35.             raise ValueError('invalid huff1 header')
  36.         if huffs[1][0:4] != 'CDIC' or huffs[1][4:8] != '\x00\x00\x00\x10':
  37.             raise ValueError('invalid huff2 header')
  38.         self.entry_bits, = struct.unpack('>L', huffs[1][12:16])
  39.         off1,off2 = struct.unpack('>LL', huffs[0][16:24])
  40.         self.dict1 = struct.unpack('<256L', huffs[0][off1:off1+256*4])
  41.         self.dict2 = struct.unpack('<64L', huffs[0][off2:off2+64*4])
  42.         self.dicts = huffs[1:]
  43.         self.r = ''
  44.        
  45.     def _unpack(self, bits, depth = 0):
  46.         if depth > 32:
  47.             raise ValueError('corrupt file')
  48.         while bits.left():
  49.             dw = bits.peek(32)
  50.             v = self.dict1[dw >> 24]
  51.             codelen = v & 0x1F
  52.             assert codelen != 0
  53.             code = dw >> (32 - codelen)
  54.             r = (v >> 8)
  55.             if not (v & 0x80):
  56.                 while code < self.dict2[(codelen-1)*2]:
  57.                     codelen += 1
  58.                     code = dw >> (32 - codelen)
  59.                 r = self.dict2[(codelen-1)*2+1]
  60.             r -= code
  61.             assert codelen != 0
  62.             if not bits.eat(codelen):
  63.                 return
  64.             dicno = r >> self.entry_bits
  65.             off1 = 16 + (r - (dicno << self.entry_bits)) * 2
  66.             dic = self.dicts[dicno]
  67.             off2 = 16 + ord(dic[off1]) * 256 + ord(dic[off1+1])
  68.             blen = ord(dic[off2]) * 256 + ord(dic[off2+1])
  69.             slice = dic[off2+2:off2+2+(blen&0x7fff)]
  70.             if blen & 0x8000:
  71.                 self.r += slice
  72.             else:
  73.                 self._unpack(BitReader(slice), depth + 1)
  74.  
  75.     def unpack(self, data):
  76.         self.r = ''
  77.         self._unpack(BitReader(data))
  78.         return self.r
  79.  
  80. class Sectionizer:
  81.     def __init__(self, filename, ident):
  82.         self.contents = file(filename, 'rb').read()
  83.         self.header = self.contents[0:72]
  84.         self.num_sections, = struct.unpack('>H', self.contents[76:78])
  85.         if self.header[0x3C:0x3C+8] != ident:
  86.             raise ValueError('Invalid file format')
  87.         self.sections = []
  88.         for i in xrange(self.num_sections):
  89.             offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
  90.             flags, val = a1, a2<<16|a3<<8|a4
  91.             self.sections.append( (offset, flags, val) )
  92.     def loadSection(self, section):
  93.         if section + 1 == self.num_sections:
  94.             end_off = len(self.contents)
  95.         else:
  96.             end_off = self.sections[section + 1][0]
  97.         off = self.sections[section][0]
  98.         return self.contents[off:end_off]
  99.  
  100.  
  101. def getSizeOfTrailingDataEntry(ptr, size):
  102.     bitpos, result = 0, 0
  103.     while True:
  104.         v = ord(ptr[size-1])
  105.         result |= (v & 0x7F) << bitpos
  106.         bitpos += 7
  107.         size -= 1
  108.         if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
  109.             return result
  110.  
  111. def getSizeOfTrailingDataEntries(ptr, size, flags):
  112.     num = 0
  113.     flags >>= 1
  114.     while flags:
  115.         if flags & 1:
  116.             num += getSizeOfTrailingDataEntry(ptr, size - num)
  117.         flags >>= 1    
  118.     return num
  119.  
  120. def unpackBook(input_file):
  121.     sect = Sectionizer(input_file, 'BOOKMOBI')
  122.  
  123.     header = sect.loadSection(0)
  124.  
  125.     crypto_type, = struct.unpack('>H', header[0xC:0xC+2])
  126.     if crypto_type != 0:
  127.         raise ValueError('The book is encrypted. Run mobidedrm first')
  128.  
  129.     if header[0:2] != 'DH':
  130.         raise ValueError('invalid compression type')
  131.  
  132.     extra_flags, = struct.unpack('>L', header[0xF0:0xF4])
  133.     records, = struct.unpack('>H', header[0x8:0x8+2])
  134.  
  135.     huffoff,huffnum = struct.unpack('>LL', header[0x70:0x78])
  136.     huffs = [sect.loadSection(i) for i in xrange(huffoff, huffoff+huffnum)]
  137.     huff = HuffReader(huffs)
  138.  
  139.     def decompressSection(nr):
  140.         data = sect.loadSection(nr)
  141.         trail_size = getSizeOfTrailingDataEntries(data, len(data), extra_flags)
  142.         return huff.unpack(data[0:len(data)-trail_size])
  143.  
  144.     r = ''
  145.     for i in xrange(1, records+1):
  146.         r += decompressSection(i)
  147.     return r
  148.  
  149. print "MobiHuff v0.03"
  150. print "  Copyright (c) 2008 The Dark Reverser <dark.reverser@googlemail.com>"
  151. if len(sys.argv)!=3:
  152.     print ""
  153.     print "Description:"
  154.     print "  Unpacks the new mobipocket huffdic compression."
  155.     print "  This program works with unencrypted files only."
  156.     print "Usage:"
  157.     print "  mobihuff.py infile.mobi outfile.html"
  158. else:  
  159.     infile = sys.argv[1]
  160.     outfile = sys.argv[2]
  161.     try:
  162.         print "Decompressing...",
  163.         result = unpackBook(infile)
  164.         file(outfile, 'wb').write(result)
  165.         print "done"
  166.     except ValueError, e:
  167.         print
  168.         print "Error: %s" % e
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×