Advertisement
lifeasageek

ttfFromDocx

Jan 19th, 2012
480
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.05 KB | None | 0 0
  1. # extract .ttf files from .docx
  2. # lifeasageek@gmail.com
  3.  
  4. import base64
  5. import sys
  6. import os, zipfile
  7. from BeautifulSoup import BeautifulStoneSoup as soup
  8.  
  9. def extractFontKey(files):
  10.     keys = []
  11.  
  12.     for filename in files:
  13.         if not filename.endswith("fontTable.xml"):
  14.             continue
  15.         xmlStr = open(filename).read()
  16.         xml = soup(xmlStr)
  17.         # print xml
  18.         for i, f in enumerate(xml.findAll('w:font')):
  19.             name = f['w:name']
  20.             x = soup(str(f))
  21.            
  22.             try:
  23.                 fontkey =  x.findAll('w:embedregular')[0]['w:fontkey']
  24.                 fontkey = "".join(fontkey[1:-1].split("-"))
  25.             except: # some entry does not have fontkey. skip this
  26.                 continue
  27.  
  28.             print name, fontkey
  29.             keys.append( (name, fontkey))
  30.         return keys
  31.  
  32. def deobf( fontKeyBin, obfFontStr):
  33.     fontStr = [ord(x) for x in obfFontStr]
  34.  
  35.     for i in range(16):
  36.         fontStr[i] = ord(obfFontStr[i]) ^ ord(fontKeyBin[15-i])
  37.         fontStr[i+16] = ord(obfFontStr[i+16]) ^ ord(fontKeyBin[15-i])
  38.        
  39.     fontStr = "".join( [ "%c" % x for x in fontStr])
  40.     return fontStr
  41.  
  42. def deobfAll(files, keys):
  43.     files.sort() # odttf filenames seem to be sorted as it was appeared in .xml
  44.  
  45.     i = 0
  46.     for filename in files:
  47.         if not filename.endswith(".odttf"):
  48.             continue
  49.         print filename
  50.         obfStr = open( filename, 'rb').read()
  51.         (fontname, fontkey) = keys[i]
  52.         print fontkey
  53.  
  54.         deobfStr = deobf(fontkey.decode("hex"), obfStr)
  55.  
  56.         open( fontname + ".ttf", 'wb').write(deobfStr)
  57.         i += 1
  58.  
  59. def decompress(docxFilename):
  60.     z = zipfile.ZipFile(docxFilename)
  61.    
  62.     try:
  63.         os.mkdir( "./tmp")
  64.     except:
  65.         pass
  66.  
  67.     z.extractall("./tmp")
  68.     files = ["./tmp/" + x for x in z.namelist()]
  69.     return files
  70.    
  71. if __name__ == '__main__':
  72.     docxFilename = sys.argv[1]
  73.    
  74.     files = decompress( docxFilename)
  75.     keys = extractFontKey(files)
  76.     deobfAll(files, keys)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement