Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # extract .ttf files from .docx
- # lifeasageek@gmail.com
- import base64
- import sys
- import os, zipfile
- from BeautifulSoup import BeautifulStoneSoup as soup
def extractFontKey(files):
    """Collect (font name, hex key) pairs from every fontTable.xml in *files*.

    Paths that do not end in "fontTable.xml" are ignored. For each
    ``w:font`` element the ``w:fontkey`` attribute of its first
    ``w:embedregular`` descendant is read (tag and attribute names are
    queried in lower case). Only ``w:embedregular`` is looked up, so a
    font without that element (or without the attribute) is skipped.

    The raw key has its first and last characters dropped (presumably the
    braces around a GUID -- confirm against a real fontTable.xml) and all
    "-" separators removed, leaving a plain hex string.

    Args:
        files: iterable of file paths.

    Returns:
        list of ``(name, fontkey)`` tuples in document order.
    """
    keys = []
    for filename in files:
        if not filename.endswith("fontTable.xml"):
            continue
        # Close the handle deterministically instead of leaking it.
        with open(filename) as fp:
            xml = soup(fp.read())
        for font in xml.findAll('w:font'):
            name = font['w:name']
            try:
                # Search the tag directly; re-parsing its serialized form
                # yields the same descendants at extra cost.
                fontkey = font.findAll('w:embedregular')[0]['w:fontkey']
            except (IndexError, KeyError):
                # Some entries have no embedded regular font / no key: skip.
                continue
            fontkey = "".join(fontkey[1:-1].split("-"))
            # Single pre-formatted argument prints identically on Python 2 and 3.
            print("%s %s" % (name, fontkey))
            keys.append((name, fontkey))
    return keys
def deobf(fontKeyBin, obfFontStr):
    """Undo the XOR obfuscation applied to the first 32 bytes of a font.

    Byte ``i`` and byte ``i + 16`` (for ``i`` in 0..15) are each XORed with
    key byte ``15 - i``, i.e. the key is applied in reverse order to both
    16-byte halves of the header. Everything past byte 32 is returned
    unchanged. XOR is its own inverse, so the same call also obfuscates.

    Args:
        fontKeyBin: binary key (``bytes``/``bytearray``; ``str`` on
            Python 2). Only the first 16 bytes are used.
        obfFontStr: binary contents of the obfuscated font file.

    Returns:
        The de-obfuscated font as ``bytes`` (``str`` on Python 2).

    Raises:
        ValueError: if the key is shorter than 16 bytes or the data is
            shorter than 32 bytes (previously an unexplained IndexError).
    """
    key = bytearray(fontKeyBin)
    data = bytearray(obfFontStr)  # mutable copy; the caller's buffer is untouched
    if len(key) < 16:
        raise ValueError("font key must be at least 16 bytes, got %d" % len(key))
    if len(data) < 32:
        raise ValueError("font data must be at least 32 bytes, got %d" % len(data))
    for i in range(16):
        k = key[15 - i]
        data[i] ^= k
        data[i + 16] ^= k
    return bytes(data)
def deobfAll(files, keys):
    """De-obfuscate every ".odttf" file in *files* and write it as a ".ttf".

    The .odttf paths are processed in sorted order and paired positionally
    with *keys*: the n-th sorted .odttf file is decoded with the n-th key
    and written to ``<fontname>.ttf`` in the current working directory.
    This pairing relies on the observation that sorted .odttf names follow
    the order of the entries in the XML; it is a heuristic, not a lookup.

    NOTE(review): ``fontname`` comes from the document and is used verbatim
    as an output file name -- consider sanitizing it for untrusted input.

    Args:
        files: list of extracted file paths; it is not modified.
        keys: list of ``(fontname, hex_fontkey)`` tuples.

    Raises:
        IndexError: if there are more .odttf files than keys.
    """
    import binascii  # local import keeps this block self-contained

    # sorted() works on a copy, so the caller's list keeps its order.
    odttf_files = sorted(f for f in files if f.endswith(".odttf"))
    for i, filename in enumerate(odttf_files):
        print(filename)
        if i >= len(keys):
            raise IndexError(
                "no font key left for %s (%d keys for %d .odttf files)"
                % (filename, len(keys), len(odttf_files)))
        with open(filename, 'rb') as src:
            obfStr = src.read()
        fontname, fontkey = keys[i]
        print(fontkey)
        # unhexlify replaces the Python 2-only str.decode("hex").
        deobfStr = deobf(binascii.unhexlify(fontkey), obfStr)
        with open(fontname + ".ttf", 'wb') as dst:
            dst.write(deobfStr)
def decompress(docxFilename):
    """Extract the zip archive *docxFilename* into "./tmp".

    Args:
        docxFilename: path to the .docx (zip) file.

    Returns:
        list of the archive's member names, each prefixed with "./tmp/",
        in the order reported by the archive.

    Raises:
        OSError: if "./tmp" cannot be created and does not already exist.
    """
    out_dir = "./tmp"
    try:
        os.mkdir(out_dir)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(out_dir):
            raise
    # Context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(docxFilename) as z:
        z.extractall(out_dir)
        return ["./tmp/" + member for member in z.namelist()]
if __name__ == '__main__':
    # Script entry point: unzip the .docx given as the first argument, read
    # the font keys from its fontTable.xml, then write the de-obfuscated
    # fonts as .ttf files.
    if len(sys.argv) < 2:
        # Explain the expected invocation instead of failing with IndexError.
        sys.stderr.write("usage: %s <file.docx>\n" % sys.argv[0])
        sys.exit(1)
    docxFilename = sys.argv[1]
    files = decompress(docxFilename)
    keys = extractFontKey(files)
    deobfAll(files, keys)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement