Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Requires Python 2.7
- #The floattostring.dll requires 32bit Python to write floating point numbers in a succinct manner,
- #but the dll is not required to run this script.
- import string
- import sys
- from binascii import hexlify
- import struct
- import os
- from cStringIO import StringIO
- import cProfile
- import cPickle
- import copy
# ---------------------------------------------------------------------------
# User configuration
# ---------------------------------------------------------------------------

# Folder that is searched (recursively) for .ebx files, and the folder that
# receives the converted text files.
inputFolder = r"C:\hexing\bf4 dump\bundles\ebx"
outputFolder = r"C:\hexing\bf4 ebx"

# File name of the pickled guid table. It is opened with a relative path, so
# it ends up in the current working directory. Keeping a separate table per
# game is highly recommended.
guidTableName = "guiTable bf4cr2patch"

# File extension of the converted files.
EXTENSION = ".txt"

# Indentation unit: written once per nesting level to the left of each field.
SEP = " "

# True: prefix every line with the offset of the field/instance in the ebx.
printOffsets = False

# Instances and fields with these names are skipped when converting to text.
# RawFileDataAsset is used in WebBrowser\Fonts and crashes the script otherwise.
IGNOREINSTANCES = ["RawFileDataAsset"]
IGNOREFIELDS = []

# Example of a more aggressive setup (taken from bf3):
##IGNOREINSTANCES=["ShaderAdjustmentData","SocketData","WeaponSkinnedSocketObjectData","WeaponRegularSocketObjectData"]
##IGNOREFIELDS=["Mesh3pTransforms","Mesh3pRigidMeshSocketObjectTransforms"]
# Ignoring a few mesh related fields/instances is recommended: they take up
# lots of space and contain no useful information as the mesh format is not
# even known. For example, Mesh3pTransforms contains nothing but xyz vectors
# and is found in most weapon files; it takes up 715 lines in the 870 shotgun
# (the entire file is 3829 lines). Note that the lists above applied to bf3;
# in bf4 only Mesh3pTransforms could be found in the files. Use them as a
# guide for choosing fields/instances to ignore.
def main():
    """Convert every ebx file below the input folder in two passes.

    The first pass runs through all files to create the guid table that
    resolves external file references; the second pass runs through all files
    once more and converts them to text using that table.
    """
    for conversionPass in (createGuidTable, dumpText):
        conversionPass()
- ##############################################################
- ##############################################################
def unpackLE(typ, data):
    """Unpack *data* as little-endian values described by struct format *typ*.

    The "<" prefix selects little-endian byte order with standard sizes and no
    alignment padding. Plain struct.unpack would use the byte order and
    alignment of the machine running the script instead.
    """
    return struct.unpack("<" + typ, data)


def unpackBE(typ, data):
    """Unpack *data* as big-endian values described by struct format *typ*."""
    return struct.unpack(">" + typ, data)
def createGuidTable():
    """Fill guidTable with fileGUID -> file name for every ebx and pickle it.

    Walks inputFolder recursively, parses each *.ebx file and records its
    guid together with its true file name. Files whose magic is not ebx are
    skipped; any other parsing error is propagated. The finished table is
    written to guidTableName.
    """
    for dir0, dirs, ff in os.walk(inputFolder):
        for fname in ff:
            if not fname.endswith(".ebx"):
                continue
            fullPath = dir0 + "\\" + fname
            # path relative to the input folder, without the ".ebx" extension
            relPath = fullPath[len(inputFolder):-4]
            if relPath[:1] == "\\":
                relPath = relPath[1:]
            f = open(lp(fullPath), "rb")
            try:
                dbx = Dbx(f, relPath)
            except ValueError as msg:
                if str(msg).startswith("The file is not ebx: "):
                    continue
                # any other ValueError is unexpected: let it surface unchanged
                raise
            finally:
                # runs on success, on skip and on error, so no handle leaks
                f.close()
            guidTable[dbx.fileGUID] = dbx.trueFilename
    f5 = open(guidTableName, "wb")  # write the table
    try:
        cPickle.dump(guidTable, f5)
    finally:
        f5.close()
def dumpText():
    """Convert every ebx file below inputFolder to text in outputFolder.

    Walks inputFolder recursively, prints the name of each *.ebx file, parses
    it and dumps it. Files whose magic is not ebx are skipped; any other
    parsing error is propagated.
    """
    for dir0, dirs, ff in os.walk(inputFolder):
        for fname in ff:
            if not fname.endswith(".ebx"):
                continue
            print(fname)
            fullPath = dir0 + "\\" + fname
            # path relative to the input folder, without the ".ebx" extension
            relPath = fullPath[len(inputFolder):-4]
            if relPath[:1] == "\\":
                relPath = relPath[1:]
            f = open(lp(fullPath), "rb")
            try:
                dbx = Dbx(f, relPath)
            except ValueError as msg:
                if str(msg).startswith("The file is not ebx: "):
                    continue
                # any other ValueError is unexpected: let it surface unchanged
                raise
            finally:
                # runs on success, on skip and on error, so no handle leaks
                f.close()
            dbx.dump(outputFolder)
def open2(path, mode="rb"):
    """Open *path* (long path names supported) and return the file handle.

    For mode "wb" all missing parent folders are created first. Other modes
    open the file directly.
    """
    if mode == "wb":
        # Create the first folder levels manually because makedirs might fail
        # on them. The last backslash component is the file name and must not
        # be part of the folder that gets created.
        folderParts = path.split("\\")[:-1]
        manualPart = "\\".join(folderParts[:2])
        if manualPart and not os.path.isdir(manualPart):
            os.makedirs(manualPart)
        # now handle the rest, including extra long path names
        folderPath = lp(os.path.dirname(path))
        if not os.path.isdir(folderPath):
            os.makedirs(folderPath)
    return open(lp(path), mode)
def lp(path):  # long, normalized pathnames
    """Return *path* normalized, with the long-path prefix added if needed.

    Paths longer than 247 characters that do not already start with the
    "\\\\?\\" prefix are returned as unicode with that prefix prepended; every
    other path is just normalized.
    """
    normalized = os.path.normpath(path)
    alreadyPrefixed = path[:4] == '\\\\?\\'
    if len(path) > 247 and not alreadyPrefixed:
        return unicode('\\\\?\\' + normalized)
    return normalized
# formatfloat(num) returns the text written for a floating point field.
# The optional floattostring library gives a succinct representation; when it
# (or ctypes) is unavailable, str() is used instead.
try:
    from ctypes import cdll, c_char, c_double, pointer
    floatlib = cdll.LoadLibrary("floattostring")
except (ImportError, OSError):
    # ctypes missing, or the library could not be loaded
    def formatfloat(num):
        """Return *num* as text using str()."""
        return str(num)
else:
    def formatfloat(num):
        """Return *num* as text using the floattostring library.

        The library output is normalized so that it always has a digit in
        front of the decimal point and always contains "." or an exponent.
        """
        buf = (c_char * 100)()
        floatlib.convertNum(c_double(num), pointer(buf), 100)
        # .value stops at the first NUL byte (or at the end of the buffer)
        rawstring = buf.value
        if not isinstance(rawstring, str):
            rawstring = rawstring.decode("ascii")
        if not rawstring:
            # nothing was written to the buffer; fall back to str()
            return str(num)
        if rawstring.startswith("-."):
            return "-0." + rawstring[2:]
        if rawstring.startswith("."):
            return "0." + rawstring[1:]
        if "e" not in rawstring and "." not in rawstring:
            return rawstring + ".0"
        return rawstring
def hasher(keyword): #32bit FNV-1 hash with FNV_offset_basis = 5381 and FNV_prime = 33
    """Return the 32 bit hash of *keyword*.

    Every character is folded in as value = (value * 33) ^ code, starting
    from 5381. The running value is truncated to 32 bits on each step; only
    the low 32 bits influence the final result, so this matches truncating
    once at the end while keeping the intermediate number small.

    *keyword* may be a string (characters are converted with ord) or a
    sequence that yields integer byte values directly.
    """
    value = 5381
    for char in keyword:
        code = char if isinstance(char, int) else ord(char)
        value = ((value * 33) ^ code) & 0xffffffff
    return value
class Header:
    """Named view of the numbers unpacked from the ebx header."""

    # attribute names in the order in which their values appear in varList
    _LAYOUT = (
        "absStringOffset",      ## absolute offset for string section start
        "lenStringToEOF",       ## length from string section start to EOF
        "numGUID",              ## number of external GUIDs
        "numInstanceRepeater",  ## total number of instance repeaters
        "numGUIDRepeater",      ## instance repeaters with GUID
        "unknown",
        "numComplex",           ## number of complex entries
        "numField",             ## number of field entries
        "lenName",              ## length of name section including padding
        "lenString",            ## length of string section including padding
        "numArrayRepeater",
        "lenPayload",           ## length of normal payload section; the start of the array payload section is absStringOffset+lenString+lenPayload
    )

    def __init__(self, varList):
        """Store varList[0] .. varList[11] under the names in _LAYOUT."""
        for position, attribute in enumerate(self._LAYOUT):
            setattr(self, attribute, varList[position])
class FieldDescriptor:
    """Description of one field: name, type, reference and payload offsets."""

    def __init__(self, varList, keywordDict):
        """Build the descriptor from unpacked values.

        varList[0] is the hash of the field name; keywordDict maps such
        hashes back to the name.
        """
        self.name = keywordDict[varList[0]]
        self.type, self.ref = varList[1], varList[2]  # ref: the field may contain another complex
        # offset in payload section, relative to the complex containing it;
        # fields named "$" have their offset reduced by 8
        self.offset = varList[3] - 8 if self.name == "$" else varList[3]
        self.secondaryOffset = varList[4]
class ComplexDescriptor:
    """Description of one complex: its name, its fields and its size."""

    def __init__(self, varList, keywordDict):
        """Build the descriptor from unpacked values.

        varList[0] is the hash of the complex name; keywordDict maps such
        hashes back to the name.
        """
        self.name = keywordDict[varList[0]]
        (self.fieldStartIndex,   # the index of the first field belonging to the complex
         self.numField,          # the total number of fields belonging to the complex
         self.alignment,
         self.type,
         self.size,              # total length of the complex in the payload section
         self.secondarySize,     # seems deprecated
         ) = [varList[position] for position in range(1, 7)]
class InstanceRepeater:
    """States which complex is used as instance and how often it repeats."""

    def __init__(self, varList):
        """Take (complexIndex, repetitions) from the first two entries of varList."""
        # index of complex used as the instance, number of instance repetitions
        self.complexIndex, self.repetitions = varList[0], varList[1]
class arrayRepeater:
    """Locates one array in the array payload section."""

    def __init__(self, varList):
        """Take (offset, repetitions, complexIndex) from the first three entries of varList."""
        # offset in array payload section, number of array repetitions
        self.offset, self.repetitions = varList[0], varList[1]
        self.complexIndex = varList[2]  # not necessary for extraction
class Complex:
    """A complex value; further attributes (offset, fields) are attached by the reader."""

    def __init__(self, desc):
        """Remember the descriptor *desc* this complex was built from."""
        self.desc = desc
class Field:
    """A field; its value is attached by the reader after construction."""

    def __init__(self, desc, offset):
        """Remember the descriptor and the absolute offset of the field in the ebx."""
        self.desc, self.offset = desc, offset
# Numeric field types: field type -> (struct format character, length in bytes)
numDict = {
    0xc0ad: ("?", 1),
    0xc0bd: ("b", 1),
    0xc0cd: ("B", 1),
    0xc0dd: ("H", 2),
    0xc0ed: ("h", 2),
    0xc0fd: ("i", 4),
    0xc10d: ("I", 4),
    0xc12d: ("Q", 8),
    0xc13d: ("f", 4),
    0xc14d: ("d", 8),
    0x0035: ("I", 4),
}
class Dbx:
    """One parsed ebx file.

    The constructor reads the whole file into descriptor lists and a list of
    instances; dump() writes these instances as text.
    """

    def __init__(self, f, relPath):
        """Parse the ebx file behind the binary file object *f*.

        relPath is used in messages and as the fallback for trueFilename.
        *f* is closed once parsing has finished.

        Raises ValueError("The file is not ebx: ...") when the first four
        bytes are neither the little endian nor the big endian magic.
        """
        #metadata
        magic = f.read(4)
        if magic == "\xCE\xD1\xB2\x0F":
            self.unpack = unpackLE
        elif magic == "\x0F\xB2\xD1\xCE":
            self.unpack = unpackBE
        else:
            raise ValueError("The file is not ebx: " + relPath)
        self.relPath = relPath  #to give more feedback for unknown field types
        self.trueFilename = ""
        self.header = Header(self.unpack("3I6H3I", f.read(36)))
        self.arraySectionstart = (self.header.absStringOffset
                                  + self.header.lenString
                                  + self.header.lenPayload)
        self.fileGUID = f.read(16)
        self._skipPadding(f, 16)
        self.externalGUIDs = [(f.read(16), f.read(16))
                              for i in xrange(self.header.numGUID)]
        self.keywords = f.read(self.header.lenName).split("\x00")
        self.keywordDict = dict((hasher(keyword), keyword)
                                for keyword in self.keywords)
        self.fieldDescriptors = [
            FieldDescriptor(self.unpack("IHHii", f.read(16)), self.keywordDict)
            for i in xrange(self.header.numField)]
        self.complexDescriptors = [
            ComplexDescriptor(self.unpack("IIBBHHH", f.read(16)), self.keywordDict)
            for i in xrange(self.header.numComplex)]
        self.instanceRepeaters = [
            InstanceRepeater(self.unpack("2H", f.read(4)))
            for i in xrange(self.header.numInstanceRepeater)]
        self._skipPadding(f, 16)
        self.arrayRepeaters = [
            arrayRepeater(self.unpack("3I", f.read(12)))
            for i in xrange(self.header.numArrayRepeater)]

        #payload
        f.seek(self.header.absStringOffset + self.header.lenString)
        self.internalGUIDs = []
        self.instances = []  # (guid, complex)
        nonGUIDindex = 0
        self.isPrimaryInstance = True  #first instance is primary
        for i, instanceRepeater in enumerate(self.instanceRepeaters):
            complexIndex = instanceRepeater.complexIndex
            for repetition in xrange(instanceRepeater.repetitions):
                #obey alignment of the instance; peek into the complex for that
                self._skipPadding(f, self.complexDescriptors[complexIndex].alignment)
                #all instances after numGUIDRepeater have no guid
                if i < self.header.numGUIDRepeater:
                    instanceGUID = f.read(16)
                else:
                    #just numerate those instances without guid and assign a big endian int to them.
                    instanceGUID = struct.pack(">I", nonGUIDindex)
                    nonGUIDindex += 1
                self.internalGUIDs.append(instanceGUID)
                self.instances.append(
                    (instanceGUID, self.readComplex(complexIndex, f, True)))
                self.isPrimaryInstance = False  #the readComplex function has used isPrimaryInstance by now
        f.close()

        #if no filename found, use the relative input path instead
        #it's just as good though without capitalization
        if self.trueFilename == "":
            self.trueFilename = relPath

    def _skipPadding(self, f, boundary):
        """Advance *f* to the next position that is a multiple of *boundary*."""
        remainder = f.tell() % boundary
        if remainder:
            f.seek(boundary - remainder, 1)

    def readComplex(self, complexIndex, f, isInstance=False):
        """Read the complex described by complexDescriptors[complexIndex].

        Reading starts at the current position of *f*; afterwards *f* is
        positioned right behind the complex. Returns the Complex with its
        offset and fields set.
        """
        complexDesc = self.complexDescriptors[complexIndex]
        cmplx = Complex(complexDesc)
        cmplx.offset = f.tell()
        cmplx.fields = []
        #alignment 4 instances require subtracting 8 for all field offsets and the complex size
        obfuscationShift = 8 if (isInstance and cmplx.desc.alignment == 4) else 0
        firstField = complexDesc.fieldStartIndex
        for fieldIndex in xrange(firstField, firstField + complexDesc.numField):
            f.seek(cmplx.offset + self.fieldDescriptors[fieldIndex].offset - obfuscationShift)
            cmplx.fields.append(self.readField(fieldIndex, f))
        f.seek(cmplx.offset + complexDesc.size - obfuscationShift)
        return cmplx

    def readField(self, fieldIndex, f):
        """Read the field described by fieldDescriptors[fieldIndex] from *f*.

        Returns a Field whose value depends on the field type: a Complex for
        complex and array fields, a string for string and enum fields, raw
        bytes for the 16 and 8 byte types, and a number for the types listed
        in numDict. Unknown types are reported and get a placeholder value.
        """
        fieldDesc = self.fieldDescriptors[fieldIndex]
        field = Field(fieldDesc, f.tell())
        fieldType = fieldDesc.type

        if fieldType in (0x0029, 0xd029, 0x0000, 0x8029):
            field.value = self.readComplex(fieldDesc.ref, f)

        elif fieldType == 0x0041:
            startPos = f.tell()
            repeater = self.arrayRepeaters[self.unpack("I", f.read(4))[0]]
            arrayComplexDesc = self.complexDescriptors[fieldDesc.ref]
            f.seek(self.arraySectionstart + repeater.offset)
            arrayComplex = Complex(arrayComplexDesc)
            arrayComplex.fields = [
                self.readField(arrayComplexDesc.fieldStartIndex, f)
                for repetition in xrange(repeater.repetitions)]
            field.value = arrayComplex
            # Return to just behind the 4 byte array index (as the string
            # branch does), so that a caller reading fields back to back does
            # not continue inside the array payload.
            f.seek(startPos + 4)

        elif fieldType in (0x407d, 0x409d):
            startPos = f.tell()
            stringOffset = self.unpack("i", f.read(4))[0]
            if stringOffset == -1:
                field.value = "*nullString*"
            else:
                f.seek(self.header.absStringOffset + stringOffset)
                chars = []
                while True:
                    a = f.read(1)
                    # stop at the NUL terminator; also stop at end of file,
                    # where read() keeps returning "" forever
                    if a == "\x00" or a == "":
                        break
                    chars.append(a)
                field.value = "".join(chars)
                f.seek(startPos + 4)
                # NOTE(review): the flattened source does not show whether
                # this check also applied to null strings; it is limited to
                # real strings here so "*nullString*" never becomes a file name.
                if self.isPrimaryInstance and fieldDesc.name == "Name" and self.trueFilename == "":
                    self.trueFilename = field.value

        elif fieldType in (0x0089, 0xc089):  #incomplete implementation, only gives back the selected string
            compareValue = self.unpack("i", f.read(4))[0]
            enumComplex = self.complexDescriptors[fieldDesc.ref]
            if enumComplex.numField == 0:
                field.value = "*nullEnum*"
            else:
                # placeholder in case no enum entry matches; without it the
                # field would have no value at all and dump() would fail
                field.value = "*unknownEnum:" + str(compareValue) + "*"
                firstField = enumComplex.fieldStartIndex
                for enumIndex in xrange(firstField, firstField + enumComplex.numField):
                    if self.fieldDescriptors[enumIndex].offset == compareValue:
                        field.value = self.fieldDescriptors[enumIndex].name
                        break

        elif fieldType == 0xc15d:
            field.value = f.read(16)

        elif fieldType == 0x417d:
            field.value = f.read(8)

        else:
            numberFormat = numDict.get(fieldType)
            if numberFormat is None:
                print("Unknown field type: " + str(fieldType) + " File name: " + self.relPath)
                field.value = "*unknown field type*"
            else:
                typ, length = numberFormat
                try:
                    field.value = self.unpack(typ, f.read(length))[0]
                except struct.error:
                    # fewer bytes than expected were available; keep going
                    # like the original best-effort handler, but say why
                    print("Unreadable field of type: " + str(fieldType) + " File name: " + self.relPath)
                    field.value = "*unreadable field*"

        return field

    def dump(self, outputFolder):
        """Write all instances as text to outputFolder+trueFilename+EXTENSION.

        Instances whose name is listed in IGNOREINSTANCES are left out.
        """
        outName = outputFolder + self.trueFilename + EXTENSION
        f2 = open2(outName, "wb")
        try:
            for (guid, instance) in self.instances:
                if instance.desc.name in IGNOREINSTANCES:
                    continue
                writeInstance(f2, instance, hexlify(guid))
                self.recurse(instance.fields, f2, 0)
        finally:
            f2.close()

    def recurse(self, fields, f2, lvl):  #over fields
        """Write *fields* to *f2*, one nesting level deeper than *lvl*.

        Complex and array fields are written recursively. Complex fields
        whose name is listed in IGNOREFIELDS are left out.
        """
        lvl += 1
        for field in fields:
            fieldType = field.desc.type

            if fieldType in (0x0029, 0xd029, 0x0000, 0x8029):
                if field.desc.name not in IGNOREFIELDS:
                    writeField(f2, field, lvl, "::" + field.value.desc.name)
                    self.recurse(field.value.fields, f2, lvl)

            elif fieldType == 0xc13d:
                writeField(f2, field, lvl, " " + formatfloat(field.value))

            elif fieldType == 0xc15d:
                writeField(f2, field, lvl, " " + hexlify(field.value).upper())  #upper case=> chunk guid

            elif fieldType == 0x417d:
                writeField(f2, field, lvl, " " + hexlify(field.value))

            elif fieldType == 0x0035:
                if field.value >> 31:
                    # highest bit set: index into the external guid list
                    extguid = self.externalGUIDs[field.value & 0x7fffffff]
                    try:
                        fileName = guidTable[extguid[0]]
                    except KeyError:
                        # file guid not in the table: write it as hex instead
                        fileName = hexlify(extguid[0])
                    towrite = fileName + "/" + hexlify(extguid[1])
                elif field.value == 0:
                    towrite = "*nullGuid*"
                else:
                    towrite = hexlify(self.internalGUIDs[field.value - 1])
                writeField(f2, field, lvl, " " + towrite)

            elif fieldType == 0x0041:
                members = field.value.fields
                if len(members) == 0:
                    writeField(f2, field, lvl, " *nullArray*")
                else:
                    writeField(f2, field, lvl, "::" + field.value.desc.name)
                    #quick hack so I can add indices to array members while using the same recurse function
                    for index, member in enumerate(members):
                        if member.desc.name == "member":
                            # all members share one descriptor object, so each
                            # gets its own copy before being renamed; the
                            # attributes are plain values, a shallow copy suffices
                            desc = copy.copy(member.desc)
                            desc.name = "member(" + str(index) + ")"
                            member.desc = desc
                    self.recurse(members, f2, lvl)

            else:
                writeField(f2, field, lvl, " " + str(field.value))
def hex2(num):
    """Return the int *num* as lowercase hex text without "0x" prefix.

    The text is zero-padded on the left to 8 characters; larger numbers are
    returned in full. For a negative number the minus sign comes first and
    counts towards the 8 characters.
    """
    return "%08x" % num
# The two writers are chosen once, when the script is loaded: with
# printOffsets every line starts with the offset of the field/instance.
if printOffsets:
    def writeField(f, field, lvl, text):
        """Write one field line: offset, indentation, field name, text."""
        pieces = [hex2(field.offset), SEP, lvl * SEP, field.desc.name, text, "\r\n"]
        f.write("".join(pieces))

    def writeInstance(f, cmplx, text):
        """Write one instance line: offset, instance name, text."""
        pieces = [hex2(cmplx.offset), SEP, cmplx.desc.name, " ", text, "\r\n"]
        f.write("".join(pieces))
else:
    def writeField(f, field, lvl, text):
        """Write one field line: indentation, field name, text."""
        pieces = [lvl * SEP, field.desc.name, text, "\r\n"]
        f.write("".join(pieces))

    def writeInstance(f, cmplx, text):
        """Write one instance line: instance name, text."""
        pieces = [cmplx.desc.name, " ", text, "\r\n"]
        f.write("".join(pieces))
# make sure both folders end with a path separator
if outputFolder[-1] not in ("/", "\\"): outputFolder += "\\"
if inputFolder[-1] not in ("/", "\\"): inputFolder += "\\"

#if there's a guid table already, use it
try:
    f5 = open(guidTableName, "rb")
    try:
        # NOTE: unpickling can execute arbitrary code; only use guid table
        # files that were created by this script.
        guidTable = cPickle.load(f5)
    finally:
        f5.close()
except Exception:
    # missing or unreadable table: start with an empty one (best effort);
    # unlike a bare except this lets KeyboardInterrupt/SystemExit through
    guidTable = dict()

# run the conversion only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement