Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import sys
- import struct
- import functools
# Maps the first two digits of a face code (as a string) to the name of the
# character shown. '54' is a placeholder entry: the consumer of this table
# decides between two characters from the last two digits of the face code.
facedict = {
    '00': 'Lloyd',
    '01': 'Elie',
    '02': 'Tio',
    '03': 'Randy',
    '04': 'Lazy',
    '05': 'Noel',
    '06': 'Dudley',
    '07': 'Yin',
    '08': 'Estelle',
    '09': 'Joshua',
    '10': 'Sergei',
    '11': 'KeA',
    '12': 'Zeit',
    '13': 'Cecil',
    '14': 'Arios',
    '15': 'Sizuku',
    '16': 'Wald',
    '17': 'Ilya',
    '18': 'Rixia',
    '19': 'Fran',
    '20': 'Sonya',
    '21': 'Grace',
    '22': 'Ian',
    '23': 'Jona',
    '24': 'Joachim',
    '25': 'McDowell',
    '26': 'Earnest',
    '27': 'Hartman',
    '28': 'Dieter',
    '29': 'Mariabell',
    '30': 'Marconi',
    '31': 'Garcia',
    '32': 'Cao',
    '33': 'Renne',
    '34': 'Kirika',
    '35': 'Lector',
    '36': 'Harold',
    '37': 'Sophia',
    '38': 'Colin',
    '39': 'Jorg',
    # Alternate outfits.
    '50': 'Lloyd (Fancy)',
    '51': 'Lloyd (Fancy Glasses)',
    '52': 'Lloyd (Casual)',
    '53': 'Elie (Fancy)',
    '54': 'Special',
    '55': 'Tio (Casual)',
    '56': 'Randy (Fancy)',
    '57': 'Lazy (Fancy)',
    '58': 'KeA (Fancy)',
    '59': 'Cecil (Fancy)',
    '60': 'Sizuku (Fancy)',
    '61': 'Ilya (Dancer)',
    '62': 'Rixia (Priestess)',
    '63': 'Noel (Casual)',
    '64': 'Fran (Casual)',
    '65': 'McDowell (PJs)',
    '66': 'Earnest (Suit)',
    '67': 'Joachim (Blue Hair)',
    '68': 'Joachim (White Hair)',
}
def get_data(filename):
    """Return the complete contents of *filename* as a byte string.

    The file is opened in binary mode and is closed again before the
    function returns, including when reading fails.

    filename -- path of the file to read.

    Any error raised by open() or read() propagates to the caller.
    """
    with open(filename, 'rb') as infile:
        return infile.read()
# Module-level setup: load the script file named on the command line.
# (For debugging, sys.argv can be overridden here, e.g.
#  sys.argv = [sys.argv[0], 'c0000'].)
print(sys.argv[1])
# Entire contents of '<name>.orig'; read as a global by the functions below.
filedata = get_data('%s.orig' % sys.argv[1])
# Offset of the last occurrence of the script name inside the file.
# NOTE(review): rfind returns -1 when the name is absent, in which case the
# slice below keeps only the final byte -- confirm the name is always present.
EOFposition = filedata.rfind(sys.argv[1])
# NUL-separated strings from that offset to the end of the file; used as a
# table of speaker names.
nameslist = filedata[EOFposition:].split("\x00")
def _hexbytes(chars):
    """Return *chars* as lowercase hex, two digits per character."""
    return ''.join('%02x' % ord(ch) for ch in chars)


def _is_sjis_lead(text, pos):
    """Tell whether text[pos] and text[pos+1] look like a Shift-JIS pair."""
    return 127 < ord(text[pos]) < 255 and ord(text[pos + 1]) > 39


def _fallback_speaker(opcode, inputstring):
    """Return the speaker field for a line that carries no face code.

    For a 5c opcode whose third byte is 0x00, the second byte minus 7 is
    used as an index into the module-level ``nameslist``; every other case
    yields an empty field. The result always ends with ';'.
    """
    if opcode == '5c' and inputstring[2] == "\x00":
        nameindex = ord(inputstring[1]) - 7
        if nameindex < len(nameslist):
            return nameslist[nameindex] + ";"
    return ";"


def calculateoutputstring(opcodeaddress, inputstring):
    """Format one dialogue opcode as ';'-separated lines for the translators.

    opcodeaddress -- address of the opcode as text (e.g. '0x8955'); copied
                     verbatim into the first field.
    inputstring   -- the raw opcode, from its opcode byte up to and
                     including its terminator.

    The first output line is
        address;opcode;speaker;codes;speech;breaktype;userlength
    and every further line of the same opcode is
        ;;speaker;codes;speech;breaktype
    Each line starts with a newline character. An empty string is returned
    when no user text is found or when an unrecognised control byte is met
    (a message is printed in the latter case).

    Reads the module-level ``facedict`` (face code -> speaker name) and
    ``nameslist`` (speaker names for 5c opcodes).
    """
    opcode = _hexbytes(inputstring[0])  # what opcode is it?
    outputstring = '\n' + opcodeaddress + ";" + opcode + ';'

    # Scan backwards for the last user character of the opcode.
    n = -2
    while True:
        if _is_sjis_lead(inputstring, n):
            lastuserchar = len(inputstring) + n + 1
            break
        if ord(inputstring[n]) in (41, 32, 35):  # ')', ' ' or '#'
            lastuserchar = len(inputstring) + n
            break
        n -= 1
        if len(inputstring) + n == 0:
            # Reached the opcode byte: there is no text in this opcode.
            return ""

    # Scan forwards for the first user character.
    strpos = 1
    while True:
        if ord(inputstring[strpos]) == 35:  # a '#' text code comes first
            break
        if _is_sjis_lead(inputstring, strpos):  # a Shift-JIS character comes first
            break
        if strpos == len(inputstring) - 1:
            # NOTE(review): the message names opcode 5b whatever the opcode is.
            print("No user text in opcode 5b at address " + opcodeaddress)
            return ""
        strpos += 1

    startofline = strpos  # start of the line currently being collected
    userlength = len(inputstring) - startofline + n + 2
    startflag = True      # True until the first line has been emitted
    JIScharpos = 0        # position of the last character of the text codes

    # Go through the opcode byte by byte.
    while strpos <= lastuserchar + 2:
        output = False
        char = inputstring[strpos]
        if ord(char) > 127:
            # Shift-JIS value: two bytes wide.
            strpos += 2
        elif ord(char) > 31:
            # Printable ASCII value.
            strpos += 1
            # A digit followed by P, K, F or N ends a text code.
            if inputstring[strpos] in ('P', 'K', 'F', 'N') and inputstring[strpos - 1].isdigit():
                JIScharpos = strpos
        elif char == '\x00':
            # 0x00 separates the speaker name from the text in 5d opcodes.
            if opcode in ('5b', '5d'):
                speaker = inputstring[startofline:strpos] + ";"
                if opcode == '5b':
                    # 5b opcodes carry only a name: emit it and finish.
                    outputstring += speaker + ';' + ';' + 'None' + ";" + str(userlength)
                    return outputstring
            strpos, startofline = strpos + 1, strpos + 1
            JIScharpos = strpos
        elif char == '\x01':
            # 0x01 is a line break.
            breaktype = "linebreak"
            output = True
        elif strpos == lastuserchar + 1:
            # First byte after the user text: the opcode ends here.
            output = True
            breaktype = "terminalcode"
            if opcode == '55' and char == '\x18':
                breaktype = "terminalcode18"
        elif char == '\x02':
            # 0x0203 / 0x0201 start a new dialog box within the same opcode.
            output = True
            following = inputstring[strpos + 1]
            if following == '\x03':
                breaktype = "newdialogbox"
            elif following == '\x01':
                breaktype = "newdialogbox2"
            else:
                print("Unknown opcode format in opcode %s at address %s." % (opcode, opcodeaddress))
                return ""
        elif char == '\x07':
            breaktype = '0x' + _hexbytes(inputstring[strpos:strpos + 2])
            output = True
        elif char == '\x18':
            if inputstring[strpos + 1:strpos + 3] == '\x02\x00':
                breaktype = 'terminalcode18'
                # Without this the line was never emitted and strpos never
                # advanced, so the loop could not terminate.
                output = True
            else:
                print("Unknown opcode format in opcode %s at address %s." % (opcode, opcodeaddress))
                return ""
        elif char == '\x1f':
            breaktype = '0x' + _hexbytes(inputstring[strpos:strpos + 5])
            output = True
        else:
            print("Unknown opcode format in opcode %s at address %s." % (opcode, opcodeaddress))
            return ""

        if not output:
            continue

        # Output routine: runs on a line break, new dialog box or terminator.
        thisline = inputstring[startofline:strpos]
        JIScharpos += 1  # step from the last code character to the first text character
        if len(inputstring[startofline:JIScharpos]) > 1:
            # The line starts with text codes.
            codes = inputstring[startofline:JIScharpos] + ";"
            speech = inputstring[JIScharpos:strpos] + ";"
            facecodepos = codes.find('F')
            if facecodepos > -1 and opcode != '5d':
                # The four digits before 'F' form the face code.
                facecode1 = thisline[facecodepos - 4:facecodepos - 2]
                facecode2 = thisline[facecodepos - 2:facecodepos]
                facename = facedict.get(facecode1, 'Unknown')
                # Compare before ';' is appended; comparing afterwards could
                # never match the 'Special' entry.
                if facename == 'Special':
                    if int(facecode2) < 12:
                        speaker = "Tio (Fancy);"
                    else:
                        speaker = "Zeit;"
                else:
                    speaker = facename + ";"
            elif opcode != '5d':
                # For 5d opcodes the speaker was set at the 0x00 separator.
                speaker = _fallback_speaker(opcode, inputstring)
        else:
            # No text codes on this line.
            codes = ";"
            speaker = _fallback_speaker(opcode, inputstring)
            speech = thisline + ";"

        if startflag:
            outputstring += speaker + codes + speech + breaktype + ";" + str(userlength)
            startflag = False
        else:
            # Later lines leave the address and opcode fields blank.
            outputstring += "\n;;" + speaker + codes + speech + breaktype

        # Step over the control sequence that ended this line.
        if breaktype == "linebreak":
            step = 1
        elif breaktype == 'terminalcode18':
            step = 3
        elif breaktype[0:4] == '0x1f':
            step = 5
        else:
            step = 2
        strpos, startofline = strpos + step, strpos + step
        JIScharpos = strpos
    return outputstring
def _find_all(data, pattern):
    """Yield every offset >= 1 at which *pattern* occurs in *data*."""
    pos = data.find(pattern, 1)
    while pos > -1:
        yield pos
        pos = data.find(pattern, pos + 1)


def _sjis_pair_at(data, pos):
    """Tell whether data[pos], data[pos+1] exist and look like a Shift-JIS pair."""
    return (pos + 1 < len(data)
            and 127 < ord(data[pos]) < 255
            and ord(data[pos + 1]) > 39)


def getpointers():
    """Return the addresses of the dialogue opcodes found in ``filedata``.

    The module-level ``filedata`` is searched for the byte patterns that
    introduce the 5e, 55, 5c, 5d and 5b opcodes. Each hit is recorded once,
    as the string produced by hex(), in the order in which it is found; the
    result is therefore not sorted by address.

    A lone 0x5c or 0x5d byte is accepted only when the byte before it is
    below 128 and the byte three positions after it starts user text ('#'
    or a Shift-JIS pair for 5c, a Shift-JIS pair for 5d). Candidates too
    close to the end of the data to be checked are skipped instead of
    raising IndexError.

    Prints a message and exits the program when nothing is found.

    (An earlier version took the pointers from a *.pre file; that code is
    no longer used.)
    """
    pointers = []
    seen = set()  # same content as `pointers`, for constant-time lookups

    def remember(pos):
        address = hex(pos)
        if address not in seen:
            seen.add(address)
            pointers.append(address)

    # Opcodes recognised by a fixed byte signature.
    for pattern in ('\x5e\x00\x00\xff\xff', '\x55\xff\x00', '\x55\x01\x01'):
        for pos in _find_all(filedata, pattern):
            remember(pos)

    # 5c: user text starts three bytes after the opcode byte.
    for pos in _find_all(filedata, '\x5c'):
        if ord(filedata[pos - 1]) < 128 and pos + 3 < len(filedata):
            if ord(filedata[pos + 3]) == 35 or _sjis_pair_at(filedata, pos + 3):
                remember(pos)

    # 5d: as 5c, but the text must start with a Shift-JIS pair.
    for pos in _find_all(filedata, '\x5d'):
        if ord(filedata[pos - 1]) < 128 and _sjis_pair_at(filedata, pos + 3):
            remember(pos)

    for pos in _find_all(filedata, '\x5b\xff\xff'):
        remember(pos)

    if not pointers:
        print("No dialogue in this file.")
        sys.exit()
    return pointers
def _find_5b_end(address):
    """Return the end offset of the 5b opcode starting at *address*.

    The end is the first 0x55 byte, within the 31 bytes starting at
    *address*, whose preceding byte is below 128 or above 254. Returns None
    when there is no such byte or the end of ``filedata`` is reached first.
    """
    for pos in range(address, min(address + 31, len(filedata))):
        if filedata[pos] == '\x55' and not 128 <= ord(filedata[pos - 1]) <= 254:
            return pos
    return None


def myprogram(filename):
    """Dump every dialogue opcode of the loaded script to a '.data' file.

    filename -- name of the script file; it becomes the first line of the
                dump and, with its extension replaced by '.data', the name
                of the output file.

    The opcode addresses come from getpointers() and are processed in
    ascending order. Each opcode is cut out of the module-level
    ``filedata`` and formatted by calculateoutputstring(). Prints a message
    and exits the program when no opcode yields any output.
    """
    pointers = getpointers()
    outfiledata = filename
    for pointer in sorted(pointers, key=functools.partial(int, base=16)):
        address = int(pointer, 16)
        if address >= len(filedata):
            print("Data at address " + pointer + " is not within the file.")
            continue
        opcode = '%02x' % ord(filedata[address])
        # The opcodes are variable length, so their ends have to be searched
        # for. When find() fails the end offset becomes 1, the slice taken
        # below is empty, and the opcode is skipped.
        if opcode == '5e':
            strend = filedata.find('\x01\x00', address) + 2
        elif opcode in ('55', '5c', '5d'):
            strend = filedata.find('\x02\x00', address) + 2
        elif opcode == '5b':
            strend = _find_5b_end(address)
            if strend is None:
                print("5b opcode miss at address " + pointer)
                continue
        else:
            print("Unknown opcode " + opcode + " at address " + pointer)
            continue
        # Pass the address and the entire opcode on for formatting.
        chunk = filedata[address:strend]
        if chunk != "":
            outfiledata += calculateoutputstring(pointer, chunk)
    if outfiledata == filename:
        print("No dialog in this file.")
        sys.exit()
    # The with-block guarantees the file is flushed and closed; the bare
    # `outfile.close` used previously never actually called close().
    with open(os.path.splitext(filename)[0] + '.data', 'wb') as outfile:
        outfile.write(outfiledata)
# Script entry point: dump the script named by the first command-line
# argument. myprogram() takes the name of the '.orig' file only.
myprogram('%s.orig' % sys.argv[1])
Add Comment
Please, Sign In to add comment