# Znk Dumper v2

#OK guys, I "cheated" and modified the program to get the opcode positions from the .pre files
#Not sure if that was a good idea or not but...
import os
import sys
import struct

#Maps the first two digits of a face code (as a string) to the speaker's name.
#'54' is a placeholder: the actual speaker depends on the last two digits of the face code.
facedict = {
    '00': 'Lloyd',
    '01': 'Elie',
    '02': 'Tio',
    '03': 'Randy',
    '04': 'Lazy',
    '05': 'Noel',
    '06': 'Dudley',
    '07': 'Yin',
    '08': 'Estelle',
    '09': 'Joshua',
    '10': 'Sergei',
    '11': 'KeA',
    '12': 'Zeit',
    '13': 'Cecil',
    '14': 'Arios',
    '15': 'Sizuku',
    '16': 'Wald',
    '17': 'Ilya',
    '18': 'Rixia',
    '19': 'Fran',
    '20': 'Sonya',
    '21': 'Grace',
    '22': 'Ian',
    '23': 'Jona',
    '24': 'Joachim',
    '25': 'McDowell',
    '26': 'Earnest',
    '27': 'Hartman',
    '28': 'Dieter',
    '29': 'Mariabell',
    '30': 'Marconi',
    '31': 'Garcia',
    '32': 'Cao',
    '33': 'Renne',
    '34': 'Kirika',
    '35': 'Lector',
    '36': 'Harold',
    '37': 'Sophia',
    '38': 'Colin',
    '39': 'Jorg',
    '50': 'Lloyd (Fancy)',
    '51': 'Lloyd (Fancy Glasses)',
    '52': 'Lloyd (Casual)',
    '53': 'Elie (Fancy)',
    '54': 'Special',
    '55': 'Tio (Casual)',
    '56': 'Randy (Fancy)',
    '57': 'Lazy (Fancy)',
    '58': 'KeA (Fancy)',
    '59': 'Cecil (Fancy)',
    '60': 'Sizuku (Fancy)',
    '61': 'Ilya (Dancer)',
    '62': 'Rixia (Priestess)',
    '63': 'Noel (Casual)',
    '64': 'Fran (Casual)',
    '65': 'McDowell (PJs)',
    '66': 'Earnest (Suit)',
    '67': 'Joachim (Blue Hair)',
    '68': 'Joachim (White Hair)',
}

#Grabs data. It's called by getpointers and myprogram
#myprogram (couldn't think of better name) is the top level function
def get_data(filename):
    """Return the entire contents of the file `filename` as raw bytes.

    The file is opened in binary mode so no newline or text translation is
    applied to the data.
    """
    #The with-block guarantees the handle is closed, even if the read fails
    with open(filename, 'rb') as infile:
        return infile.read()

#Takes the input string and makes a nicely formatted output string for the translators
def calculateoutputstring(opcodeaddress,inputstring):
    """Format one dialog opcode as dump text.

    opcodeaddress -- address of the opcode as a string; it is copied verbatim
                     into the first field of the output.
    inputstring   -- the raw bytes of the opcode (a Python 2 str), starting at
                     the opcode byte and ending with the 0x02 0x00 terminator.

    Returns a string that begins with a newline. The first output line is
    "<address> <opcode> <speaker><codes><speech><breaktype> <userlength>";
    every further line is a newline plus two blanks followed by
    "<speaker><codes><speech><breaktype>". A line is emitted for every line
    break (0x01), new dialog box (0x02 0x03) and for the terminal code.

    Only the opcodes 0x55, 0x5c and 0x5d are understood. For an unknown opcode,
    an unknown 0x02 code or an unknown control byte a message is printed and
    the program exits with status 1.
    """
    speaker = ""
    outputstring = '\n' + opcodeaddress + " " #1st field is address - write to output
    opcode = inputstring[0].encode('hex') #What opcode is it?
    outputstring += opcode + ' ' #2nd field is opcode - write to output

    #Step back two bytes at a time from index -4 until a byte >= 0x80 is found
    #(looking for last shift-JIS character in opcode)
    n = -4
    while ord(inputstring[n]) < 128:
        n -= 2

    #Offset of the first text byte; it varies per opcode
    startpositions = {'55': 5, '5c': 3, '5d': 3}
    if opcode not in startpositions:
        #The single-argument parenthesised form prints the same text under
        #the Python 2 print statement.
        print('Unknown opcode %s at address %s.' % (opcode,opcodeaddress))
        #Must really terminate: without a start position nothing below can run
        sys.exit(1)
    strpos = startpositions[opcode]
    userlength = len(inputstring)-strpos+n+1 #length varies per opcode
    lastuserchar = len(inputstring)+n+1 #tells program when to stop

    startflag = True #True until the first line of this opcode has been output
    startofline = strpos #tells program where the start of the current line is
    JIScharpos = 0 #misleading name. Really the position of the last ascii character.

#Go byte by byte
    while strpos <= lastuserchar + 2: #Until end of the string...
        output = False #reset flag
        bytevalue = ord(inputstring[strpos])
#Our byte is a SHIFT-JIS value
        if bytevalue > 127:
            strpos += 2 #move pointer forward and check again
#Our byte is an ASCII value (which is fine too)
        elif bytevalue > 19:
            strpos += 1 #move pointer forward and check again
            #Note: this looks at the byte AFTER the one just skipped
            if inputstring[strpos] in ('P','K','F','N'): #If the character is the end of a text code then:
                JIScharpos = strpos #Set (or reset) the position of the last ascii character
#Our byte at this point must be some weird Falcom text code
#The 0x00 code is used in 0x5D opcodes to separate the name and what the name should say
        elif bytevalue == 0:
            if opcode == '5d': #For 5D opcodes, the speaker name is in the opcode itself
                speaker = inputstring[startofline:strpos] + " "
            strpos, startofline = strpos + 1, strpos + 1
            JIScharpos = strpos
#The 0x01 opcode is a line break
        elif bytevalue == 1:
            breaktype = "linebreak"
            output = True #Tells the program to do the "output" routine on this pass through the loop
#We output a line every time there is either linebreak, newdialogbox or terminalcode

#There's two codes starting with 0x02 that we know of:
#0x0200 ends the opcode
#0x0203 starts a new dialog box within the same opcode
        elif strpos == lastuserchar + 1: #The pointer (within this program) is at the end of the opcode
            output = True
            breaktype = "terminalcode"

        elif bytevalue == 2:
            output = True
            if ord(inputstring[strpos+1]) == 3:
                breaktype = "newdialogbox"
            else:
                print("Unknown opcode format at address %s." % opcodeaddress)
                #Carrying on would reuse the break type of the previous line
                #(or fail if there was none), so stop here.
                sys.exit(1)
        else:
            print("Unknown opcode format at address %s." % opcodeaddress)
            #Must terminate: strpos was not advanced, so continuing would
            #repeat this branch forever.
            sys.exit(1)

        if output: #output routine

            thisline = inputstring[startofline:strpos] #Gives the string. Further processed below.
            JIScharpos += 1 #The value computed above is really the character _before_ the first JIS character

            if len(inputstring[startofline:JIScharpos]) > 1: #There are ASCII characters in thisline
                codes = inputstring[startofline:JIScharpos] + " " #Grabs the codes
                speech = inputstring[JIScharpos:strpos] + " " #Grabs the non-codes part of the line
                facecodepos = codes.find('F') #Face code position within the codes (-1 if none)
                if facecodepos > -1 and opcode != '5d': #There is an "F" code, so we need the faces routine
                    facecode1 = thisline[facecodepos-4:facecodepos-2] #First two numbers of face code (as string)
                    facecode2 = thisline[facecodepos-2:facecodepos] #Last two numbers of face code (as string)
                    facename = facedict[facecode1] #Look the name up by the 1st two numbers
                    #Compare the bare name: once the trailing blank is
                    #appended the value can never equal 'Special'.
                    if facename == 'Special': #What to do if the face code starts with "54"
                        if int(facecode2) < 12:
                            speaker = "Tio (Fancy) "
                        else:
                            speaker = "Zeit "
                    else:
                        speaker = facename + " "
                elif opcode != '5d': #For 5D opcodes, the speaker has already been set; we don't want to mess that up.
                    speaker = " " #For non 5D opcodes, there's no speaker on this line, so we make a blank
            else: #No text codes on this line - make some blanks
                #NOTE(review): this also blanks the speaker of a 5D opcode - confirm that is wanted
                codes = " "
                speaker = " "
                speech = thisline + " "

            if startflag: #What to do on the first pass
                outputstring += speaker + codes + speech + breaktype + " " + str(userlength)
                startflag = False
            else: #Second and later passes have a newline and don't have address or opcode, so two blanks are needed
                outputstring += "\n  " + speaker + codes + speech + breaktype

            #A line break is one byte long, the other two codes are two bytes long
            if breaktype == "linebreak":
                strpos, startofline = strpos + 1, strpos + 1
            else:
                strpos, startofline = strpos + 2, strpos + 2

            JIScharpos = strpos

    return outputstring

#Loads the *.pre file and returns the pointers from it
def getpointers(filename):
    """Return the opcode addresses listed in the .pre file `filename`.

    The records are located by searching for the first 0xCC 0xCC byte pair,
    which is taken to sit in bytes 2-3 of a 12-byte record. From there the
    file is walked one record at a time. For every record whose bytes 2-3 are
    0xCC 0xCC, bytes 4-5 are read as a little-endian 16-bit value.

    Returns a list of those values as hex() strings (e.g. '0x1a2b'), without
    duplicates and in order of first appearance. The list is empty when the
    file holds no 0xCC 0xCC pair.
    """
    filedata = get_data(filename)
    markerpos = filedata.find('\xcc\xcc') #I can't decode .pre headers so this'll have to do.
    if markerpos == -1: #find() signals "not found" with -1: there's no dialog here
        return []

    recordsize = 12 #opcodes/pointer bytes/whatever in .pre are 12 bytes long
    firstrecord = markerpos-2
    #Stop before the end of the file so that no record is read past it, then
    #drop the last record found: according to the original author it usually
    #doesn't point to dialog. Slicing (rather than del) copes with zero records.
    recordstarts = range(firstrecord,len(filedata)-recordsize,recordsize)[:-1]

    pointers = []
    seen = set() #same content as pointers, for fast duplicate checks
    for recordstart in recordstarts:
        if filedata[recordstart+2:recordstart+4] == '\xcc\xcc':
            #The value is stored little endian
            thisval = hex(struct.unpack('<H',filedata[recordstart+4:recordstart+6])[0])
            if thisval not in seen:
                seen.add(thisval)
                pointers.append(thisval)
    return pointers

def myprogram(filename,filename2):
    """Dump the dialog opcodes of `filename` to a '.data' file.

    filename  -- the file that holds the opcodes; the output is written next
                 to it, with the extension replaced by '.data'.
    filename2 -- the .pre file the opcode addresses are read from.

    The output starts with `filename` itself, followed by the formatted text
    of every opcode. An address for which no 0x02 0x00 terminator can be found
    is reported and skipped.
    """
    pointers = getpointers(filename2) #First grab the pointers. We'll need these later.
    filedata = get_data(filename)
    outputparts = [filename] #Collected pieces of the output; joined once at the end

    for pointer in pointers:
        address = int(pointer,16)
# The 5C and 5D opcodes we're looking for in this program are variable length.
# We have to search for their ends.
        strend = filedata.find('\x02\x00',address) #Find the end of the opcode
        if strend == -1:
            #Without a terminator there is no opcode to hand to the formatter
            print("No end of opcode found for address %s; skipped." % pointer)
            continue
# Pass the pointer address and entire opcode to a function for formatting the dump
        outputparts.append(calculateoutputstring(pointer,filedata[address:strend+2]))

    #The with-block closes the file, so the output is flushed to disk
    with open(os.path.splitext(filename)[0] + '.data','wb') as outfile:
        outfile.write(''.join(outputparts)) #write the output

if __name__ == '__main__':
#    sys.argv=[sys.argv[0],'m3000.bin','m3000.pre'] I use this line for testing in IDLE
    #Both file names are needed; say so instead of failing with an IndexError
    if len(sys.argv) < 3:
        sys.exit('Usage: %s <script file> <.pre file>' % sys.argv[0])
    myprogram(sys.argv[1],sys.argv[2])