Guest User

ZnK Dumper v2.5

a guest
Jun 26th, 2014
403
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 16.06 KB | None | 0 0
  1. import os
  2. import sys
  3. import struct
  4. import functools
  5.  
  6. facedict = {}
  7. facedict['00'] = 'Lloyd'
  8. facedict['01'] = 'Elie'
  9. facedict['02'] = 'Tio'
  10. facedict['03'] = 'Randy'
  11. facedict['04'] = 'Lazy'
  12. facedict['05'] = 'Noel'
  13. facedict['06'] = 'Dudley'
  14. facedict['07'] = 'Yin'
  15. facedict['08'] = 'Estelle'
  16. facedict['09'] = 'Joshua'
  17. facedict['10'] = 'Sergei'
  18. facedict['11'] = 'KeA'
  19. facedict['12'] = 'Zeit'
  20. facedict['13'] = 'Cecil'
  21. facedict['14'] = 'Arios'
  22. facedict['15'] = 'Sizuku'
  23. facedict['16'] = 'Wald'
  24. facedict['17'] = 'Ilya'
  25. facedict['18'] = 'Rixia'
  26. facedict['19'] = 'Fran'
  27. facedict['20'] = 'Sonya'
  28. facedict['21'] = 'Grace'
  29. facedict['22'] = 'Ian'
  30. facedict['23'] = 'Jona'
  31. facedict['24'] = 'Joachim'
  32. facedict['25'] = 'McDowell'
  33. facedict['26'] = 'Earnest'
  34. facedict['27'] = 'Hartman'
  35. facedict['28'] = 'Dieter'
  36. facedict['29'] = 'Mariabell'
  37. facedict['30'] = 'Marconi'
  38. facedict['31'] = 'Garcia'
  39. facedict['32'] = 'Cao'
  40. facedict['33'] = 'Renne'
  41. facedict['34'] = 'Kirika'
  42. facedict['35'] = 'Lector'
  43. facedict['36'] = 'Harold'
  44. facedict['37'] = 'Sophia'
  45. facedict['38'] = 'Colin'
  46. facedict['39'] = 'Jorg'
  47. facedict['50'] = 'Lloyd (Fancy)'
  48. facedict['51'] = 'Lloyd (Fancy Glasses)'
  49. facedict['52'] = 'Lloyd (Casual)'
  50. facedict['53'] = 'Elie (Fancy)'
  51. facedict['54'] = 'Special'
  52. facedict['55'] = 'Tio (Casual)'
  53. facedict['56'] = 'Randy (Fancy)'
  54. facedict['57'] = 'Lazy (Fancy)'
  55. facedict['58'] = 'KeA (Fancy)'
  56. facedict['59'] = 'Cecil (Fancy)'
  57. facedict['60'] = 'Sizuku (Fancy)'
  58. facedict['61'] = 'Ilya (Dancer)'
  59. facedict['62'] = 'Rixia (Priestess)'
  60. facedict['63'] = 'Noel (Casual)'
  61. facedict['64'] = 'Fran (Casual)'
  62. facedict['65'] = 'McDowell (PJs)'
  63. facedict['66'] = 'Earnest (Suit)'
  64. facedict['67'] = 'Joachim (Blue Hair)'
  65. facedict['68'] = 'Joachim (White Hair)'
  66.  
  67. #Grabs data. It's called by getpointers and myprogram
  68. #myprogram (couldn't think of better name) is the top level function
  69. def get_data(filename):
  70.     totalbytes = os.path.getsize(filename)
  71.     infile = open(filename, 'rb')
  72.     totalfiledata = infile.read(totalbytes)
  73.     return totalfiledata
  74.  
  75. ##sys.argv=[sys.argv[0],'c0000']
  76. print sys.argv[1]
  77. filedata = get_data(sys.argv[1] + '.orig')
  78. EOFposition = filedata.rfind(sys.argv[1])
  79. nameslist = filedata[EOFposition:].split("\x00")
  80.  
  81. #Takes the input string and makes a nicely formatted output string for the translators
  82. def calculateoutputstring(opcodeaddress,inputstring):
  83.     outputstring = '\n' + opcodeaddress + ";" #1st field is address - write to output
  84.     opcode = inputstring[0].encode('hex') #What opcode is it?
  85.     outputstring += opcode + ';' #2nd field is opcode - write to output
  86.     n = -2
  87.     while True: #looking for last user character in opcode
  88.         if ord(inputstring[n]) > 127 and ord(inputstring[n]) < 255 and ord(inputstring[n+1]) > 39:
  89.             lastuserchar = len(inputstring)+n+1 #tells program when to stop
  90.             break
  91.         if ord(inputstring[n]) in (41,32,35):
  92.             lastuserchar = len(inputstring)+n #tells program when to stop
  93.             break
  94.         n -= 1
  95.         if len(inputstring) + n == 0:
  96. ##            print "No text in opcode " + opcode + " at address " + opcodeaddress
  97.             return ""
  98.     strpos = 1
  99.     while True:
  100.         if ord(inputstring[strpos]) == 35: break # Pound character is 1st
  101. # Shift-JIS character is first
  102.         if ord(inputstring[strpos]) > 127 and ord(inputstring[strpos]) < 255 and ord(inputstring[strpos+1]) > 39: break
  103.         if strpos == len(inputstring)-1:
  104.             print "No user text in opcode 5b at address " + opcodeaddress
  105.             return ""
  106.         strpos += 1
  107.  
  108.     startofline = strpos #tells program where the start of the current line is
  109.  
  110.     userlength = len(inputstring)-startofline+n+2
  111.    
  112.    
  113.  
  114.     startflag = True #tells program whether the first line in the opcode has been output yet or not (True = not yet)
  115.     JIScharpos = 0 #misleading name. Really the position of the last ascii character. lastasciichar was too long a name...
  116.  
  117. #Go byte by byte
  118.     while strpos <= lastuserchar + 2: #Until end of the string...
  119. ##        if opcodeaddress == '0x8955':
  120. ##            print 'hello'
  121.         output = False #reset flag
  122. #Our byte is a SHIFT-JIS value
  123.         if ord(inputstring[strpos]) > 127:
  124.             strpos += 2 #move pointer forward and check again
  125. #Our byte is an ASCII value (which is fine too)
  126.         elif int(inputstring[strpos].encode('hex'),16) > 31:
  127.             strpos += 1 #move pointer forward and check again
  128.             if inputstring[strpos] in ('P','K','F','N') and inputstring[strpos-1].isdigit(): #If the character is the end of a text code then:
  129.                 JIScharpos = strpos #Set (or reset) the position of the last ascii character
  130. #Our byte at this point must be some weird Falcom text code
  131. #The 0x00 code is used in 0x5D opcodes to separate the name and what the name should say
  132.         elif inputstring[strpos].encode('hex') == '00':
  133.             if opcode in ('5b','5d'): #For 5D opcodes, the speaker name is in the opcode itself
  134.                 speaker = inputstring[startofline:strpos] + ";"
  135.             if opcode == '5b':
  136.                 breaktype = 'None'
  137.                 codes = ';'
  138.                 speech = ';'
  139.                 outputstring += speaker + codes + speech + breaktype + ";" + str(userlength)
  140.                 return outputstring
  141.             strpos, startofline = strpos + 1, strpos + 1
  142.             JIScharpos = strpos
  143. #The 0x01 opcode is a line break
  144.         elif inputstring[strpos].encode('hex') == '01':
  145.             breaktype = "linebreak"
  146.             output = True #Tells the program to do the "output" routine on this pass through the loop
  147. #We output a line every time there is either linebreak, newdialogbox or terminalcode
  148.  
  149. #There's two codes starting with 0x02 that we know of:
  150. #0x0200 ends the opcode
  151. #0x0203 starts a new dialog box within the same opcode
  152.         elif strpos == lastuserchar + 1: #The pointer (within this program) is at the end of the opcode
  153. #I call it the "pointer" because we are looking at the actual opcode byte by byte, moving the pointer each time
  154.             output = True
  155.             breaktype = "terminalcode"
  156.             if opcode == '55' and inputstring[strpos].encode('hex') == '18':
  157.                 breaktype = "terminalcode18"
  158.         elif inputstring[strpos].encode('hex') == '02':
  159.             output = True
  160.             if inputstring[strpos+1].encode('hex') == '03':
  161.                 breaktype = "newdialogbox"
  162.             elif inputstring[strpos+1].encode('hex') == '01':
  163.                 breaktype = "newdialogbox2"
  164.             else:
  165.                 print "Unknown opcode format in opcode %s at address %s." % (opcode,opcodeaddress)
  166.                 return ""
  167.         elif inputstring[strpos].encode('hex') == '07':
  168.             breaktype = '0x' + inputstring[strpos:strpos+2].encode('hex')
  169.             output = True
  170.         elif inputstring[strpos].encode('hex') == '18':
  171.             if inputstring[strpos+1:strpos+3] == '\x02\x00':
  172.                 breaktype = 'terminalcode18'
  173.             else:
  174.                 print "Unknown opcode format in opcode %s at address %s." % (opcode,opcodeaddress)
  175.                 return ""
  176.         elif inputstring[strpos].encode('hex') == '1f':
  177.             breaktype = '0x' + inputstring[strpos:strpos+5].encode('hex')
  178.             output = True
  179.         else:
  180.             print "Unknown opcode format in opcode %s at address %s." % (opcode,opcodeaddress)
  181.             return ""
  182.  
  183.         if output: #output routine
  184.  
  185.             thisline = inputstring[startofline:strpos] #Gives the string. Further processed below.
  186.             JIScharpos += 1 #That's because the value computed above is really the character _before_ the first JIS character
  187.  
  188.             if len(inputstring[startofline:JIScharpos]) > 1: #There are ASCII characters in thisline
  189.                 codes = inputstring[startofline:JIScharpos] + ";" #Grabs the codes
  190.                 speech = inputstring[JIScharpos:strpos] + ";" #Grabs the non-codes part of the line
  191.                 if codes.find('F') > -1 and opcode != '5d': #If there is an "F" code in the codes, we need the faces routine
  192.                     facecodepos = codes.find('F') #Gets face code position within the codes
  193.                     facecode1 = thisline[facecodepos-4:facecodepos-2] #First two numbers of face code (as string)
  194.                     facecode2 = thisline[facecodepos-2:facecodepos] #Last two numbers of face code (as string)
  195.                     speaker = facedict.get(facecode1,'Unknown') + ";" #You know that dictionary at the top? Go get the name based on the 1st two numbers.
  196.                     if speaker == 'Special': #What to do if the face code starts with "54"
  197.                         if int(facecode2) < 12: speaker = "Tio (Fancy);"
  198.                         else: speaker = "Zeit;"
  199.                 elif opcode != '5d': #For 5D opcodes, the speaker has already been set; we don't want to mess that up.
  200.                     if opcode == '5c' and inputstring[2] == "\x00":
  201.                         nameindex = ord(inputstring[1]) - 7
  202.                         if nameindex < len(nameslist):
  203.                             speaker = nameslist[nameindex] + ";"
  204.                         else:
  205.                             speaker = ";"
  206.                     else:
  207.                         speaker = ";" #For non 5D opcodes, there's no speaker on this line, so we make a blank
  208.  
  209.             else: #No text codes on this line - make some blanks
  210.                 codes = ";"
  211.                 if opcode == '5c' and inputstring[2] == "\x00":
  212.                     nameindex = ord(inputstring[1]) - 7
  213.                     if nameindex < len(nameslist):
  214.                         speaker = nameslist[nameindex] + ";"
  215.                     else:
  216.                         speaker = ";"
  217.                 else:
  218.                     speaker = ";"
  219.                 speech = thisline + ";"
  220.  
  221.             if startflag == True: #What to do on the first pass
  222.                 outputstring += speaker + codes + speech + breaktype + ";" + str(userlength)
  223.                 startflag = False
  224.             else: #Second and later passes have a newline and don't have address or opcode, so two blanks are needed
  225.                 outputstring += "\n;;" + speaker + codes + speech + breaktype
  226.  
  227.             if breaktype == "linebreak": #Update state variables at the end, move the pointer, etc...
  228.                 strpos, startofline = strpos + 1, strpos + 1
  229.             elif breaktype == 'terminalcode18':
  230.                 strpos, startofline = strpos + 3, strpos + 3
  231.             elif breaktype[0:4] == '0x1f':
  232.                 strpos, startofline = strpos + 5, strpos + 5
  233.             else:
  234.                 strpos, startofline = strpos + 2, strpos + 2
  235.  
  236.             JIScharpos = strpos
  237.            
  238.     return outputstring
  239.  
  240. #Loads the *.pre file and returns the pointers from it
  241. def getpointers():
  242. #    filedata = get_data(filename)
  243. #    opcodepos = filedata.find('\x00\xcc\xcc')-1 #I can't decode .pre headers so this'll have to do.
  244. #    if opcodepos < 0:
  245. #        return 'error' #if there's no dialog here then exit program entirely
  246.     opcodes = []
  247.     pointers = []
  248. #    while opcodepos < len(filedata)-12: #Weird magic number here ("12")
  249. # Could use some help getting rid of this magic number
  250. # I need the program to stop before the end of the file to avoid string index out of range error
  251. # The last opcode usually doesn't point to dialog so I think this could be fine for now
  252. #        opcode = []
  253. #        for n in range(11):
  254. #            opcode.append(filedata[opcodepos+n].encode('hex'))
  255. #        opcodepos += 12 #This magic number is fine; opcodes/pointer bytes/whatever in .pre are 12 bytes long
  256. #        opcodes.append(opcode)
  257. #    for opcode in opcodes:
  258. #        if [opcode[2],opcode[3]] == ['cc','cc']:
  259. # It converts the little endian value (which makes no sense) to big endian (which does make sense)
  260. #            thisval = hex(struct.unpack('<I',(opcode[4] + opcode[5] + opcode[6] + opcode[7]).decode('hex'))[0])
  261. #            if not thisval in pointers:
  262. #                pointers.append(thisval)
  263.     strpos = 0
  264.  
  265. #    filedata = get_data(filename2)
  266.     while strpos > -1:
  267.         strpos = filedata.find('\x5e\x00\x00\xff\xff',strpos+1)
  268.         if strpos > 0 and hex(strpos) not in pointers:
  269.             pointers.append(hex(strpos))
  270.  
  271.     strpos = 0
  272.     while strpos > -1:
  273.         strpos = filedata.find('\x55\xff\x00',strpos+1)
  274.         if strpos > 0 and hex(strpos) not in pointers:
  275.             pointers.append(hex(strpos))
  276.        
  277.     strpos = 0
  278.     while strpos > -1:
  279.         strpos = filedata.find('\x55\x01\x01',strpos+1)
  280.         if strpos > 0 and hex(strpos) not in pointers:
  281.             pointers.append(hex(strpos))
  282.        
  283.     strpos = 0
  284.     while strpos > -1:
  285.         strpos = filedata.find('\x5c',strpos+1)
  286.         if strpos > 0 and ord(filedata[strpos-1]) < 128:
  287.             if ord(filedata[strpos+3]) == 35:
  288.                 if hex(strpos) not in pointers: pointers.append(hex(strpos))
  289.             elif ord(filedata[strpos+3]) > 127 and ord(filedata[strpos+3]) < 255 and ord(filedata[strpos+4]) > 39:
  290.                 if hex(strpos) not in pointers: pointers.append(hex(strpos))
  291.  
  292.     strpos = 0
  293.     while strpos > -1:
  294.         strpos = filedata.find('\x5d',strpos+1)
  295.         if strpos > 0 and ord(filedata[strpos-1]) < 128:
  296.             if ord(filedata[strpos+3]) > 127 and ord(filedata[strpos+3]) < 255 and ord(filedata[strpos+4]) > 39:
  297.                 if hex(strpos) not in pointers: pointers.append(hex(strpos))
  298.    
  299.     strpos = 0
  300.     while strpos > -1:
  301.         strpos = filedata.find('\x5b\xff\xff',strpos+1)
  302.         if strpos > 0:
  303.             if hex(strpos) not in pointers: pointers.append(hex(strpos))
  304.  
  305.     if pointers == []:
  306.         print "No dialogue in this file."
  307.         quit()
  308.        
  309.     return pointers
  310.  
  311. def myprogram(filename):
  312.     pointers = getpointers() #First grab the pointers. We'll need these later.
  313.  
  314. #    filedata = get_data(filename)
  315.     outfiledata = filename
  316.  
  317.     for pointer in sorted(pointers,key=functools.partial(int,base=16)):
  318. # The 5C and 5D opcodes we're looking for in this program are variable length.
  319. # We have to search for their ends.
  320.         if int(pointer,16) < len(filedata):
  321.             opcode = filedata[int(pointer,16)].encode('hex')
  322.             if opcode == '5e':
  323.                 strend = filedata.find('\x01\x00',int(pointer,16)) #Find the end of the opcode
  324.                 strend += 2
  325.             elif opcode in ('55','5c','5d'):
  326.                 strend = filedata.find('\x02\x00',int(pointer,16)) #Find the end of the opcode
  327.                 strend += 2
  328.             elif opcode == '5b':
  329.                 strpos = int(pointer,16)
  330.                 while True:
  331.                     if filedata[strpos].encode('hex') == '55':
  332.                         if ord(filedata[strpos-1]) < 128 or ord(filedata[strpos-1]) > 254:
  333.                             strend = strpos
  334.                             break
  335.                     strpos += 1
  336.                     if strpos - int(pointer,16) > 30:
  337.                         print "5b opcode miss at address " + pointer
  338.                         opcode = ""
  339.                         break
  340.  
  341.             else:
  342.                 print "Unknown opcode " + filedata[int(pointer,16)].encode('hex') + " at address " + pointer
  343.     # Pass the pointer address and entire opcode to a function for formatting the dump
  344.             if opcode in ('55','5c','5d','5e','5b'):
  345.                 if filedata[int(pointer,16):strend] != "":
  346.                     outputstring = calculateoutputstring(pointer,filedata[int(pointer,16):strend])
  347.                     outfiledata += outputstring #Append formatted string to program output
  348.         else:
  349.             print "Data at address " + pointer + " is not within the file."
  350.  
  351.     if outfiledata == filename:
  352.         print "No dialog in this file."
  353.         quit()
  354.        
  355.     outfile = open(os.path.splitext(filename)[0] + '.data','wb')
  356.     outfile.write(outfiledata) #write the output
  357.     outfile.close
  358.        
# Older entry point, kept for reference: it hard-coded a test input and
# passed a second '.pre' file name, which myprogram no longer accepts.
##if __name__ == '__main__':
##
##    sys.argv=[sys.argv[0],'e0410']
##    myprogram(sys.argv[1] + '.orig',sys.argv[1] + '.pre')

# Script entry point: runs unconditionally when this file is executed.
# The command-line argument is the input name without its '.orig' extension.
myprogram(sys.argv[1] + '.orig')
Add Comment
Please, Sign In to add comment