#Wander Wonder Script Insert v1.2

#1.2    Accurately measure original length of name strings
#1.1    Change to accommodate new dump format
#1      Initial release
import os
import struct
import sys
import codecs
#List of ASCII characters used in scripts: upper-case letters, parentheses,
#digits and the NUL byte. script_insert treats any of these bytes as part of
#a name string when measuring the original name length.
asciichar = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ()0123456789\x00'
#Directory name where bit-flipped files will go (each output byte XOR 255)
pathname = 'flipped'

def get_data(filename):
    """Return the complete contents of a file as a byte string.

    filename - path of the file to read; it is opened in binary mode so no
               newline translation or decoding takes place.

    Raises whatever open()/read() raise when the file cannot be read.
    """
    #The with-block closes the handle even if the read fails
    with open(filename, 'rb') as infile:
        return infile.read()

def is_jis_char(s):
    """Return whether a 2-length byte string is one SHIFT-JIS character.

    s is decoded as cp932. True is returned only when s is exactly two
    bytes long and those bytes decode to a single character; two ASCII
    bytes (which decode to two characters) and undecodable input both
    give False.
    """
    if len(s) == 2:
        try:
            decoded = s.decode('cp932')
        except UnicodeDecodeError:
            #Not valid cp932 at all
            pass
        else:
            #One decoded character means a genuine double-byte character
            return len(decoded) == 1
    return False

def replacestr(origstr,replacestr,startpos,replacelen):
    """Return origstr with one span swapped out for replacestr.

    origstr    - the original string
    replacestr - the string to put in place of the removed span
    startpos   - index in origstr where the replacement begins
    replacelen - how many characters of origstr are dropped at startpos
    """
    kept_before = origstr[:startpos]
    resume_at = startpos + replacelen
    kept_after = origstr[resume_at:]
    return kept_before + replacestr + kept_after

def SJIS_convert(filename):
    """Re-encode filename + '.tsv' (UTF-8) into filename + '.csv' (cp932).

    filename - base path without extension. The text is copied line by
               line; only the character encoding changes.
    """
    source_path = filename + '.tsv'
    target_path = filename + '.csv'
    with codecs.open(source_path,'rb','utf-8') as utf8_in:
        with codecs.open(target_path,'wb','cp932') as sjis_out:
            for decoded_line in utf8_in:
                sjis_out.write(decoded_line)

def _word_align(data):
    """Return data padded with NUL bytes up to the next multiple of 4 bytes."""
    return data + '\x00' * (-len(data) % 4)

def script_insert(filename):
    """Insert the translated script for filename into its binary script file.

    filename - base path without extension. The following files are used:
        filename + '.tsv'   input, UTF-8 tab-separated script dump
        filename + '.csv'   intermediate cp932 copy made by SJIS_convert
        filename + '.orig'  input, original binary script
        filename + '.sq'    output, rebuilt binary script
        pathname/filename + '.sq'  output, same data with every byte XOR 255

    Each input row is split on tabs. Column 0 is the chunk number in hex
    (empty means "continues the previous entry"), column 2 is the text or
    an opcode written as '0x' + hex digits, column 3 holds the flags
    ('linebreak' and/or 'delay'). Column 1 is not read here.

    An entry with exactly one row is treated as a name entry: the text that
    starts at offset 4 of the chunk is replaced. Any other entry is a script
    entry: the whole chunk is rebuilt from its rows.

    Raises ValueError if a row carries the 'delay' flag but the original
    file contains no delay chunk to refer to. The directory named by
    pathname must already exist.
    """
    SJIS_convert(filename)
    #Data structure for inputdata: string (chunk number) and list.
    #The list is paired data and flag strings
    inputdata = []
    with open(filename + '.csv') as f:
        for line in f:
            line = line.translate(None,'\r\n')  #Delete linebreak characters
            line = line.split('\t')             #Split along tabs
            if line[0] == '':   #Append line to previous entry
                inputdata[-1][1].append([line[2],line[3]])
            else:               #Make new entry
                inputdata.append([line[0],[[line[2],line[3]]]])

    filedata = get_data(filename + '.orig')
    #The file starts with a table of little-endian 32-bit pointers; the
    #first pointer doubles as the size of the table itself
    end_of_ptr_table = struct.unpack('<I',filedata[0:4])[0]
    ptr_table = []
    ptr = 0
    while ptr < end_of_ptr_table:
        ptr_table.append(struct.unpack('<I',filedata[ptr:ptr+4])[0])
        ptr += 4
    #Break filedata into chunks: one per pointer, each running up to the
    #next pointer, and the last one running to the end of the file
    chunks = []
    prev_ptr = None
    for ptr in ptr_table:
        if prev_ptr is not None:
            chunks.append(filedata[prev_ptr:ptr])
        prev_ptr = ptr
    chunks.append(filedata[ptr:])

    #Find the chunk the delay opcode has to reference (None if absent)
    delay_marker = '4002000040033C804002010040160000'.decode('hex')
    delay_chunk = None
    for i, chunk in enumerate(chunks):
        if chunk == delay_marker:
            delay_chunk = i
            break

    for i, data in inputdata:   #Loop over input data
        i = int(i,16)           #Convert chunk number to int
        if len(data) == 1:      #Name entry
            chunk = chunks[i]
            pos = 4             #Start of text data in name chunks
            #Walk over the existing name text. The bounds check stops the
            #scan cleanly when the text (or its NUL padding, which is in
            #asciichar) runs up to the very end of the chunk.
            while pos < len(chunk):
                if is_jis_char(chunk[pos:pos+2]):
                    pos += 2    #If a shift-JIS character, pos + 2
                elif chunk[pos] in asciichar:
                    pos += 1    #If an ascii character, pos + 1
                else:
                    break
            origlen = pos - 4   #Compute original length
            s = _word_align(data[0][0])     #Word-aligned name string
            #Within the chunk, replace the string
            chunks[i] = replacestr(chunk, s, 4, origlen)
            delta = len(s) - origlen
        else:                   #Script entry
            origlen = len(chunks[i])
            chunk = ''
            for text, flag in data:
                if text[:2] == '0x':    #Opcode
                    #This list of opcodes needs to be word-aligned
                    if text[2:6] in ('4004','4006'):
                        chunk = _word_align(chunk)
                    chunk += text[2:].decode('hex')
                else:                   #Text
                    chunk += text
                    if 'linebreak' in flag:
                        #The linebreak opcode is word-aligned
                        chunk = _word_align(chunk)
                        chunk += '40050000'.decode('hex')
                    if 'delay' in flag:
                        if delay_chunk is None:
                            raise ValueError(
                                "'delay' flag used but no delay chunk "
                                "was found in " + filename + '.orig')
                        #Delay opcode followed by the delay chunk number
                        chunk += '4014'.decode('hex') + \
                                 struct.pack('<H',delay_chunk)
            chunks[i] = chunk           #Write-back the new chunk
            delta = len(chunk) - origlen
        #Every chunk after the current one moves by the length difference
        for j, _ in enumerate(ptr_table):
            if j > i:
                ptr_table[j] += delta

    #Pointer table first, then the chunks in order
    output = ''.join([struct.pack('<I',x) for x in ptr_table]) + \
             ''.join(chunks)
    with open(filename + '.sq','wb') as f:
        f.write(output)

    #Second copy with every bit flipped; os.path.join keeps the path valid
    #on systems where the separator is not a backslash
    flipped = ''.join([chr(ord(x) ^ 255) for x in output])
    with open(os.path.join(pathname, filename + '.sq'),'wb') as g:
        g.write(flipped)

#Create the directory for bit-flipped output on first use
if not os.path.exists(pathname):
    os.mkdir(pathname)
#Insert the script named by the first command-line argument
script_insert(sys.argv[1])