catpack version "works slightly better"

require 'csv'
require 'tempfile'
require 'fileutils'

# Returns the length of the NUL-terminated string at the file's current
# offset (including the terminator) without moving the read position.
def getStringLength(f)
    original_offset = f.tell

    len = 0
    while f.readbyte != 0x00 do
        len += 1
    end
    len += 1 # null terminator

    f.seek(original_offset)
    return len
end

# monkeypatching shenanigans
class File
    def read_int
        read(4).unpack("I")[0]
    end

    def read_string
        read(getStringLength(self)).unpack("Z*")[0]
    end

    def write_int(i)
        write([i].pack("I"))
    end
end
# end shenanigans
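
# A quick sketch of what the helpers above do (values invented for
# illustration): read_int/write_int round-trip a 4-byte unsigned integer, and
# read_string pulls a NUL-terminated string out of the file in place.
#
#   f.write_int(0x40)   # writes 40 00 00 00 on the usual little-endian hosts
#   f.read_int          # => 0x40
#
# Note that pack/unpack "I" is the platform's *native* unsigned int, so this
# assumes a 32-bit little-endian layout, which these archives appear to use.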

class CSV_file_record
    attr_reader :tag, :semantic
    attr_reader :filenames

    def initialize(row)
        @tag = row[0]
        @semantic = row[1] # don't know what this does.

        @filenames = []
    end

    def add_filename(str)
        @filenames << str
    end
end

# I had originally assumed that the CSV string was null-terminated, but some
# archives coincidentally have the end of the CSV string butting right up
# against the start of the actual file data, with no null terminator
# separating the two. As a stopgap, just clip the string at the last newline
# (see the example right after the function).
def unfuck_csv(str)
    last_newline = str.rindex("\n")
    str[0..last_newline]
end
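
# A minimal illustration of the clipping above (the trailing bytes are
# invented; in real archives they're just the start of the binary file data):
#
#   unfuck_csv("GXT,foo,a\nGXT,bar,b\n\x12\x34junk")
#   # => "GXT,foo,a\nGXT,bar,b\n"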

class CSV_file_table
    attr_reader :files
    attr_reader :csv_table_string

    def initialize(file)
        file.seek(file.read_int) # seek to CSV table offset (the first int in the CAT)
        # NEW AND IMPROVED: run the raw table string through unfuck_csv, since
        # some archives don't null-terminate it (see the format notes below this class)
        @csv_table_string = unfuck_csv(file.read_string)

        raw_csv = CSV.parse(@csv_table_string)

        @files = []
        current_file = nil

        raw_csv.each do |row|
            if row[0] != nil # new file defined
                current_file = CSV_file_record.new(row)
                @files << current_file
            end
            current_file.add_filename(row[2])
        end

        print "Loaded CSV table containing #{@files.count} outer files.\n"
    end
end

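# Format notes: this is the archive layout as far as this script understands
# it, pieced together from the reads and writes below rather than from any
# official documentation, so treat it as a best guess.
#
# The CSV table parsed above looks roughly like this (rows and names invented
# for illustration):
#
#   GXT,ui_textures,button_ok
#   ,,button_cancel
#   DAT,strings,strings_en
#
# A row with a non-empty first column (the tag) starts a new outer file; rows
# with an empty first column add further inner filenames to the current one.
# The second column ("semantic") is only ever used here to name dumped
# non-GXT entries.
#
# The CAT container itself appears to be laid out as:
#
#   uint32 csv_table_offset        - absolute offset of the CSV table string
#   uint32 file_offsets[N]         - one absolute offset per outer file
#   uint32 0xFFFFFFFF              - end-of-table marker
#   ...padding to a 0x40 boundary...
#   CSV table string
#   ...file data, each entry padded to a 0x40 boundary...
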
def unpack_cat(filename)
    File.open(filename, 'rb') do |f|
        filetable = CSV_file_table.new(f)

        folder_name = filename[0..-(File.extname(filename).length+1)]
        FileUtils.mkdir_p(folder_name)

        filetable.files.each_with_index do |file, outer_index|
            f.seek(4 + outer_index*4)
            f.seek(f.read_int)

            file_base = f.tell

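            # Layout of a "GXT"-tagged entry, as inferred from the reads
            # below (again a reconstruction, not an official spec):
            #   uint32 header_size          - data starts at file_base + header_size
            #   uint32 file_count           - number of packed images
            #   uint32 archive_size         - total size of the entry, from file_base
            #   uint32 offsets[file_count]  - image offsets relative to the data start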
            if file.tag == "GXT"
                f.seek(file_base)
                if (f.read_int == 0x00545847) # "GXT\0" magic
                    # the data itself is an actual GXT file, which this
                    # script can't unpack
                    puts "Encountered a GXT file. This type of file is not supported; aborting."
                    exit
                end
                f.seek(file_base)
                header_size = f.read_int
                file_count = f.read_int
                archive_size = f.read_int

                gxt_ptr_table_off = f.tell
                gxt_file_offsets = []
                file_count.times { gxt_file_offsets << f.read_int }
                f.seek(file_base + header_size)
                data_base = f.tell

                file.filenames.each_with_index do |inner_name, inner_index|
                    # pull filename from third CSV entry
                    out_filename = inner_name
                    out_filename += ".dds"
                    File.open(folder_name + '/' + out_filename, 'wb') do |outfile|
                        f.seek(file_base + header_size + gxt_file_offsets[inner_index])

                        image_base = f.tell
                        #f.seek(image_base + 0xC)
                        #dimensions = f.read(8).unpack("II")

                        print out_filename + ", "
                        print "0x#{image_base.to_s(16)}"
                        print "\n"

                        #image_filesize = 0x4 + 0x7C + (dimensions[0] * dimensions[1])
                        image_filesize = -1
                        # the last image runs to the end of the entry, so its
                        # size is calculated differently
                        if inner_index == file.filenames.count - 1
                            image_filesize = (file_base + archive_size) - image_base
                        else
                            image_filesize = gxt_file_offsets[inner_index + 1] - gxt_file_offsets[inner_index]
                        end
                        f.seek(image_base)

                        outfile.write(f.read(image_filesize))
                    end
                end
            else
                # unknown tag, just dump the raw data
                f.seek(4 + (outer_index+1)*4)
                # subtract this file's offset from the next file's offset to get
                # an approximate file size (the CAT archive doesn't seem to store
                # the sizes of the files inside it, just their offsets)
                next_ptr = f.read_int
                # a next pointer of 0xFFFFFFFF marks the end of the offset table,
                # so fall back to the size of the whole archive rather than
                # trying to read past EOF
                next_ptr = File.size(filename) if next_ptr == 0xFFFFFFFF
                filesize = next_ptr - file_base

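                # e.g. an entry at offset 0x40 whose successor starts at 0x1C0
                # gets dumped as 0x180 bytes (offsets invented for illustration)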
                out_filename = file.semantic + "." + file.tag
                File.open(folder_name + '/' + out_filename, 'wb') do |outfile|
                    f.seek(file_base)
                    outfile.write(f.read(filesize))
                    puts "Dumped 0x#{filesize.to_s(16)} bytes of unknown data from offset 0x#{file_base.to_s(16)} to #{out_filename}."
                end
            end
        end
    end
end

def padded_to(value, interval)
    return value if (value % interval == 0)
    return value + (interval - (value % interval))
end
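
# A couple of sample values for the helper above:
#   padded_to(0x41, 0x40) # => 0x80
#   padded_to(0x80, 0x40) # => 0x80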

def repack_cat(filename)
    filetable = nil
    File.open(filename, 'rb') do |f|
        filetable = CSV_file_table.new(f)
    end

    folder_name = filename[0..-(File.extname(filename).length+1)]
    if !File.directory?(folder_name)
        puts "No valid directory found to repack."
        puts "(Directory #{folder_name} is either a file or is nonexistent.)"
        exit
    end

    fileblobs = []
    filetable.files.each_with_index do |file, outer_index|
        blob = nil
        if file.tag == "GXT"
            # assemble a GXT entry using the files in the unpacked directory;
            # Tempfile delegates to an underlying File, so the write_int
            # monkeypatch above still applies here
            t = Tempfile.new('gxt')
            t.binmode

            # write temporary header
            3.times { t.write_int(0) }

            # write temporary offset table
            file.filenames.count.times { t.write_int(0) }

            file_data_base = t.tell
            offsets = []
            file.filenames.each do |name|
                offsets << t.tell - file_data_base
                target_path = folder_name + '/' + name + '.dds'
                if !File.exist?(target_path)
                    puts "Target file #{target_path} doesn't exist!"
                    # TODO: should probably just grab the file from the original archive but I can't be arsed
                end
                File.open(target_path, 'rb') do |target|
                    dat = target.read
                    t.write(dat)
                end
            end

            archive_size = t.tell

            # now write the real header and offset table
            t.seek(0)
            t.write_int(file_data_base)
            t.write_int(file.filenames.count)
            t.write_int(archive_size)
            offsets.each do |offset|
                t.write_int(offset)
            end

            # finally, shove the GXT contents into our blob
            t.seek(0)
            blob = t.read
            t.seek(0)

            t.close
            t.unlink
        else
            # create a binary blob and stick that into the new file
            out_filename = file.semantic + "." + file.tag
            File.open(folder_name + '/' + out_filename, 'rb') do |target|
                blob = target.read
            end
        end

        fileblobs << blob
    end

    # we have an array of file blobs, now write a CAT
    # first, make a backup
    FileUtils.cp(filename, filename + '.backup') unless File.exist?(filename + '.backup')
    if !File.exist?(filename + '.backup')
        puts "Failed to create backup; aborting."
        exit
    end

    File.open(filename, 'wb') do |f|
        # write temp header
        f.write_int(0)
        # write temp offset table
        filetable.files.count.times { f.write_int(0) }
        f.write_int(0xFFFFFFFF) # write end pointer

        # the CAT archives seem to pad their entries to 0x40,
        # so I'll do the same
        csv_offset = padded_to(f.tell, 0x40)
        # write the CSV table string
        f.seek(csv_offset)
        f.write(filetable.csv_table_string)

        offsets = []
        fileblobs.each do |blob|
            f.seek(padded_to(f.tell, 0x40))
            offsets << f.tell

            f.write(blob)
        end

        # now write the real header
        f.seek(0)
        f.write_int(csv_offset)
        # and the offset table
        offsets.each do |offset|
            f.write_int(offset)
        end
    end

    # done
end

filename = ""

if ARGV.count < 2
    puts "Not enough arguments. Specify an operation (unpack/repack) and a path to the target archive."
    exit
end

operation = ARGV[0]
filename = ARGV[1]

if filename == ""
    puts "No file specified."
    exit
end

if operation == "unpack" || operation == "u"
    unpack_cat(filename)
elsif operation == "repack" || operation == "r"
    repack_cat(filename)
else
    puts "Unknown operation #{operation} specified."
    exit
end
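
# Example invocations (the script and archive names here are placeholders,
# nothing the tool itself enforces):
#
#   ruby catpack.rb unpack SOMETHING.CAT   # dumps the contents into ./SOMETHING/
#   ruby catpack.rb repack SOMETHING.CAT   # rebuilds the archive, writing SOMETHING.CAT.backup
#                                          # first (unless a backup already exists)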