Guest User

DQ4 script dumper

a guest
Mar 21st, 2017
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.96 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf_8 -*-
  3.  
  4. from __future__ import print_function
  5. import collections, re, struct, sys, unicodedata
  6.  
  7.  
  8. if str is bytes:
  9.     # python2
  10.     def nextmethod(i):
  11.         return iter(i).next
  12. else:
  13.     # python3
  14.     def nextmethod(i):
  15.         return iter(i).__next__
  16.  
  17.  
  18. def rompos(bank, address):
  19.     """Return the absolute ROM position for a bank and CPU address.
  20.    Arguments:
  21.    bank: 16KB ROM bank number
  22.    address: address in CPU address space; must be between 0x8000 and 0xbfff
  23.    """
  24.     return bank * 0x4000 + address - 0x8000
  25.  
  26.  
  27. def pointers(data, address, count):
  28.     """Return a list of pointers extracted from the ROM.
  29.    Arguments:
  30.    data: the ROM data
  31.    address: start of pointer table in CPU address space; assumed to be in bank 0x16
  32.    count: number of pointers to extract
  33.    """
  34.     return struct.unpack_from("<%dH" % count, data, rompos(0x16, address))
  35.  
  36.  
  37. class Octets(collections.Iterator):
  38.     """Iterator that yields octets from the ROM starting at the given
  39.    bank and address, straddling banks as necessary.
  40.  
  41.    Constructor arguments:
  42.    data: the ROM data
  43.    bank: the logical ROM bank (0-5) 5 is translated to physical bank 0x1b
  44.    ptr:  start of stream in CPU address space
  45.  
  46.    Public attributes:
  47.    pos:  absolute ROM position of last byte yielded, or start position if
  48.          no bytes yielded yet. This attribute is read-only.
  49.    """
  50.     def __init__(self, data, bank, ptr):
  51.         self._data = data
  52.         self._offsets = self._offsetgen(bank, ptr)
  53.         self.pos = rompos(0x1b if bank == 5 else bank, ptr)
  54.  
  55.     def __next__(self):
  56.         self.pos = next(self._offsets)
  57.         return self._data[self.pos]
  58.     next = __next__ # for python2
  59.  
  60.     @staticmethod
  61.     def _offsetgen(bank, ptr):
  62.         banks = ((0,    0x8000),
  63.                  (1,    0x8000),
  64.                  (2,    0x8000),
  65.                  (3,    0x8000),
  66.                  (4,    0x8000),
  67.                  (0x1b, 0xa500))
  68.  
  69.         b, start = banks[bank]
  70.         if not (start <= ptr < 0xbfd8):
  71.             raise ValueError("WTF? Pointer out of range")
  72.  
  73.         for i in range(rompos(b, ptr), rompos(b, 0xbfd8)):
  74.             yield i
  75.  
  76.         for b, start in banks[bank+1:]:
  77.             for i in range(rompos(b, start), rompos(b, 0xbfd8)):
  78.                 yield i
  79.  
  80.  
  81. def sextets(octet):
  82.     """Generate sextets from a sequence of octets.
  83.  
  84.    In:
  85.    7      0 7      0 7      0
  86.    AAAAAAAA BBBBBBBB CCCCCCCC
  87.  
  88.    Out:
  89.    5    05     05     05    0
  90.    aaaaaabb bbbbcccc ccdddddd
  91.  
  92.    Arguments:
  93.    octet: any iterable that yields bytes
  94.    """
  95.     octet = iter(octet)
  96.     try:
  97.         while 1:
  98.             x = next(octet)
  99.             yield x >> 2
  100.             x = x << 8 | next(octet)
  101.             yield x >> 4 & 0x3f
  102.             x = x << 8 | next(octet)
  103.             yield x >> 6 & 0x3f
  104.             yield x & 0x3f
  105.     except StopIteration:
  106.         return
  107.  
  108.  
  109. def expander(lut, substring):
  110.     """Return a generator that expands sextets via the LUT and substring dictionary.
  111.    Arguments:
  112.    lut:       128-entry 6-bit to 8-bit lookup table
  113.    substring: 192-entry substring dictionary
  114.    Returns:
  115.    A generator that takes a sequence of sextets and yields expanded bytes.
  116.    """
  117.     def generator(sextet):
  118.         kanatype = 0
  119.         sextet = nextmethod(sextet)
  120.         for byte in iter(sextet, 0x39):
  121.             if byte < 0x3c:            # 0-0x3b: single character from table
  122.                 x = lut[kanatype + byte]
  123.                 if 0: # if x == 0xf0:
  124.                     for i in substring[0]:
  125.                         yield i
  126.                 else:
  127.                     yield x
  128.             elif byte == 0x3c:         # 0x3c: switch tables
  129.                 kanatype ^= 0x40
  130.             else:                      # 0x3d-0x3f: substring
  131.                 try:
  132.                     x = (byte - 0x3d) << 6 | sextet()
  133.                 except StopIteration:
  134.                     raise RuntimeError("WTF? Unexpected end of sextet stream")
  135.                 for i in substring[x]:
  136.                     yield i
  137.  
  138.         # iter(o, sentinel) cuts off the stop value, so yield it manually
  139.         yield lut[kanatype + 0x39]
  140.     return generator
  141.  
  142.  
  143. def decode(expanded):
  144.     """Return a string decoded from a sequence of expanded bytes.
  145.    """
  146.     # U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
  147.     # U+309A COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
  148.     charset = (u" 0123456789あいうえお"
  149.                u"かきくけこさしすせそたちつてとな"
  150.                u"にぬねのはひふへほまみむめもやゆ"
  151.                u"よらりるれろわをんっゃゅょ\u3099\u309a。"
  152.                u"アイエオカキクコサシスソタテトナ"
  153.                u"ニヌネノハヒフホマミムメモラルレ"
  154.                u"ロンャッドーじだどぶ゜゛.?!「"
  155.                u"*:…十骨MPXABCDEFGH")
  156.     maxc = len(charset)
  157.  
  158.     ret = []
  159.     append = ret.append
  160.     expanded = iter(expanded)
  161.     for byte in expanded:
  162.         if byte in (0x3d, 0x3e):
  163.             # move dakuten/handakuten after the next character
  164.             try:
  165.                 append(charset[next(expanded)])
  166.             except StopIteration:
  167.                 break
  168.         # dump nonprintable characters (control codes, etc.) as hex
  169.         append(charset[byte] if byte < maxc else u"<%02X>" % byte)
  170.         if byte in (0xfb, 0xfc):
  171.             append(u"\n")
  172.     return unicodedata.normalize("NFC", u"".join(ret))
  173.  
  174.  
  175. def prettify():
  176.     # use a regular expression to find places to fix:
  177.     #
  178.     # katakana followed by べぺり (but not へ, since it may be a particle)
  179.     # へべぺり followed by katakana or ー (this catches ヘ in the middle of a word)
  180.     # フ followed by ア or エ (make ファ or フェ with small katakana)
  181.     # リリパット (monster name, the rules above don't catch the first リ)
  182.     # イエティ, ガーディアン (monster names wanting small ィ)
  183.     sub = re.compile(u"[ア-ン][べぺり]|[へべぺり][ア-ー]|フ[アエ]|りりパ|エテイ|デイア").sub
  184.  
  185.     # use builtin str.translate() to convert hiragana to katakana
  186.     try:
  187.         trans = str.maketrans
  188.     except AttributeError: # python2 doesn't have unicode-capable maketrans()
  189.         def trans(src, dest):
  190.             return {ord(x): ord(y) for x, y in zip(src, dest)}
  191.     trans = trans(u"へべぺり", u"ヘベペリ")
  192.  
  193.     # use a dictionary to do the other fixes, and to override some conversions
  194.     specialfix = {u"へー":u"へー", # leave へ alone in でへへー, etc.
  195.                   u"りア":u"りア", # leave り alone in さそりアーマー
  196.                   u"エテイ":u"エティ",
  197.                   u"デイア":u"ディア",
  198.                   u"フア":u"ファ",
  199.                   u"フエ":u"フェ"}.get
  200.  
  201.     # wrap the fixes in a function that can be passed to re.sub()
  202.     def fix(x):
  203.         x = x.group(0)
  204.         return specialfix(x, x.translate(trans))
  205.  
  206.     # finally, wrap everything in a lambda closure
  207.     # so all the above setup is only done once
  208.     return lambda x: sub(fix, x)
  209. prettify = prettify()
  210.  
  211.  
  212. def main():
  213.     try:
  214.         fn = sys.argv[1]
  215.     except (IndexError, ValueError):
  216.         sys.exit("Usage: dq4text filename")
  217.  
  218.     # read the ROM, discarding the .nes header
  219.     with open(fn, "rb") as f:
  220.         f.seek(0x10)
  221.         data = bytearray(f.read())
  222.  
  223.     # extract the 6-bit to 8-bit lookup tables (64 hiragana + 64 katakana)
  224.     pos = rompos(0x16, 0x8765)
  225.     lut = data[pos:pos+128]
  226.  
  227.     # extract the 192-entry substring dictionary
  228.     # substrings are plain byte strings (not sextet-packed) and end with 0xfe
  229.     # like the main script, there is one pointer per 32 substrings
  230.     substring = []
  231.     for ptr in pointers(data, 0x87e5, 6):
  232.         n = nextmethod(Octets(data, 5, ptr))
  233.         substring.extend([bytearray(iter(n, 0xfe)) for i in range(32)])
  234.  
  235.     # set up the sextet decoder
  236.     expand = expander(lut, substring)
  237.  
  238.     # extract the bank bins
  239.     pos = rompos(0x16, 0x8960)
  240.     bankbins = data[pos:pos+5]
  241.  
  242.     bank = 0
  243.     dupeptr = set()
  244.     for i, ptr in enumerate(pointers(data, 0x88b0, 0x58)):
  245.         if i in bankbins:
  246.             bank += 1
  247.  
  248.         if (bank, ptr) in dupeptr:
  249.             continue
  250.         dupeptr.add((bank, ptr))
  251.  
  252.         octet = Octets(data, bank, ptr)
  253.         sextet = sextets(octet)
  254.  
  255.         # there are a few "holes" in the script
  256.         # (pointers with fewer than 32 valid strings after them)
  257.         count = {4: 11,
  258.                 15: 27,
  259.                 87: 3}.get(i, 32)
  260.  
  261.         start = octet.pos
  262.         for j in range(i * 32, i * 32 + count):
  263.             output = prettify(decode(expand(sextet)))
  264.             end = octet.pos
  265.             print("$%03X ($%05X-$%05X):" % (j, start, end))
  266.             print(output)
  267.             start = end
  268.  
  269.  
  270. if __name__ == "__main__":
  271.     main()
Add Comment
Please, Sign In to add comment