Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf_8 -*-
- from __future__ import print_function
- import collections, re, struct, sys, unicodedata
def nextmethod(i):
    """Return the bound "advance" method of a fresh iterator over i.

    The result is a zero-argument callable: each call returns the next item
    and StopIteration is raised once the iterator is exhausted.  That makes
    it suitable for the two-argument form iter(callable, sentinel).

    Arguments:
        i: any iterable (or iterator)
    """
    iterator = iter(i)
    if str is bytes:
        # python2 names the iterator protocol method "next"
        return iterator.next
    # python3 names it "__next__"
    return iterator.__next__
def rompos(bank, address):
    """Translate a (bank, CPU address) pair into an absolute ROM position.

    Arguments:
        bank: 16KB ROM bank number
        address: address in CPU address space; expected to be between
                 0x8000 and 0xbfff (this is not checked here)

    Returns:
        The start of the bank (bank * 0x4000) plus the distance of the
        address from 0x8000.
    """
    bank_size = 0x4000    # 16KB per bank
    window_base = 0x8000  # CPU address that corresponds to offset 0 of a bank
    offset_in_bank = address - window_base
    return bank * bank_size + offset_in_bank
def pointers(data, address, count):
    """Extract a table of 16-bit pointers from the ROM.

    Arguments:
        data: the ROM data
        address: start of pointer table in CPU address space; the table is
                 assumed to be in bank 0x16
        count: number of pointers to extract

    Returns:
        A tuple of `count` unsigned 16-bit integers, read little-endian.
    """
    table_offset = rompos(0x16, address)
    # "<" = little-endian, "H" = unsigned 16-bit; e.g. count=6 gives "<6H"
    layout = "<%dH" % (count,)
    return struct.unpack_from(layout, data, table_offset)
try:
    # python3: the ABCs live in collections.abc; the old aliases in the
    # top-level collections module were removed in Python 3.10
    from collections.abc import Iterator as _Iterator
except ImportError:
    # python2: the ABCs are only available directly from collections
    from collections import Iterator as _Iterator


class Octets(_Iterator):
    """Iterator that yields octets from the ROM starting at the given
    bank and address, straddling banks as necessary.

    Constructor arguments:
        data: the ROM data (indexed by absolute ROM position)
        bank: the logical ROM bank (0-5); 5 is translated to physical
              bank 0x1b
        ptr: start of stream in CPU address space

    Public attributes:
        pos: absolute ROM position of last byte yielded, or start position if
             no bytes yielded yet. Treat this attribute as read-only.

    Note that the pointer is validated lazily: an out-of-range ptr raises
    ValueError on the first call to next(), not in the constructor.
    """

    def __init__(self, data, bank, ptr):
        self._data = data
        self._offsets = self._offsetgen(bank, ptr)
        self.pos = rompos(0x1b if bank == 5 else bank, ptr)

    def __next__(self):
        # remember where the byte came from before handing it out
        self.pos = next(self._offsets)
        return self._data[self.pos]

    next = __next__  # for python2

    @staticmethod
    def _offsetgen(bank, ptr):
        """Generate absolute ROM positions from (bank, ptr) onwards.

        Positions run to the end of the text area of the starting bank and
        then continue through the text areas of all following logical banks.

        Raises:
            ValueError: if ptr lies outside the text area of its bank.
        """
        # (physical bank, first text address) for each logical bank
        banks = ((0, 0x8000),
                 (1, 0x8000),
                 (2, 0x8000),
                 (3, 0x8000),
                 (4, 0x8000),
                 (0x1b, 0xa500))
        # the text area of every bank stops just before this CPU address
        end = 0xbfd8
        b, start = banks[bank]
        if not (start <= ptr < end):
            raise ValueError("WTF? Pointer out of range")
        # remainder of the starting bank
        for i in range(rompos(b, ptr), rompos(b, end)):
            yield i
        # then every following bank in full
        for b, start in banks[bank + 1:]:
            for i in range(rompos(b, start), rompos(b, end)):
                yield i
def sextets(octet):
    """Generate sextets (6-bit values) from a sequence of octets.

    Every group of three octets is split, most significant bits first,
    into four sextets:

        In:   AAAAAAAA BBBBBBBB CCCCCCCC
        Out:  AAAAAA AABBBB BBBBCC CCCCCC

    If the input ends in the middle of a group, the sextets that could
    already be completed are yielded and the generator stops.

    Arguments:
        octet: any iterable that yields bytes
    """
    source = iter(octet)
    while True:
        try:
            acc = next(source)
            yield acc >> 2                # top six bits of the 1st octet
            acc = (acc << 8) | next(source)
            yield (acc >> 4) & 0x3f       # 2 bits of 1st + 4 bits of 2nd
            acc = (acc << 8) | next(source)
            yield (acc >> 6) & 0x3f       # 4 bits of 2nd + 2 bits of 3rd
            yield acc & 0x3f              # low six bits of the 3rd octet
        except StopIteration:
            # input exhausted (possibly mid-group): end the generator
            return
def expander(lut, substring):
    """Return a generator that expands sextets via the LUT and substring dictionary.

    Arguments:
        lut: 128-entry 6-bit to 8-bit lookup table, made of two 64-entry
             halves; sextet 0x3c toggles which half is in use
        substring: 192-entry substring dictionary

    Returns:
        A generator function that takes a sequence of sextets and yields
        expanded bytes for ONE string.  It stops consuming at the terminator
        sextet 0x39 (or when the input runs out), so it can be called
        repeatedly on the same shared iterator to expand successive strings.
        The expanded terminator is always yielded as the last byte.

    The generator raises RuntimeError if the input ends right after a
    substring prefix sextet (0x3d-0x3f).
    """
    def generator(sextet):
        kanatype = 0  # offset of the active LUT half: 0 or 0x40
        stream = iter(sextet)
        for byte in stream:
            if byte == 0x39:  # 0x39: end of string
                break
            if byte < 0x3c:  # 0-0x3b: single character from table
                # NOTE: the original author left a disabled experiment here
                # that expanded LUT value 0xf0 into substring[0]; it was
                # never active and is not reproduced.
                yield lut[kanatype + byte]
            elif byte == 0x3c:  # 0x3c: switch tables
                kanatype ^= 0x40
            else:  # 0x3d-0x3f: substring
                # the prefix supplies the top 2 bits of the dictionary
                # index, the following sextet the low 6 bits
                try:
                    low = next(stream)
                except StopIteration:
                    raise RuntimeError("WTF? Unexpected end of sextet stream")
                for i in substring[(byte - 0x3d) << 6 | low]:
                    yield i
        # the loop swallows the stop value, so yield it manually
        yield lut[kanatype + 0x39]
    return generator
def decode(expanded):
    """Return a string decoded from a sequence of expanded bytes.

    Bytes below 0x80 are looked up in the character set; anything else is
    dumped as "<XX>" hex.  Bytes 0xfb and 0xfc are additionally followed by
    a newline.  A dakuten/handakuten byte (0x3d/0x3e) precedes the character
    it modifies, so it is moved after the following character and the result
    is NFC-normalized to obtain the precomposed kana.

    If the byte following a dakuten/handakuten is not a printable character,
    no swap is done: the mark is emitted first and the following byte is
    dumped like any other control code.  A mark at the very end of the input
    is dropped.

    Arguments:
        expanded: any iterable that yields expanded bytes (integers)
    """
    # U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
    # U+309A COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    charset = (u" 0123456789あいうえお"
               u"かきくけこさしすせそたちつてとな"
               u"にぬねのはひふへほまみむめもやゆ"
               u"よらりるれろわをんっゃゅょ\u3099\u309a。"
               u"アイエオカキクコサシスソタテトナ"
               u"ニヌネノハヒフホマミムメモラルレ"
               u"ロンャッドーじだどぶ゜゛.?!「"
               u"*:…十骨MPXABCDEFGH")
    maxc = len(charset)
    marks = (0x3d, 0x3e)          # combining dakuten / handakuten
    newline_after = (0xfb, 0xfc)  # bytes that also end the output line

    def render(byte):
        # dump nonprintable characters (control codes, etc.) as hex
        text = charset[byte] if byte < maxc else u"<%02X>" % byte
        if byte in newline_after:
            text += u"\n"
        return text

    ret = []
    expanded = iter(expanded)
    for byte in expanded:
        if byte not in marks:
            ret.append(render(byte))
            continue
        try:
            following = next(expanded)
        except StopIteration:
            # nothing left for the mark to attach to
            break
        if following < maxc:
            # move dakuten/handakuten after the next character
            ret.append(charset[following])
            ret.append(charset[byte])
        else:
            # a control code cannot carry the mark: keep the stream order
            # instead of indexing past the end of the character set
            ret.append(charset[byte])
            ret.append(render(following))
    return unicodedata.normalize("NFC", u"".join(ret))
def prettify():
    """Build the text clean-up function (the name is rebound to it below).

    The game stores a few kana only once and reuses them for both scripts,
    so decoded text contains hiragana where katakana is meant, and full-size
    kana where small ones are meant.  The returned function takes a decoded
    string and returns it with those spots repaired.
    """
    # A regular expression finds the places to fix:
    #
    # katakana followed by べぺり (but not へ, since it may be a particle)
    # へべぺり followed by katakana or ー (this catches ヘ in the middle of a word)
    # フ followed by ア or エ (make ファ or フェ with small katakana)
    # リリパット (monster name, the rules above don't catch the first リ)
    # イエティ, ガーディアン (monster names wanting small ィ)
    pattern = re.compile(
        u"[ア-ン][べぺり]|[へべぺり][ア-ー]|フ[アエ]|りりパ|エテイ|デイア")

    # Translation table for str.translate(): hiragana -> katakana.
    try:
        make_table = str.maketrans
    except AttributeError:  # python2 doesn't have unicode-capable maketrans()
        def make_table(src, dest):
            return dict((ord(a), ord(b)) for a, b in zip(src, dest))
    kana_table = make_table(u"へべぺり", u"ヘベペリ")

    # Matches listed here get a fixed replacement instead of the translation.
    overrides = {
        u"へー": u"へー",  # leave へ alone in でへへー, etc.
        u"りア": u"りア",  # leave り alone in さそりアーマー
        u"エテイ": u"エティ",
        u"デイア": u"ディア",
        u"フア": u"ファ",
        u"フエ": u"フェ",
    }

    def replacement(match):
        # callback for re.sub(): an override wins, otherwise translate
        found = match.group(0)
        if found in overrides:
            return overrides[found]
        return found.translate(kana_table)

    def apply_fixes(x):
        return pattern.sub(replacement, x)

    # hand back a closure so all the above setup is only done once
    return apply_fixes


prettify = prettify()
def main():
    """Dump every script string of the ROM named on the command line.

    For each string a header line with the string number and the ROM
    position range it was read from is printed, followed by the decoded
    text.  Exits with a usage message if no filename was given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: dq4text filename")
    rom_path = sys.argv[1]

    # read the ROM, discarding the .nes header
    with open(rom_path, "rb") as rom:
        rom.seek(0x10)
        data = bytearray(rom.read())

    # extract the 6-bit to 8-bit lookup tables (64 hiragana + 64 katakana)
    lut_start = rompos(0x16, 0x8765)
    lut = data[lut_start:lut_start + 128]

    # extract the 192-entry substring dictionary
    # substrings are plain byte strings (not sextet-packed) and end with 0xfe
    # like the main script, there is one pointer per 32 substrings
    substring = []
    for table_ptr in pointers(data, 0x87e5, 6):
        read_byte = nextmethod(Octets(data, 5, table_ptr))
        for _ in range(32):
            substring.append(bytearray(iter(read_byte, 0xfe)))

    # set up the sextet decoder
    expand = expander(lut, substring)

    # extract the bank bins: the pointer indexes at which the script
    # moves on to the next logical bank
    bins_start = rompos(0x16, 0x8960)
    bankbins = data[bins_start:bins_start + 5]

    # there are a few "holes" in the script
    # (pointers with fewer than 32 valid strings after them)
    short_groups = {4: 11,
                    15: 27,
                    87: 3}

    bank = 0
    seen = set()
    for i, ptr in enumerate(pointers(data, 0x88b0, 0x58)):
        if i in bankbins:
            bank += 1
        # skip pointers that repeat an already dumped (bank, address) pair
        key = (bank, ptr)
        if key in seen:
            continue
        seen.add(key)

        octet = Octets(data, bank, ptr)
        sextet = sextets(octet)
        first_id = i * 32
        start = octet.pos
        for j in range(first_id, first_id + short_groups.get(i, 32)):
            output = prettify(decode(expand(sextet)))
            end = octet.pos
            print("$%03X ($%05X-$%05X):" % (j, start, end))
            print(output)
            start = end


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment