View difference between Paste ID: mQNFFvck and BhczNMC2
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/python
2
# -*- coding: latin-1 -*-
3
4
import struct
5
import codecs
6
7
def readBits(bits, currentByte, bitCount, file):
  """Read `bits` bits, most significant bit first, from a byte stream.

  The reader state is passed in and handed back, so consecutive calls
  continue exactly where the previous one stopped.

  Args:
    bits: number of bits to consume; 0 returns 0 without touching `file`.
    currentByte: the byte currently being consumed.
    bitCount: how many bits of `currentByte` are already used. Any value
      above 7 makes the next bit come from a freshly read byte.
    file: binary file-like object positioned at the next unread byte.

  Returns:
    Tuple (value, currentByte, bitCount): the decoded unsigned integer
    followed by the updated reader state.

  Raises:
    struct.error: if `file` has no data left when a new byte is needed.
  """
  value = 0
  for _ in range(bits):
    if bitCount > 7:
      # Current byte is used up: fetch the next one and restart at its MSB.
      currentByte = struct.unpack('B', file.read(1))[0]
      bitCount = 0

    nextBit = (currentByte >> (7 - bitCount)) & 1
    bitCount += 1

    # Shift the accumulated value left and append the new bit at the bottom.
    value = (value << 1) | nextBit
  return value, currentByte, bitCount
19-
# Character table used for every language except Spanish, keyed by the
# character code that the callers read from the ROM.
# Accented characters are written as u'\uXXXX' escapes so the table has the
# same contents on Python 2 and Python 3.
# NOTE(review): the entries for 0x0B, 0x0F, 0x11 and 0x14 were unreadable in
# the copy this table was recovered from; they follow the A-Z run and match
# the Spanish table -- confirm against the ROM.
_DEFAULT_LETTERS = {
  0x00: ' ',
  0x01: 'c',  # c1 = HERO c2 = HEROINE
  0x02: 'A',
  0x03: 'B',
  0x04: 'C',
  0x05: 'D',
  0x06: 'E',
  0x07: 'F',
  0x08: 'G',
  0x09: 'H',
  0x0A: 'I',
  0x0B: 'J',
  0x0C: 'K',
  0x0D: 'L',
  0x0E: 'M',
  0x0F: 'N',
  0x10: 'O',
  0x11: 'P',
  0x12: 'Q',
  0x13: 'R',
  0x14: 'S',
  0x15: 'T',
  0x16: 'U',
  0x17: 'V',
  0x18: 'W',
  0x19: 'X',
  0x1A: 'Y',
  0x1B: 'Z',
  0x1C: '?',
  0x1D: '0',
  0x1E: '1',
  0x1F: '2',
  0x20: '3',
  0x21: '4',
  0x22: '5',
  0x23: '6',
  0x24: '7',
  0x25: '8',
  0x26: '9',
  0x27: '!',
  0x28: ',',
  0x29: '.',
  0x2A: "'",
  0x2B: 'b',  # clear box
  0x2C: 'c',  # carriage return
  0x2D: 'd',  # string end
  0x2E: 'e',  # todo
  0x2F: u'\u00c4',  # A with diaeresis
  0x30: u'\u00c8',  # E with grave
  0x31: u'\u00c2',  # A with circumflex
  0x32: u'\u00df',  # sharp s
  0x33: '-',
  0x34: u'\u00c9',  # E with acute
  0x35: u'\u00d1',  # N with tilde
  0x36: u'\u00c0',  # A with grave
  0x37: u'\u00d6',  # O with diaeresis
  0x38: u'\u00dc',  # U with diaeresis
  0x39: u'\u00ca',  # E with circumflex
  0x3A: u'\u00bf',  # inverted question mark
  0x3B: u'\u00a1',  # inverted exclamation mark
}

# The Spanish table differs from the default one in exactly four codes.
_SPANISH_LETTERS = dict(_DEFAULT_LETTERS)
_SPANISH_LETTERS.update({
  0x30: u'\u00da',  # U with acute
  0x31: u'\u00d3',  # O with acute
  0x32: u'\u00cd',  # I with acute
  0x36: u'\u00c1',  # A with acute
})


def numberToLetter(num, spanish=False):
  """Translate a character code into its printable character.

  Args:
    num: character code to look up.
    spanish: when true, use the Spanish character set instead of the
      default one.

  Returns:
    The mapped one-character string, or '?' for a code that is in neither
    table. Control codes (clear box, carriage return, string end) come back
    as the placeholder letters listed in the table, not as real control
    characters.
  """
  table = _SPANISH_LETTERS if spanish else _DEFAULT_LETTERS
  return table.get(num, '?')
147
148
def dumpTable(num, infile, outfile):
  """Decode word table `num` from the ROM and write it one word per line.

  Each of the 256 entries is stored as a 5-bit length followed by that many
  6-bit character codes, packed back to back with no byte alignment.

  Args:
    num: table index. It selects the slot in the pointer table, the ROM
      bank (0xC when num > 3, otherwise 0x3) and, when equal to 4, the
      Spanish character set.
    infile: seekable binary file object holding the ROM image.
    outfile: binary file object; receives 256 UTF-8 encoded lines, each
      terminated by a newline.

  Raises:
    struct.error: if the ROM image ends before the table has been read.
  """
  bank = 0xC if num > 3 else 0x3

  # One 16-bit pointer per table, starting at file offset 0xC026.
  infile.seek(0xC026 + 2 * num)
  # '<H' pins the byte order to little-endian. A bare 'H' uses the host's
  # native order, which only gives the same result on little-endian machines.
  pointer = struct.unpack('<H', infile.read(2))[0]
  # Turn the pointer into a file offset inside the selected 0x4000-byte bank.
  infile.seek(pointer + 0x4000 * (bank - 1))

  # bitCount = 8 marks the (dummy) current byte as exhausted, so the first
  # readBits call starts by fetching a byte from the file.
  currentByte = 0
  bitCount = 8
  useSpanish = (num == 4)

  for _ in range(256):
    wordLength, currentByte, bitCount = readBits(5, currentByte, bitCount, infile)
    letters = []
    for _ in range(wordLength):
      code, currentByte, bitCount = readBits(6, currentByte, bitCount, infile)
      letters.append(numberToLetter(code, useSpanish))

    # Encode first, then append a bytes newline: this works on a binary file
    # under both Python 2 and Python 3.
    outfile.write("".join(letters).encode('UTF-8') + b"\n")
169
170
#main
# Pass 1: dump the five word tables from the ROM to table0.txt .. table4.txt.
with open("rom.gb", "rb") as rom:
  for i in range(0, 5):
    with open("table"+str(i)+".txt", "wb") as tableFile:
      dumpTable(i, rom, tableFile)

# Pass 2: read the tables back; words[lang] is the list of lines (each still
# carrying its trailing newline) for that language.
words = []
for i in range(0, 5):
  with codecs.open("table"+str(i)+".txt", "r", 'utf-8') as f:
    words.append(f.readlines())

# Pass 3: decode the 139 strings of every language into strings<lang>.txt.
with open("rom.gb", "rb") as rom:
  for lang in range(0, 5):
    # The bank holding the string data depends only on the language.
    if lang < 2:
      bank = 0x0B
    elif lang < 3:
      bank = 0x0D
    else:
      bank = 0x0C

    with codecs.open("strings"+str(lang)+".txt", "wb", 'utf-8') as out:
      for i in range(0, 139):
        # Pointer table at 0x2C000: five 16-bit entries (one per language)
        # for each string.
        rom.seek(0x2C000 + i * 5 * 2 + lang * 2)
        # '<H' pins little-endian; a bare 'H' would follow the host CPU.
        pointer = struct.unpack('<H', rom.read(2))[0]
        rom.seek(pointer + 0x4000 * (bank - 1))

        # bitCount = 8 forces readBits to fetch a fresh byte on first use.
        currentByte = 0
        bitCount = 8
        sentence = ""

        while True:
          bit, currentByte, bitCount = readBits(1, currentByte, bitCount, rom)
          if bit == 0:
            # Flag 0: a single 6-bit character code follows.
            currentLetter, currentByte, bitCount = readBits(6, currentByte, bitCount, rom)
            if currentLetter == 0x2D:
              # 0x2D is the string-end code.
              break

            sentence += numberToLetter(currentLetter, (lang == 4))
          else:
            # Flag 1: an 8-bit index into this language's word table follows.
            wordIndex, currentByte, bitCount = readBits(8, currentByte, bitCount, rom)
            # [:-1] drops the newline kept by readlines().
            sentence += words[lang][wordIndex][:-1]

        out.write(sentence + "\n")