Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public void comptext(byte[] src, byte[] dest) {
- DateTime c = DateTime.Now;//.Ticks;
- //Welcome to my Huffman Hamburger! (Scan text, do complex char tables, compress text.)
- //Scan text to generate frequency table.
- ushort char1 = 0, char2 = 0;
- ushort[] freq = new ushort[0x10000]; //We need to know how often each character combination occurs to determine best bit amounts.
- ushort[] clen = new ushort[0x100]; //How many chars each char has associated with it.
- ushort[] clst = new ushort[0x10000]; //Char list in the order they appear in the text.
- int srcEntry = 0;
- while ((Bits.getInt32(src, srcEntry) != 0) || (srcEntry == 0)) { //Set up frequency table and char list (in order displayed in text.)
- int srcPos = 0xC300 + Bits.getInt32(src, srcEntry);
- do {
- char2 = src[srcPos++];
- if (freq[char1 * 0x100 + char2]++ == 0) {
- clst[char1 * 0x100 + clen[char1]++] = char2; //clen[char1]++;// += 1;
- }
- //freq[char1 * 0x100 + char2] += 1;
- char1 = char2;
- } while (char1 != 0); //Change to while < textArray?-- No, go through offset list instead.
- srcEntry += 4;
- }
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/clst.dmp", Array.ConvertAll<short, byte>(clst, delegate(short item) { return (byte)item; }));
- byte[] bitLen = new byte[0x10000]; //int[] bitLen = new int[0x10000];
- int[] bitCode = new int[0x10000];
- int addr2 = 0, chrptlen = 0;
- byte[] chrTbl = new byte[0x8000];
- byte[] chrPtrs = new byte[0x200];
- for (int c1 = 0; c1 < 0x100; c1++) {
- if (clen[c1] == 0) { chrPtrs[(c1 << 1) + 1] = 0x80; continue; }
- chrptlen = (c1 + 1) << 1;
- //if (c1 > 5) { continue; } //For testing.
- //Sort chars by symbol frequency (simple)
- //See https://en.wikipedia.org/wiki/Sorting_algorithm - Use a Stable one so same-freq chars stay in order.
- //I pick simple Insertion Sort for now, since we are dealing with small sets. (https://en.wikipedia.org/wiki/Insertion_sort)
- for (int i = 1; i < clen[c1]; i++) {
- ushort x = clst[(c1 << 8) + i];
- int j = i;
- while ((j > 0) && (freq[(c1 << 8) + clst[(c1 << 8) + j - 1]] > freq[(c1 << 8) + x])) {
- clst[(c1 << 8) + j] = clst[(c1 << 8) + j - 1];
- j = j - 1;
- }
- clst[(c1 << 8) + j] = x;
- }
- //Sort chars by node frequency (More advanced)
- int[] symbSort = new int[0x100]; //Basically points to chars in order to be displayed in data.
- int[] symbBits = new int[0x100];
- int[] nodeHead = new int[0x100];
- int[] nodeTail = new int[0x100];
- int[] nodeFreq = new int[0x100]; nodeFreq[0] = 0x7FFFFFFF; nodeFreq[1] = 0x7FFFFFFF; //Ensure unused/node2 when there is none.
- int nodeA = 0, nodeI = 0, symbI = 0;
- if (clen[c1] > 1) {
- while ((symbI < clen[c1]) || (nodeA < nodeI - 1)) {
- int symfreq1 = freq[(c1 << 8) + clst[(c1 << 8) + symbI]];
- int symfreq2 = freq[(c1 << 8) + clst[(c1 << 8) + symbI + 1]];
- if ((symbI + 1 < clen[c1]) && (symfreq2 <= nodeFreq[nodeA])) { //Symbol - Symbol
- symbSort[symbI] = symbI + 1;
- nodeHead[nodeI] = symbI; nodeTail[nodeI] = symbI + 1;
- nodeFreq[nodeI] = symfreq1 + symfreq2;
- symbI += 2;
- } else if ((symbI < clen[c1]) && (symfreq1 <= nodeFreq[nodeA])) { // Symbol - Node
- symbSort[symbI] = nodeHead[nodeA];
- nodeHead[nodeI] = symbI; nodeTail[nodeI] = nodeTail[nodeA];
- nodeFreq[nodeI] = symfreq1 + nodeFreq[nodeA];
- symbI++; nodeA++;
- } else if ((nodeA < nodeI - 1) && ((nodeFreq[nodeA + 1] < symfreq1) || ((symbI >= clen[c1])))) { // Node - Node
- symbSort[nodeTail[nodeA]] = nodeHead[nodeA + 1];
- nodeHead[nodeI] = nodeHead[nodeA]; nodeTail[nodeI] = nodeTail[nodeA + 1];
- nodeFreq[nodeI] = nodeFreq[nodeA] + nodeFreq[nodeA + 1];
- nodeA += 2;
- } else if (nodeFreq[nodeA] < symfreq1) { // Node - Symbol
- symbSort[nodeTail[nodeA]] = symbI;
- nodeHead[nodeI] = nodeHead[nodeA]; nodeTail[nodeI] = symbI;
- nodeFreq[nodeI] = nodeFreq[nodeA] + symfreq1;
- symbI++; nodeA++;
- }
- symbBits[clst[(c1 << 8) + nodeHead[nodeI++]]] += 1;
- }
- }
- addr2 += (((clen[c1] * 12) + 4) & -8);
- chrPtrs[(c1 << 1)] = (byte)(addr2 >> 3);
- chrPtrs[(c1 << 1) + 1] = (byte)(addr2 >> 11);
- int addr1 = addr2 - 12;
- byte bitsL = 0;
- //int val = 0;
- //int bitnum = (clen[c1] & 1) * 4;
- int bitC = 0;
- for (int n = clen[c1]; n > 0; n--) {
- //List chars
- chrTbl[(addr1 >> 3)] |= (byte)(clst[(c1 << 8) + nodeHead[nodeA]] << (addr1 & 7));
- chrTbl[(addr1 >> 3) + 1] |= (byte)(clst[(c1 << 8) + nodeHead[nodeA]] >> (8 - (addr1 & 7)));
- addr1 -= 12;
- //val |= clst[(c1 << 8) + nodeHead[nodeA]] << bitnum; bitnum += 12;
- //while (bitnum >= 8) {
- // chrTbl[addr1++] = (byte)val; bitnum -= 8;
- //}
- //List the char's tree/flags
- addr2 += symbBits[clst[(c1 << 8) + nodeHead[nodeA]]];
- chrTbl[addr2 >> 3] |= (byte)(1 << (addr2++ & 7));
- //Calculate bit lengths for bit code.
- bitsL += (byte)symbBits[clst[(c1 << 8) + nodeHead[nodeA]]];
- //bitLen[clst[(c1 << 8) + nodeHead[nodeA]]] = bitsL;
- bitLen[(c1 << 8) + clst[(c1 << 8) + nodeHead[nodeA]]] = bitsL;
- //if (symbBits[clst[(c1 << 8) + nodeHead[nodeA]]] == 0) { bitsL -= 1; }
- //if (c1 == 0) { Console.WriteLine(bitC.ToString("X8") + " " + bitsL.ToString("X8") + " " + (char)clst[(c1 << 8) + nodeHead[nodeA]]); }
- //Generate bitCode table.
- bitCode[(c1 << 8) + clst[(c1 << 8) + nodeHead[nodeA]]] = bitC;
- while (((bitC >> (bitsL - 1)) & 1) == 1) { bitsL -= 1; bitC ^= 1 << bitsL; }
- bitC |= 1 << (bitsL - 1);
- nodeHead[nodeA] = symbSort[nodeHead[nodeA]];
- }
- addr2 = (addr2 + 8) & -8;
- //Console.WriteLine("\nLetter by node order");
- //for (int zz = 0; zz < clen[c1]; zz++) {
- // Console.Write(clst[(c1 << 8) + nodeHead[nodeA]].ToString("X4") + " ");
- // //Console.Write(symbBits[clst[(c1 << 8) + nodeHead[nodeA]]].ToString("X4") + " ");
- // nodeHead[nodeA] = symbSort[nodeHead[nodeA]];
- //}
- //Console.WriteLine("\nsymbSort");
- //for (int zz = 0; zz < clen[c1]; zz++) {
- // Console.Write(symbSort[zz].ToString("X4") + " ");
- //}
- }
- //Finally compress the text.
- int val = 0, bitnum = 0, ctAddr = 0, cstrstart = 0;
- byte[] cText = new byte[src.Length];
- byte[] txtref1 = new byte[0x200]; int tr1Addr = 0;
- byte[] txtref2 = new byte[0x8000]; int tr2Addr = 0;
- srcEntry = 0; char1 = 0;
- while ((Bits.getInt32(src, srcEntry) != 0) || (srcEntry == 0)) {
- if ((srcEntry & 0x3FC) == 0) {
- Bits.setInt32(txtref1, tr1Addr, ctAddr); tr1Addr += 4;
- Bits.setInt32(txtref1, tr1Addr, tr2Addr); tr1Addr += 4;
- }
- cstrstart = ctAddr;
- int srcPos = 0xC300 + Bits.getInt32(src, srcEntry); val = 0;
- do {
- char2 = src[srcPos++];
- val |= bitCode[(char1 << 8) + char2] << bitnum;
- bitnum += bitLen[(char1 << 8) + char2];
- while (bitnum >= 8) {
- cText[ctAddr++] = (byte)val; val >>= 8; bitnum -= 8;
- }
- //if (freq[char1 * 0x100 + char2]++ == 0) {
- // clst[char1 * 0x100 + clen[char1]++] = char2; //clen[char1]++;// += 1;
- //}
- //freq[char1 * 0x100 + char2] += 1;
- //if (srcEntry == 0) { Console.WriteLine(bitCode[(char1 << 8) + char2].ToString("X8") + " " + bitLen[(char1 << 8) + char2].ToString("X8")); }
- char1 = char2;
- } while (char1 != 0); //Change to while < textArray?-- No, go through offset list instead.
- srcEntry += 4; if (bitnum != 0) { cText[ctAddr++] = (byte)val; bitnum = 0; }
- while ((ctAddr - cstrstart) > 0xFE) { txtref2[tr2Addr++] = 0xFF; cstrstart += 0xFF; }
- txtref2[tr2Addr++] = (byte)(ctAddr - cstrstart); //cstrstart = ctAddr;
- }
- //Now insert everything into the ROM.
- int insAddr = 0xFA0000;
- int loc1 = insAddr;
- Array.Copy(chrTbl, 0, dest, insAddr, addr2 >> 3); insAddr += addr2 >> 3;
- insAddr = (insAddr + 1) & -2;
- int loc2 = insAddr;
- Array.Copy(chrPtrs, 0, dest, insAddr, chrptlen); insAddr += chrptlen;
- Bits.setInt32(dest, 0x38578, 0x08000000 + insAddr);
- Bits.setInt32(dest, insAddr, 0x08000000 + loc1); insAddr += 4;
- Bits.setInt32(dest, insAddr, 0x08000000 + loc2); insAddr += 4;
- loc1 = insAddr;
- Array.Copy(cText, 0, dest, insAddr, ctAddr); insAddr += ctAddr;
- loc2 = insAddr;
- Array.Copy(txtref2, 0, dest, insAddr, tr2Addr); insAddr += tr2Addr;
- insAddr = (insAddr + 3) & -4;
- Bits.setInt32(dest, 0x385DC, 0x08000000 + insAddr);
- for (int a = 0; a < tr1Addr; a += 8) {
- Bits.setInt32(dest, insAddr + a, 0x08000000 + Bits.getInt32(txtref1, a) + loc1);
- Bits.setInt32(dest, insAddr + a + 4, 0x08000000 + Bits.getInt32(txtref1, a + 4) + loc2);
- }
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/txtromtest.gba", dest);
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/txtref1.dmp", txtref1);
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/txtref2.dmp", txtref2);
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/cText.dmp", cText);
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/chrPtrs.dmp", chrPtrs);
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/chrTbl.dmp", chrTbl);
- //System.IO.File.WriteAllBytes("C:/Users/Tea/Desktop/bitLen.dmp", bitLen);
- //DateTime c = DateTime.Now;//.Ticks;
- Console.WriteLine((DateTime.Now - c).ToString());
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement