Advertisement
Guest User

Falcom Compress v2

a guest
Sep 3rd, 2016
519
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.24 KB | None | 0 0
  1. #v2: Full implementation of FALCOM2 for better compression performance; also implements chunking for FALCOM3
  2. import struct
  3. import io
  4. import pdb
  5. from functools import partial
  6.  
  7. WINDOW_SIZE = 0x1FFF
  8. MIN_MATCH = 2
  9. MAX_MATCH = 0xFF + 0xE
  10. #The real max is 0x7FF0. We have to stop here to make sure to be under.
  11. CHUNK_SIZE_CMP_MAX = 0x7FC0
  12. #The real max is 0x3FFF0. We have to stop here to make sure to be under.
  13. CHUNK_SIZE_UNC_MAX = 0x3DFF0                        
  14. def find_match(filedata, pos):
  15.     if pos < WINDOW_SIZE:                           #Get window to find match in
  16.         window = filedata[:pos]
  17.     else:
  18.         window = filedata[pos - WINDOW_SIZE:pos]
  19.     if pos + MAX_MATCH > len(filedata):             #Avoids EOF errors
  20.         max_match = len(filedata) - pos
  21.     else:
  22.         max_match = MAX_MATCH
  23.     if max_match < MIN_MATCH:                       #Too close to EOF for a match
  24.         return 0, -1
  25.     if filedata[pos:pos + MIN_MATCH] not in window: #No match
  26.         return 0, -1
  27.     for size in range(MIN_MATCH, max_match + 1):    #Look for longest match
  28.         if filedata[pos:pos + size] not in window:
  29.             size -= 1
  30.             break
  31.     match_pos = window.rfind(filedata[pos:pos + size])
  32.     if len(window) - match_pos == size:             #Look for "extra match"
  33.                                                     #For example, find_match(b'abcdabcdabcdabcd', 4)
  34.                                                     #should return match_pos = 4, size = 12
  35.         extra_match = 0; extra_match1 = 0
  36.         multiplier = 0
  37.         while filedata[pos + size + extra_match] == window[match_pos + extra_match1]:
  38.             extra_match += 1; extra_match1 += 1
  39.             if match_pos + extra_match1 == len(window):
  40.                 extra_match1 = 0
  41.             if size + extra_match == max_match:
  42.                 break
  43.         size += extra_match
  44.     match_pos = len(window) - match_pos              #Convert absolute position to relative
  45.     return size, match_pos
  46. def find_repeat(filedata, pos):
  47.     max_match = 0xFFF + 0xE
  48.     window = filedata[pos:pos + max_match]
  49.     this_byte = filedata[pos]
  50.     repeat_len = 0
  51.     for byte in window:
  52.         if byte == this_byte:
  53.             repeat_len += 1
  54.         else:
  55.             break
  56.     if repeat_len < 0x3:                            #Min match = 0x3
  57.         return 0
  58.     else:
  59.         return repeat_len
  60.  
  61. def _updateflags(out):
  62. ###"Writes" a flag
  63. ###Writes data if the flag buffer is full, then flushes
  64.     global comp_buffer
  65.     global flag_pos     #Other functions need read access so it has to be global (or passed back and forth)
  66.     flags = 0
  67.     flag_write = 0x8000
  68.     flag_pos = 8
  69.     b = (yield)         #At this point ready to pass in first flag bit
  70.     while True:
  71.         if b:   #Write a "1" (otherwise write 0)
  72.             flags |= flag_write     #Writes a "1"
  73.         flag_pos -= 1
  74.         if flag_pos == 0:
  75.             out.write(struct.pack('<H', flags))
  76.             out.write(comp_buffer)
  77.             comp_buffer = bytearray(0)
  78.             flag_pos = 16
  79.             flags = 0
  80.         else:
  81.             flags >>= 1
  82.         b = (yield)
  83.  
  84. def compress_FALCOM3(filedata):
  85.     outdata = io.BytesIO()
  86.     indata = io.BytesIO(filedata)
  87.     #Format is compressed size + 11, decompressed size, chunks, 1st chunk size
  88.     outdata.write(struct.pack('<III',  0, len(filedata), 0))
  89.     first = True
  90.     pos = 0
  91.     i = 0
  92.     while pos < len(filedata):
  93.         i += 1
  94.         if first == True:
  95.             first = False
  96.         else:
  97.             outdata.write(b'\x01')
  98.         data, compressed_len = compress_FALCOM2(filedata[pos:], True)
  99.         pos += compressed_len
  100.         outdata.write(struct.pack('<H', len(data)))
  101.         outdata.write(data)
  102.     outdata.write(b'\x00')                                  #Stop flag
  103.     size = outdata.tell()
  104.     outdata.seek(0)                                         #Write message size
  105.     outdata.write(struct.pack('<I', size - 4))
  106.     outdata.seek(8)                                         #Write number of chunks
  107.     outdata.write(struct.pack('<I', i + 1))
  108.     outdata.seek(0)                                         #Output data
  109.     return outdata.read()
  110.  
  111. def encode_repeat(repeat_byte, repeat_size, updateflags):
  112.     if repeat_size < 0xE:
  113.         comp_buffer.append(repeat_byte)
  114.         updateflags.send(False)
  115.         encode_match(repeat_size - 1, 1, updateflags)
  116.     else:
  117.         repeat_size -= 0xE
  118.         for x in range(2):
  119.             updateflags.send(True)
  120.         for x in range(4):
  121.             updateflags.send(False)
  122.         comp_buffer.append(1)
  123.         updateflags.send(False)
  124.         if repeat_size < 0x10:
  125.             updateflags.send(False)
  126.             for i in reversed(range(4)):
  127.                 updateflags.send((repeat_size >> i) & 1)
  128.                 if i == 1:
  129.                     comp_buffer.append(repeat_byte)
  130.         else:
  131.             high_order = repeat_size >> 8
  132.             low_order = repeat_size & 0xFF
  133.             updateflags.send(True)
  134.             for i in reversed(range(4)):
  135.                 updateflags.send((high_order >> i) & 1)
  136.                 if i == 1:
  137.                     comp_buffer.append(low_order)
  138.                     comp_buffer.append(repeat_byte)
  139. def encode_match(match_size, match_pos, updateflags):    #Encode look-back first (then size)
  140.     if match_pos < 0x100:                   #Short look-back
  141.         updateflags.send(True)          
  142.         comp_buffer.append(match_pos)
  143.         updateflags.send(False)
  144.     else:                                   #Long look-back
  145.         high_order = match_pos >> 8
  146.         low_order = match_pos & 0xFF
  147.         for x in range(2):
  148.             updateflags.send(True)
  149.         for i in reversed(range(5)):
  150.             updateflags.send((high_order >> i) & 1)
  151.             if i == 1:
  152.                 comp_buffer.append(low_order)
  153.     for i in range(2, 5):                   #Encode match size
  154.         if i >= match_size:
  155.             break
  156.         updateflags.send(False)
  157.     if match_size >= 6:
  158.         updateflags.send(False)
  159.         if match_size >= 0xE:
  160.             match_size -= 0xE
  161.             comp_buffer.append(match_size)
  162.             updateflags.send(False)
  163.         else:
  164.             updateflags.send(True)
  165.             match_size -= 0x6
  166.             for i in reversed(range(3)):
  167.                 updateflags.send((match_size >> i) & 1)
  168.     else:
  169.         updateflags.send(True)
  170.    
  171. def compress_FALCOM2(filedata, FALCOM3 = False):
  172.     global comp_buffer                  #Other functions need write access
  173.     file_size = len(filedata)
  174.     pos = 0
  175.     comp_buffer = bytearray()
  176.     out = io.BytesIO()
  177.     updateflags = _updateflags(out)     #Initalize generator
  178.     next(updateflags)                   #More initialize generator
  179.     while pos < file_size:
  180.         if FALCOM3:
  181.             if out.tell() >= CHUNK_SIZE_CMP_MAX:            #Max compressed data for one chunk (FALCOM3)
  182.                 break
  183.             if pos >= CHUNK_SIZE_UNC_MAX:                   #Max uncompressed data for one chunk (FALCOM3)
  184.                 break
  185.         match_size, match_pos = find_match(filedata, pos)   #Find matches
  186.         repeat_size = find_repeat(filedata, pos)
  187.         if repeat_size > match_size:                        #Repeat is bigger
  188.             encode_repeat(filedata[pos], repeat_size, updateflags)
  189.             pos += repeat_size
  190.         elif match_size > 0:                                #Match is bigger
  191.             encode_match(match_size, match_pos, updateflags)
  192.             pos += match_size
  193.         else:                                               #No pattern
  194.             comp_buffer.append(filedata[pos])
  195.             pos += 1
  196.             updateflags.send(False)
  197.  
  198.     for x in range(2):
  199.         updateflags.send(True)
  200.     for x in range(5):
  201.         if x == 4:
  202.             comp_buffer.append(0)
  203.         updateflags.send(False)
  204.     if flag_pos != 0x10:
  205.         for x in range(flag_pos):
  206.             updateflags.send(False)
  207.     out.seek(0)
  208.     if FALCOM3:
  209.         return out.read(), pos
  210.     else:
  211.         return out.read()
  212. if __name__ == '__main__':
  213.     pass
  214. ##    with open('mp_0110.cmp', 'wb') as g:
  215. ##        with open('mp_0110.orig', 'rb') as f:
  216. ##            g.write(compress_FALCOM3(f.read()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement