Zoinkity

Vanguard Bandits LZSS

Nov 13th, 2025 (edited)
234
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 13.25 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. """
  4. A fairly straight port of Haruhiko Okumura's LZSS codec from C.
  5. Nothing really clever, and not very pythonic.  (I'm not that good at this honestly.)
  6. """
  7.  
  8. # Like its C equiv., has problems when idx isn't what it would like.  Tied to node init, and instead of attempting to fix this came up with an equally problematic ringless-ring encoder.
  9. def encode(data, ring, limit, threshold, idx, fill, byteorder, condition=True):
  10.     """
  11.    Compresses [data], returning a bytes object.
  12.  
  13.    [ring] may be any size between 0x4000 and 0x100.
  14.    [limit] set the longest length for a copy; proper values depend on ring and threshold.
  15.    [threshold] is the shortest acceptable copy, minus one.
  16.    [idx] is typically ring - limit but may differ on application.
  17.    [fill] is used to initialize the ring.
  18.        *) For text applications use a space (b' ', or ASCII code 0x20)
  19.        *) Binary applications use a zero (b'\\0' or 0)
  20.    [byteorder] sets the byteorder copy commands are written in.
  21.        Usually this is 'little', but 'big' is also acceptable.
  22.    [condition] sets the flag state used for literals and dictionary hits.
  23.    """
  24.     output = bytearray()
  25.     if not data: return output
  26.  
  27.     from itertools import repeat
  28.  
  29.     # Initialize the ring buffer with a common fill value.
  30.     if isinstance(fill, (bytes, bytearray)):
  31.         fill = fill[0]
  32.     rng = bytearray(repeat(fill, ring + limit))
  33.     matchlen, matchpos, nil = 0, 0, ring
  34.     # Initialize the trees.
  35.     lson= list(bytes(ring + 1))
  36.     rson= list(bytes(ring + 1))
  37.     rson.extend(repeat(nil, 256))
  38.     dad = list(repeat(nil, ring + 1))
  39.  
  40.     # Create the masks for encoding values.
  41.     # This assumes minimum ring of 0x100, maximum length of 0x100 + threashold + 1.
  42.     r_mask = ring - 1
  43.     b_shft = r_mask.bit_length() - 8
  44.     b_mask = (0xFF00 >> b_shft) & 0xFF
  45.     condition = 0x80 if condition else 0
  46.     noncondition = condition ^ 0x80
  47.  
  48.     def InsertNode(v):
  49.         """Inserts string into trees and returns
  50.        longest match position and length as a tuple.
  51.        If match length equals the limit, it replaces the old node.
  52.        By embedding this function it can use the trees and ring globally."""
  53.         cmp, ml, mp = 1, 0, matchpos
  54.         key = rng[v:]
  55.         pos = ring + 1 + key[0]
  56.         rson[v] = nil
  57.         lson[v] = nil
  58.         while True:
  59.             if cmp>=0:
  60.                 if rson[pos] != nil:
  61.                     pos = rson[pos]
  62.                 else:
  63.                     rson[pos] = v
  64.                     dad[v] = pos
  65.                     return (ml, mp)
  66.             else:
  67.                 if lson[pos] != nil:
  68.                     pos = lson[pos]
  69.                 else:
  70.                     lson[pos] = v
  71.                     dad[v] = pos
  72.                     return (ml, mp)
  73.             for i in range(1, limit+1):
  74.                 cmp = key[i] - rng[pos + i]
  75.                 if cmp:
  76.                     break
  77.             if i > ml:
  78.                 mp, ml = pos, i
  79.                 if ml >= limit: break
  80.         dad[v] = dad[pos]
  81.         lson[v]=lson[pos]
  82.         rson[v]=rson[pos]
  83.         dad[lson[pos]] = v
  84.         dad[rson[pos]] = v
  85.         if rson[dad[pos]] == pos:
  86.             rson[dad[pos]] = v
  87.         else:
  88.             lson[dad[pos]] = v
  89.         dad[pos] = nil
  90.         return (ml, mp)
  91.  
  92.     def DeleteNode(pos):
  93.         if dad[pos] == nil: return
  94.         # If it's in the tree, delete it.
  95.         q = lson[pos]
  96.         if lson[pos] == nil:
  97.             q = rson[pos]
  98.         elif rson[pos] != nil:
  99.             if rson[q] != nil:
  100.                 while True:
  101.                     q = rson[q]
  102.                     if rson[q] == nil: break
  103.                 rson[dad[q]] = lson[q]
  104.                 dad[lson[q]] = dad[q]
  105.                 lson[q] = lson[pos]
  106.                 dad[lson[pos]] = q
  107.             rson[q] = rson[pos]
  108.             dad[rson[pos]] = q
  109.         dad[q] = dad[pos]
  110.         if rson[dad[pos]] == pos:
  111.             rson[dad[pos]] = q
  112.         else:
  113.             lson[dad[pos]] = q
  114.         dad[pos] = nil
  115.  
  116.     # Unset flags on copies; send when less than 256.
  117.     mask = 0xFF00
  118.     codebuf = bytearray()
  119.     s = 0
  120.     # Read limit bytes into the ring at idx.
  121.     p = min(limit, len(data)-1)
  122.     rng[idx:idx+p] = data[0:p]
  123.     cur = p
  124.     for i in range(1, limit+1):
  125.         matchlen, matchpos = InsertNode(idx - i)
  126.     matchlen, matchpos = InsertNode(idx)
  127.     # Now you're initialized, so do the rest of the file.
  128.     while True:
  129.         mask>>=1
  130.         matchlen = min(matchlen, p)
  131.         if matchlen <= threshold:
  132.             matchlen = 1
  133.             codebuf.append(rng[idx])
  134.             mask ^= noncondition
  135.         else:
  136.             mask ^= condition
  137.             a = (matchpos>>b_shft) & b_mask
  138.             a |= (matchlen - threshold - 1)
  139.             b = matchpos&0xFF
  140.             if byteorder == 'little':
  141.                 codebuf.extend((b,a))
  142.             else:
  143.                 codebuf.extend((a,b))
  144.         # Flush when the mask is full.
  145.         if mask < 256:
  146.             output.append(mask)
  147.             output.extend(codebuf)
  148.             codebuf = bytearray()
  149.             mask = 0xFF00
  150.         prevmatchlen = matchlen
  151.         j = min(prevmatchlen, len(data)-cur)
  152.         for i in range(j):
  153.             DeleteNode(s)
  154.             rng[s] = data[cur]
  155.             if s < (limit - 1):
  156.                 rng[s + ring] = data[cur]
  157.             cur+=1
  158.             # Correct the ring position via modulo ring
  159.             s+=1
  160.             s&= r_mask
  161.             idx+=1
  162.             idx&= r_mask
  163.             matchlen, matchpos = InsertNode(idx)
  164.         # Flush the rest of the buffer if necessary.
  165.         for i in range(j, prevmatchlen):
  166.             DeleteNode(s)
  167.             s+=1
  168.             s&= r_mask
  169.             idx+=1
  170.             idx&= r_mask
  171.             p-=1
  172.             if p:
  173.                 matchlen, matchpos = InsertNode(idx)
  174.         # Loop until source empty.
  175.         if not p:
  176.             break
  177.     # Flush remaining output; mask the lead bit off the mask.
  178.     if codebuf:
  179.         ## mask &= ~(1<<mask.bit_length()-1)   # bottom to top bitorder.
  180.         l = mask.bit_length() - 8
  181.         mask &= 0xFF
  182.         output.append(mask>>l)
  183.         output.extend(codebuf)
  184.     return output
  185.  
  186. def decode(data, out_sz, ring, threshold, idx, fill, byteorder, bitorder='bottom', condition=True, count=-1, relative=False):
  187.     """
  188.    Decompresses [data] originally compressed with encode(),
  189.        returning a bytes object.
  190.    if [out_sz] is None, will decompress to the end of file.
  191.    [ring], [threshold], [idx], [fill], and [byteorder]
  192.        should match the original compression settings.
  193.  
  194.    [ring] may be any size between 0x4000 and 0x100.
  195.    [threshold] is the shortest acceptable copy, minus one.
  196.    [idx] is typically ring - limit but may differ on application.
  197.    [fill] is used to initialize the ring.
  198.        *) For text applications use a space (b' ', or ASCII code 0x20)
  199.        *) Binary applications use a zero (b'\\0' or 0)
  200.    [byteorder] sets the byteorder copy commands are written in.
  201.        Usually this is 'little', but 'big' is also acceptable.
  202.    [bitorder] indicates if command bits are read from the 'bottom' or 'top'.
  203.    [condition] sets if a literal is written when a flag is True or False.
  204.    [count], when set, terminates decompression after the given number of commands.
  205.    [relative], if True, effectively is ringless.
  206.    """
  207.     output, buffer = bytearray(), bytearray(ring)
  208.     # set up ring buffer
  209.     if fill:
  210.         from itertools import repeat
  211.         if isinstance(fill, str):
  212.             fill = fill.encode()
  213.         if isinstance(fill, (bytes, bytearray)):
  214.             fill = fill[0]
  215.         buffer[0:idx] = repeat(fill, idx)
  216.     bits = 0
  217.     c_mask = 0
  218.     c_refill = 1 if bitorder=='bottom' else 0x80
  219.     d = iter(data)
  220.  
  221.     r_mask = ring-1
  222.     threshold += 1
  223.     b_shift = r_mask.bit_length() - 8
  224.     s_mask = (0xFF >> b_shift) & 0xFF
  225.     b_mask = s_mask ^ 0xFF
  226.  
  227.     try:
  228.      while count:
  229.         #if bits<2:
  230.         #    bits = next(d) | 0x100
  231.         #if(bits&1):
  232.         if not c_mask:
  233.             bits = next(d)
  234.             c_mask = c_refill
  235.         count -= 1
  236.         if bool(bits&c_mask) == condition:
  237.             v = next(d)
  238.             buffer[idx]=v
  239.             idx=(idx+1)&r_mask
  240.             output.append(v)
  241.         else:
  242.             if byteorder == 'little':
  243.                 val = next(d)
  244.                 size = next(d)
  245.             else:
  246.                 size = next(d)
  247.                 val = next(d)
  248.             back = (size & b_mask) << b_shift
  249.             back &= 0xFF00
  250.             back |= val
  251.             # back &= r_mask
  252.             size = (size & s_mask) + threshold
  253.             if relative:
  254.                 for x in range(size):
  255.                     buffer[idx] = output[-back]
  256.                     output.append(output[-back])
  257.                     idx=(idx+1)&r_mask
  258.             else:
  259.                 for x in range(size):
  260.                     buffer[idx] = buffer[back]
  261.                     output.append(buffer[back])
  262.                     idx=(idx+1)&r_mask
  263.                     back = (back+1)&r_mask
  264.         #bits>>=1
  265.         if bitorder == 'bottom':
  266.             c_mask <<= 1
  267.         else:
  268.             c_mask >>= 1
  269.         c_mask &= 0xFF
  270.         if out_sz:
  271.             if out_sz<=len(output):
  272.                 break
  273.     except IndexError:
  274.         import warnings
  275.         warnings.warn("\tError!  Insufficient data.\n  Probably not an LZSS file.\n")
  276.         return bytes()
  277.     except StopIteration as E:
  278.         if out_sz:
  279.             raise E
  280.     return bytes(output)
  281.  
  282. def length(data, out_sz, ring, threshold, byteorder, bitorder='bottom', condition=True, count=-1):
  283.     """
  284.    Returns (compressed size, decompressed size).
  285.    if [out_sz] is None, will decompress to the end of file.
  286.    [ring], [threshold], and [byteorder]
  287.        should match the original compression settings.
  288.  
  289.    [ring] may be any size between 0x4000 and 0x100.
  290.    [threshold] is the shortest acceptable copy, minus one.
  291.    [byteorder] sets the byteorder copy commands are written in.
  292.        Usually this is 'little', but 'big' is also acceptable.
  293.    [bitorder] indicates if command bits are read from the 'bottom' or 'top'.
  294.    [condition] sets if a literal is written when a flag is True or False.
  295.    [count], when set, terminates decompression after the given number of commands.
  296.    """
  297.     sz, out = 0, 0
  298.     bits = 0
  299.     c_mask = 0
  300.     c_refill = 1 if bitorder=='bottom' else 0x80
  301.  
  302.     r_mask = ring-1
  303.     threshold += 1
  304.     b_shift = r_mask.bit_length() - 8
  305.     s_mask = (0xFF >> b_shift) & 0xFF
  306.  
  307.     try:
  308.      while count:
  309.         if not c_mask:
  310.             bits = data[sz]
  311.             sz += 1
  312.             c_mask = c_refill
  313.         count -= 1
  314.         if bool(bits&c_mask) == condition:
  315.             v = data[sz]
  316.             sz += 1
  317.             out += 1
  318.         else:
  319.             if byteorder == 'little':
  320.                 val = data[sz]
  321.                 size = data[sz+1]
  322.             else:
  323.                 size = data[sz]
  324.                 val = data[sz+1]
  325.             sz += 2
  326.             out += (size & s_mask) + threshold
  327.         if bitorder == 'bottom':
  328.             c_mask <<= 1
  329.         else:
  330.             c_mask >>= 1
  331.         c_mask &= 0xFF
  332.         if out_sz:
  333.             if out_sz<=out:
  334.                 break
  335.     except Exception as E:
  336.         print(E)
  337.         # Still return progress to this point.
  338.     return (sz, out)
  339.  
  340. __modes = {'txt':0x20, 'bin':0}
  341.  
  342. def encode1KB(data, mode='bin', **kwargs):
  343.     """Compresses [data] using 1KB ring and given fill mode,
  344.        returning a bytes object.
  345.    [mode] may be:
  346.        'txt': fills buffer with spaces (b' ', or ASCII code 0x20)
  347.        'bin': fills buffer with zeroes
  348.    same as:
  349.        encode(data, 1024, 66, 2, 958, mode, 'little', False)
  350.    """
  351.     f = __modes.get(mode)
  352.     return encode(data, ring=1024, limit=66, threshold=2, idx=kwargs.pop('idx', 958), fill=f, byteorder=kwargs.pop('byteorder', 'little'), condition=kwargs.pop('condition', False), **kwargs)
  353.  
  354. def decode1KB(data, out_sz=None, mode='bin', **kwargs):
  355.     """Decompresses [data] using 1KB ring and given fill [mode],
  356.        returning a bytes object.
  357.    If [out_sz] is not given, decompresses to the end of data.
  358.    [mode] may be:
  359.        'txt': fills buffer with spaces (b' ', or ASCII code 0x20)
  360.        'bin': fills buffer with zeroes
  361.    same as:
  362.        decode(data, out_sz, 1024, 2, 958, mode, 'little', 'bottom', False)
  363.    """
  364.     f = __modes.get(mode)
  365.     return decode(data, out_sz, ring=1024, threshold=2, idx=kwargs.pop('idx', 958), fill=f, byteorder=kwargs.pop('byteorder', 'little'), condition=kwargs.pop('condition', False), **kwargs)
  366.  
  367. def length1KB(data, out_sz=None, **kwargs):
  368.     """Returns (compressed size, decompressed size) of [data] using 1KB ring,
  369.        basically by faking decompression.
  370.    If [out_sz] is not given, "decompresses" to the end of data.
  371.    same as:
  372.        length(data, out_sz, 1024, 2, 'little', 'bottom', False)
  373.    """
  374.     return length(data, out_sz, ring=1024, threshold=2, byteorder=kwargs.pop('byteorder', 'little'), condition=kwargs.pop('condition', False), **kwargs)
  375.  
  376. def main():
  377.     pass
  378.  
  379. if __name__ == '__main__':
  380.     main()
Advertisement
Add Comment
Please, Sign In to add comment