LZMA-based PICO-8 compression algorithm

--[[
lzma based pico8 compression algorithm

decompression in p8:

(As written, the function first copies the compressed data from the original cart into memory at address 0 and then decompresses it into a string. Adapting it to your own use case is left as an exercise for the reader.)
]]

-- decompress a stream produced by the python compress() shown further down.
--   src:    address in the cart rom where the compressed data starts
--   srclen: number of compressed bytes to copy
-- returns the decompressed data as a string.
-- side effect: overwrites memory 0..srclen-1 with the compressed data.
function decompress(src, srclen)
    -- copy the compressed bytes from the cart to memory address 0
    reload(0, src, srclen)

    -- decoder state, read from the 6-byte header:
    --   range = ~0 (all bits set)
    --   size  = 2-byte value at address 0 (decompressed length)
    --   code  = 4-byte value at address 2 (initial range-coder code)
    --   addr  = 6, the next compressed byte to consume
    -- probs/divs are per-slot adaptive state; slots 0x101 and 0x121 (the
    -- first bit of the offset-index and count-index trees) start at 0x20,
    -- every other slot defaults to 0x400 / 3 inside bit().
    local str, range, size, code, addr, probs, divs = "", ~0, %0, $2, 6, {[0x101]=0x20,[0x121]=0x20}, {}

    -- decode one bit. off selects the adaptive probability slot; when off is
    -- nil the bit is decoded at the fixed probability 0x400 and nothing is
    -- stored. returns true or false.
    local function bit(off)
        -- ^^0x8000 flips the sign bit so that the signed < acts as an
        -- unsigned compare: true when the top byte of range is zero
        -- (the encoder's matching test is rng < 0x1000000).
        if range^^0x8000 < 0x8100 then
            range <<= 8; code <<= 8
            -- pull the next input byte into the lowest 8 bits of code
            code |= @addr >>> 16; addr += 1
        end

        local prob = probs[off] or 0x400
        local div = divs[off] or 3
        local bound = (range >>> 11) * prob
        -- unsigned compare of code against bound (same sign-flip trick)
        local result = code^^0x8000 < bound^^0x8000
        if result then
            range = bound
            -- move prob towards 0x800 by 1/div of the remaining distance
            prob += (0x800 - prob) \ div
        else
            range -= bound; code -= bound
            -- move prob towards 0 by 1/div of its value
            prob -= prob \ div
        end
        -- short-form if: both statements on this line run only when off is
        -- given. div grows by one per use, capped at 16.
        if (off) probs[off] = prob; divs[off] = min(div + 1, 16)
        return result
    end

    -- decode a value in 0..limit-1 (limit is a power of two in every call
    -- here) by walking a binary tree from idx = 1, most significant bit
    -- first. slot off + idx is used for each step; with off == nil every
    -- bit is decoded at fixed probability.
    local function val(limit, off)
        local idx = 1
        while (idx < limit) idx = idx * 2 + tonum(bit(off and off + idx))
        return idx - limit
    end

    -- expand a decoded index into the full value (inverse of the encoder's
    -- split_offset_val / split_count_val). when idx < shift it is the value
    -- itself; otherwise bits = idx \ shift - 1 extra low bits follow in the
    -- stream, read at fixed probability through val(1 << bits).
    local function xval(idx, shift)
        local bits = idx \ shift - 1
        -- note: + binds tighter than <<, and - tighter than &, so this is
        -- ((shift + (idx & (shift-1))) << bits) + val(1 << bits)
        return bits < 0 and idx or (shift + (idx & shift-1) << bits) + val(1 << bits)
    end

    while #str < size do
        -- flag bit (slot 0): true = literal byte, false = back-reference
        if bit(0) then
            -- literal: 8 bits from the tree in slots 1..0xff
            str ..= chr(val(0x100, 0))
        else
            -- offset is stored minus 1, count is stored minus 3
            local offset = 1 + xval(val(32, 0x100), 2)
            local count = 3 + xval(val(32, 0x120), 4)

            -- take up to count chars starting offset chars before the end
            local pattern = sub(str, -offset, #str - offset + count)
            -- when count > offset the source ran off the end of str:
            -- repeat the pattern until it is count chars long
            while (#pattern < count) pattern ..= sub(pattern,1,count - #pattern)
            str ..= pattern
        end
    end

    return str
end

--[[
  compression in python

  requires files from https://github.com/thisismypassport/shrinko8 in current directory

  (rewriting it in p8, if wanted, is left as - you guessed it)
]]

from utils import *
from pico_compress import get_lz77, Lz77Tuple

def compress(code):
    """Compress `code` with an LZ77 + adaptive binary range coder.

    The output is what the PICO-8 `decompress` function in this file reads:

    * bytes 0-1: ``len(code)``, as written by ``w.u16``
    * bytes 2-5: the first four range-coder bytes, in reversed order (so the
      decoder can load them with a single 4-byte read)
    * bytes 6+:  the remaining range-coder bytes

    Args:
        code: the text to compress; a ``str`` whose characters all have
            code points in 0..255.

    Returns:
        The compressed data, as returned by ``f.getvalue()``.

    Raises:
        ValueError: if `code` is longer than 0xffff characters (the header
            only has a 16-bit length field), if a character does not fit in
            8 bits (it would be silently truncated to its low byte and
            decode to different text), or if an LZ77 offset/count is too
            large for the index encoding.
    """
    # Validate before doing any work: both conditions would otherwise yield a
    # stream that does not decode back to `code`.
    if len(code) > 0xffff:
        raise ValueError("input too long: %d characters, but the header "
                         "stores a 16-bit length" % len(code))
    for pos, ch in enumerate(code):
        if ord(ch) > 0xff:
            raise ValueError("character %r at index %d does not fit in 8 bits"
                             % (ch, pos))

    def split_val(val, group, max_shift):
        # Split val into (index, number of extra bits, extra bits).
        # Values below 2*group are their own index with no extra bits; after
        # that every `group` indices cover a range twice as large as the
        # previous one. This is the inverse of the decoder's xval(idx, group).
        for shift in range(max_shift):
            if val < ((2 * group) << shift):
                return (group * shift) + (val >> shift), shift, val & make_mask(0, shift)
        raise ValueError("value %d is too large to encode" % val)

    # Range coder state, LZMA style: `low` may temporarily carry into bit 32,
    # `cache` is the last byte not yet written, and `cachesize` counts that
    # byte plus the 0xff bytes pending behind it.
    low, rng, cache, cachesize = 0, 0xffffffff, 0, 1
    # Slot 0: literal/match flag. Slots 0x001-0x0ff: literal bit tree.
    # Slots 0x101-0x11f: offset index tree. Slots 0x121-0x13f: count index tree.
    probs = [0x400] * 0x140
    probs[0x101] = probs[0x121] = 0x20
    divs = [3] * 0x140

    f = BytesIO()
    w = BinaryWriter(f)
    w.u8(0) # will be overwritten below

    def range_norm():
        # Shift the top byte out of `low`, writing it (and any pending 0xff
        # bytes) once it is known whether a carry propagates into them.
        nonlocal low, rng, cache, cachesize

        if low < 0xff000000 or low >= 0x100000000:
            carry = low >> 32
            w.u8((cache + carry) & 0xff)
            for _ in range(cachesize - 1):
                w.u8((0xff + carry) & 0xff)
            cache, cachesize = (low >> 24) & 0xff, 0

        cachesize += 1
        low = (low & 0xffffff) << 8
        rng <<= 8

    def range_bit(b, pri):
        # Encode one bit. `pri` is the adaptive probability slot, or None to
        # encode at the fixed probability 0x400 without adapting anything.
        nonlocal low, rng
        if rng < 0x1000000:
            range_norm()

        prob = 0x400 if pri is None else probs[pri]
        bound = (rng >> 11) * prob
        if b:
            rng = bound
        else:
            rng -= bound
            low += bound

        if pri is not None:
            # The adaptation step is 1/div of the remaining distance; div
            # starts at 3 and grows by one per use of the slot, capped at 16.
            div = divs[pri]
            if b:
                prob += (0x800 - prob) // div
            else:
                prob -= prob // div
            probs[pri] = prob
            divs[pri] = min(div + 1, 16)

    def range_val(val, limit, pri):
        # Encode val (0 <= val < limit) most significant bit first, walking a
        # binary tree whose node `idx` uses slot pri + idx.
        idx = 1
        limbits = (limit - 1).bit_length()
        while idx < limit:
            limbits -= 1
            b = (val >> limbits) & 1
            range_bit(b, None if pri is None else pri + idx)
            idx = idx * 2 + b

    for _, item in get_lz77(code, max_c=0x202):
        islit = not isinstance(item, Lz77Tuple)
        range_bit(islit, 0)

        if islit:
            range_val(ord(item), 0x100, 0)

        else:
            # Index through the adaptive tree, extra low bits at fixed probability.
            oidx, obits, oval = split_val(item.off, 2, 14)
            range_val(oidx, 0x20, 0x100)
            range_val(oval, 1 << obits, None)

            cidx, cbits, cval = split_val(item.cnt, 4, 7)
            range_val(cidx, 0x20, 0x120)
            range_val(cval, 1 << cbits, None)

    # Flush: push the cached byte and the remaining bytes of `low` out.
    for _ in range(5):
        range_norm()

    # Position 0 is the placeholder and position 1 is the first range-coder
    # byte, asserted to be zero; both are replaced by the length. The next
    # four bytes are rewritten in reversed order.
    r = BinaryReader(f)
    r.setpos(1)
    assert r.u8() == 0
    a, b, c, d = r.u8(), r.u8(), r.u8(), r.u8()

    w.setpos(0)
    w.u16(len(code))
    w.u8(d); w.u8(c); w.u8(b); w.u8(a)

    return f.getvalue()