lzw_gif_encode.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>

typedef uint8_t byte;
typedef uint16_t ushort;
typedef uint32_t uint;


#define MAX_DICT_SIZE 4096

byte* lzw_encode_default(byte *data, uint in_len, int start_bits, int *out_len){

    uint tmp, o_bits, out_index, i, j, o;
    uint code_size = start_bits +1;
    uint clear_code = 1 << start_bits;
    uint next_code = clear_code +2;
    tmp = o_bits = out_index = 0;
    // we assume we don't manage to double the size somehow... (dangerous waters and such)
    // To be on the safe side we need to calculate for the worst case like:
    // (len(data) * 12 (bit, just to be sure) + how_often_we_send_clear_in_the_worst_case) / 8 + one padding byte (?)
    byte *buffer = (byte*)malloc(in_len * 2); // free this thing after use !

    // we save our dictionary as enflated array
    // For example if the sequence "abcd" is known:
    // dict['a' * 256 + 'b'] = CODE | CODE2 = dict[CODE * 256 + 'c'] | "abcd" = dict[CODE2 * 256 + 'd']
    // the dictionarys size will be maximal (((2 ** 12) * 256 * 4) / 1024) / 1024 = 2.0 (mb)
    // Because the valid codes are maximal 12 bits long we can use signed shorts (<0 -> invalid).
    uint dict_size = 256 * (1 << code_size);
    int16_t *dictionary = (int16_t*)malloc(dict_size * sizeof(int16_t));
    for(i=0; i < dict_size; dictionary[i++]=-1); // prepare the dictionary

    // Some bit magic to write with the chosen code_size
    inline void write_bits(ushort x) {
        tmp |= x << o_bits;
        o_bits += code_size;
        while (o_bits >= 8) {
            o_bits -= 8;
            buffer[out_index++] = tmp & 0xff;
            tmp >>= 8;
        }
    }

    write_bits(clear_code);
    int last_index;
    for(i=0; i < in_len;){
        last_index = data[i];
        o = i + 1;
        // If we are reading the last byte just write it out as it is.
        if (o >= in_len){
            write_bits(data[i]);
            break;
        }
        // Loop till we find an unknown sequence
        while(o < in_len -1 && dictionary[ (last_index * 256) + data[o] ] >= 0){
            last_index = (ushort) dictionary[ (last_index * 256) + data[o] ]; // & 0xffff;
            o++;
        }
        // write the code and save the new sequence
        dictionary[ (last_index * 256) + data[o] ] = next_code;
        write_bits(last_index);
        i = o;
        if(next_code < MAX_DICT_SIZE){
            // increase bits used
            if((next_code & (next_code - 1)) == 0){  // Is a power of 2 ?
                code_size++;
                // TODO: clean this up # Should be way nicer to calc.. to tired TODO:
                // increase the dictionary if required
                if(dict_size <  (1 << code_size) *  256){
                    dict_size = (1 << code_size) *  256;
                    dictionary = realloc(dictionary, dict_size * sizeof(int16_t));
                    for(j=(1 << (code_size -1))*256; j < dict_size; dictionary[j++]=-1);
                }
            }
            next_code++;
        }else{
            // Reset everything and begin all over again.
            write_bits(clear_code);
            code_size = start_bits + 1;
            next_code = clear_code + 2;
            for(j=0; j < dict_size; dictionary[j++]=-1);
        }
    }
    // Send the trailer and the remaining byte and we are done.
    write_bits(clear_code+1);
    if(tmp) buffer[out_index++] = tmp & 0xff;
    *out_len = out_index; // size ie. points to one after last valid byte.
    free(dictionary);
    return buffer;
}