View difference between Paste ID: 0WbMmhFc and ghY9fH93
SHOW: | | - or go back to the newest paste.
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <string.h>
4
#include <stdint.h>
5
#include <unistd.h>
6
#include <fcntl.h>
7
#include <sys/types.h>
8
#include <sys/stat.h>
9
10
typedef uint8_t byte;
11
typedef uint16_t ushort;
12
typedef uint32_t uint;
13
14
15
#define MAX_DICT_SIZE 4096
16
17
byte* lzw_encode_default(byte *data, uint in_len, int start_bits, int *out_len){
18
19
    uint tmp, o_bits, out_index, i, j, o;
20
    uint code_size = start_bits +1;
21
    uint clear_code = 1 << start_bits;
22
    uint next_code = clear_code +2;
23
    tmp = o_bits = out_index = 0;
24
    // we assume we don't manage to double the size somehow... (dangerous waters and such)
25
    // To be on the safe side we need to calculate for the worst case like:
26
    // (len(data) * 12 (bit, just to be sure) + how_often_we_send_clear_in_the_worst_case) / 8 + one padding byte (?)
27
    byte *buffer = (byte*)malloc(in_len * 2); // free this thing after use !
28
29
    // we save our dictionary as enflated array
30
    // For example if the sequence "abcd" is known:
31
    // dict['a' * 256 + 'b'] = CODE | CODE2 = dict[CODE * 256 + 'c'] | "abcd" = dict[CODE2 * 256 + 'd']
32
    // the dictionarys size will be maximal (((2 ** 12) * 256 * 4) / 1024) / 1024 = 2.0 (mb)
33
    // Because the valid codes are maximal 12 bits long we can use signed shorts (<0 -> invalid).
34
    uint dict_size = 256 * (1 << code_size);
35
    int16_t *dictionary = (int16_t*)malloc(dict_size * sizeof(int16_t));
36
    for(i=0; i < dict_size; dictionary[i++]=-1); // prepare the dictionary
37
38
    // Some bit magic to write with the chosen code_size
39
    inline void write_bits(ushort x) {
40
        tmp |= x << o_bits;
41
        o_bits += code_size;
42
        while (o_bits >= 8) {
43
            o_bits -= 8;
44
            buffer[out_index++] = tmp & 0xff;
45
            tmp >>= 8;
46
        }
47
    }
48
49
    write_bits(clear_code);
50
    int last_index;
51
    for(i=0; i < in_len;){
52
        last_index = data[i];
53
        o = i + 1;
54
        // If we are reading the last byte just write it out as it is.
55
        if (o >= in_len){
56
            write_bits(data[i]);
57
            break;
58
        }
59
        // Loop till we find an unknown sequence
60
        while(o < in_len -1 && dictionary[ (last_index * 256) + data[o] ] >= 0){
61
            last_index = (ushort) dictionary[ (last_index * 256) + data[o] ]; // & 0xffff;
62
            o++;
63
        }
64
        // write the code and save the new sequence
65
        dictionary[ (last_index * 256) + data[o] ] = next_code;
66
        write_bits(last_index);
67
        i = o;
68
        if(next_code < MAX_DICT_SIZE){
69
            // increase bits used
70
            if((next_code & (next_code - 1)) == 0){  // Is a power of 2 ?
71
                code_size++;
72
                // TODO: clean this up # Should be way nicer to calc.. to tired TODO:
73
                // increase the dictionary if required
74
                if(dict_size <  (1 << code_size) *  256){
75
                    dict_size = (1 << code_size) *  256;
76
                    dictionary = realloc(dictionary, dict_size * sizeof(int16_t));
77
                    for(j=(1 << (code_size -1))*256; j < dict_size; dictionary[j++]=-1);
78
                }
79
            }
80
            next_code++;
81
        }else{
82
            // Reset everything and begin all over again.
83
            write_bits(clear_code);
84
            code_size = start_bits + 1;
85
            next_code = clear_code + 2;
86
            for(j=0; j < dict_size; dictionary[j++]=-1);
87
        }
88
    }
89
    // Send the trailer and the remaining byte and we are done.
90
    write_bits(clear_code+1);
91
    if(tmp) buffer[out_index++] = tmp & 0xff;
92
    *out_len = out_index; // size ie. points to one after last valid byte.
93
    free(dictionary);
94
    return buffer;
95
}