SHOW:
|
|
- or go back to the newest paste.
1 | #include <stdio.h> | |
2 | #include <stdlib.h> | |
3 | #include <string.h> | |
4 | #include <stdint.h> | |
5 | #include <unistd.h> | |
6 | #include <fcntl.h> | |
7 | #include <sys/types.h> | |
8 | #include <sys/stat.h> | |
9 | ||
10 | typedef uint8_t byte; | |
11 | typedef uint16_t ushort; | |
12 | typedef uint32_t uint; | |
13 | ||
14 | ||
15 | #define MAX_DICT_SIZE 4096 | |
16 | ||
17 | byte* lzw_encode_default(byte *data, uint in_len, int start_bits, int *out_len){ | |
18 | ||
19 | uint tmp, o_bits, out_index, i, j, o; | |
20 | uint code_size = start_bits +1; | |
21 | uint clear_code = 1 << start_bits; | |
22 | uint next_code = clear_code +2; | |
23 | tmp = o_bits = out_index = 0; | |
24 | // we assume we don't manage to double the size somehow... (dangerous waters and such) | |
25 | // To be on the safe side we need to calculate for the worst case like: | |
26 | // (len(data) * 12 (bit, just to be sure) + how_often_we_send_clear_in_the_worst_case) / 8 + one padding byte (?) | |
27 | byte *buffer = (byte*)malloc(in_len * 2); // free this thing after use ! | |
28 | ||
29 | // we save our dictionary as enflated array | |
30 | // For example if the sequence "abcd" is known: | |
31 | // dict['a' * 256 + 'b'] = CODE | CODE2 = dict[CODE * 256 + 'c'] | "abcd" = dict[CODE2 * 256 + 'd'] | |
32 | // the dictionarys size will be maximal (((2 ** 12) * 256 * 4) / 1024) / 1024 = 2.0 (mb) | |
33 | // Because the valid codes are maximal 12 bits long we can use signed shorts (<0 -> invalid). | |
34 | uint dict_size = 256 * (1 << code_size); | |
35 | int16_t *dictionary = (int16_t*)malloc(dict_size * sizeof(int16_t)); | |
36 | for(i=0; i < dict_size; dictionary[i++]=-1); // prepare the dictionary | |
37 | ||
38 | // Some bit magic to write with the chosen code_size | |
39 | inline void write_bits(ushort x) { | |
40 | tmp |= x << o_bits; | |
41 | o_bits += code_size; | |
42 | while (o_bits >= 8) { | |
43 | o_bits -= 8; | |
44 | buffer[out_index++] = tmp & 0xff; | |
45 | tmp >>= 8; | |
46 | } | |
47 | } | |
48 | ||
49 | write_bits(clear_code); | |
50 | int last_index; | |
51 | for(i=0; i < in_len;){ | |
52 | last_index = data[i]; | |
53 | o = i + 1; | |
54 | // If we are reading the last byte just write it out as it is. | |
55 | if (o >= in_len){ | |
56 | write_bits(data[i]); | |
57 | break; | |
58 | } | |
59 | // Loop till we find an unknown sequence | |
60 | while(o < in_len -1 && dictionary[ (last_index * 256) + data[o] ] >= 0){ | |
61 | last_index = (ushort) dictionary[ (last_index * 256) + data[o] ]; // & 0xffff; | |
62 | o++; | |
63 | } | |
64 | // write the code and save the new sequence | |
65 | dictionary[ (last_index * 256) + data[o] ] = next_code; | |
66 | write_bits(last_index); | |
67 | i = o; | |
68 | if(next_code < MAX_DICT_SIZE){ | |
69 | // increase bits used | |
70 | if((next_code & (next_code - 1)) == 0){ // Is a power of 2 ? | |
71 | code_size++; | |
72 | // TODO: clean this up # Should be way nicer to calc.. to tired TODO: | |
73 | // increase the dictionary if required | |
74 | if(dict_size < (1 << code_size) * 256){ | |
75 | dict_size = (1 << code_size) * 256; | |
76 | dictionary = realloc(dictionary, dict_size * sizeof(int16_t)); | |
77 | for(j=(1 << (code_size -1))*256; j < dict_size; dictionary[j++]=-1); | |
78 | } | |
79 | } | |
80 | next_code++; | |
81 | }else{ | |
82 | // Reset everything and begin all over again. | |
83 | write_bits(clear_code); | |
84 | code_size = start_bits + 1; | |
85 | next_code = clear_code + 2; | |
86 | for(j=0; j < dict_size; dictionary[j++]=-1); | |
87 | } | |
88 | } | |
89 | // Send the trailer and the remaining byte and we are done. | |
90 | write_bits(clear_code+1); | |
91 | if(tmp) buffer[out_index++] = tmp & 0xff; | |
92 | *out_len = out_index; // size ie. points to one after last valid byte. | |
93 | free(dictionary); | |
94 | return buffer; | |
95 | } |