View difference between Paste ID: <a href="/0WbMmhFc">0WbMmhFc</a> and <a href="/ghY9fH93">ghY9fH93</a>

#include <stdio.h>
1		#include <stdio.h>
2		#include <stdlib.h>
3		#include <string.h>
4		#include <stdint.h>
5		#include <unistd.h>
6		#include <fcntl.h>
7		#include <sys/types.h>
8		#include <sys/stat.h>
9
10		typedef uint8_t byte;
11		typedef uint16_t ushort;
12		typedef uint32_t uint;
13
14
15		#define MAX_DICT_SIZE 4096
16
17		byte* lzw_encode_default(byte data, uint in_len, int start_bits, int out_len){
18
19		uint tmp, o_bits, out_index, i, j, o;
20		uint code_size = start_bits +1;
21		uint clear_code = 1 << start_bits;
22		uint next_code = clear_code +2;
23		tmp = o_bits = out_index = 0;
24		// we assume we don't manage to double the size somehow... (dangerous waters and such)
25		// To be on the safe side we need to calculate for the worst case like:
26		// (len(data) * 12 (bit, just to be sure) + how_often_we_send_clear_in_the_worst_case) / 8 + one padding byte (?)
27		byte buffer = (byte)malloc(in_len * 2); // free this thing after use !
28
29		// we save our dictionary as enflated array
30		// For example if the sequence "abcd" is known:
31		// dict['a' * 256 + 'b'] = CODE \| CODE2 = dict[CODE * 256 + 'c'] \| "abcd" = dict[CODE2 * 256 + 'd']
32		// the dictionarys size will be maximal (((2 ** 12) * 256 * 4) / 1024) / 1024 = 2.0 (mb)
33		// Because the valid codes are maximal 12 bits long we can use signed shorts (<0 -> invalid).
34		uint dict_size = 256 * (1 << code_size);
35		int16_t dictionary = (int16_t)malloc(dict_size * sizeof(int16_t));
36		for(i=0; i < dict_size; dictionary[i++]=-1); // prepare the dictionary
37
38		// Some bit magic to write with the chosen code_size
39		inline void write_bits(ushort x) {
40		tmp \|= x << o_bits;
41		o_bits += code_size;
42		while (o_bits >= 8) {
43		o_bits -= 8;
44		buffer[out_index++] = tmp & 0xff;
45		tmp >>= 8;
46		}
47		}
48
49		write_bits(clear_code);
50		int last_index;
51		for(i=0; i < in_len;){
52		last_index = data[i];
53		o = i + 1;
54		// If we are reading the last byte just write it out as it is.
55		if (o >= in_len){
56		write_bits(data[i]);
57		break;
58		}
59		// Loop till we find an unknown sequence
60		while(o < in_len -1 && dictionary[ (last_index * 256) + data[o] ] >= 0){
61		last_index = (ushort) dictionary[ (last_index * 256) + data[o] ]; // & 0xffff;
62		o++;
63		}
64		// write the code and save the new sequence
65		dictionary[ (last_index * 256) + data[o] ] = next_code;
66		write_bits(last_index);
67		i = o;
68		if(next_code < MAX_DICT_SIZE){
69		// increase bits used
70		if((next_code & (next_code - 1)) == 0){ // Is a power of 2 ?
71		code_size++;
72		// TODO: clean this up # Should be way nicer to calc.. to tired TODO:
73		// increase the dictionary if required
74		if(dict_size < (1 << code_size) * 256){
75		dict_size = (1 << code_size) * 256;
76		dictionary = realloc(dictionary, dict_size * sizeof(int16_t));
77		for(j=(1 << (code_size -1))*256; j < dict_size; dictionary[j++]=-1);
78		}
79		}
80		next_code++;
81		}else{
82		// Reset everything and begin all over again.
83		write_bits(clear_code);
84		code_size = start_bits + 1;
85		next_code = clear_code + 2;
86		for(j=0; j < dict_size; dictionary[j++]=-1);
87		}
88		}
89		// Send the trailer and the remaining byte and we are done.
90		write_bits(clear_code+1);
91		if(tmp) buffer[out_index++] = tmp & 0xff;
92		*out_len = out_index; // size ie. points to one after last valid byte.
93		free(dictionary);
94		return buffer;
95		}