Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stdint.h>
- #include <unistd.h>
- #include <fcntl.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- typedef uint8_t byte;
- typedef uint16_t ushort;
- typedef uint32_t uint;
- #define MAX_DICT_SIZE 4096
- byte* lzw_encode_default(byte *data, uint in_len, int start_bits, int *out_len){
- uint tmp, o_bits, out_index, i, j, o;
- uint code_size = start_bits +1;
- uint clear_code = 1 << start_bits;
- uint next_code = clear_code +2;
- tmp = o_bits = out_index = 0;
- // we assume we don't manage to double the size somehow... (dangerous waters and such)
- // To be on the safe side we need to calculate for the worst case like:
- // (len(data) * 12 (bit, just to be sure) + how_often_we_send_clear_in_the_worst_case) / 8 + one padding byte (?)
- byte *buffer = (byte*)malloc(in_len * 2); // free this thing after use !
- // we save our dictionary as enflated array
- // For example if the sequence "abcd" is known:
- // dict['a' * 256 + 'b'] = CODE | CODE2 = dict[CODE * 256 + 'c'] | "abcd" = dict[CODE2 * 256 + 'd']
- // the dictionarys size will be maximal (((2 ** 12) * 256 * 4) / 1024) / 1024 = 2.0 (mb)
- // Because the valid codes are maximal 12 bits long we can use signed shorts (<0 -> invalid).
- uint dict_size = 256 * (1 << code_size);
- int16_t *dictionary = (int16_t*)malloc(dict_size * sizeof(int16_t));
- for(i=0; i < dict_size; dictionary[i++]=-1); // prepare the dictionary
- // Some bit magic to write with the chosen code_size
- inline void write_bits(ushort x) {
- tmp |= x << o_bits;
- o_bits += code_size;
- while (o_bits >= 8) {
- o_bits -= 8;
- buffer[out_index++] = tmp & 0xff;
- tmp >>= 8;
- }
- }
- write_bits(clear_code);
- int last_index;
- for(i=0; i < in_len;){
- last_index = data[i];
- o = i + 1;
- // If we are reading the last byte just write it out as it is.
- if (o >= in_len){
- write_bits(data[i]);
- break;
- }
- // Loop till we find an unknown sequence
- while(o < in_len -1 && dictionary[ (last_index * 256) + data[o] ] >= 0){
- last_index = (ushort) dictionary[ (last_index * 256) + data[o] ]; // & 0xffff;
- o++;
- }
- // write the code and save the new sequence
- dictionary[ (last_index * 256) + data[o] ] = next_code;
- write_bits(last_index);
- i = o;
- if(next_code < MAX_DICT_SIZE){
- // increase bits used
- if((next_code & (next_code - 1)) == 0){ // Is a power of 2 ?
- code_size++;
- // TODO: clean this up # Should be way nicer to calc.. to tired TODO:
- // increase the dictionary if required
- if(dict_size < (1 << code_size) * 256){
- dict_size = (1 << code_size) * 256;
- dictionary = realloc(dictionary, dict_size * sizeof(int16_t));
- for(j=(1 << (code_size -1))*256; j < dict_size; dictionary[j++]=-1);
- }
- }
- next_code++;
- }else{
- // Reset everything and begin all over again.
- write_bits(clear_code);
- code_size = start_bits + 1;
- next_code = clear_code + 2;
- for(j=0; j < dict_size; dictionary[j++]=-1);
- }
- }
- // Send the trailer and the remaining byte and we are done.
- write_bits(clear_code+1);
- if(tmp) buffer[out_index++] = tmp & 0xff;
- *out_len = out_index; // size ie. points to one after last valid byte.
- free(dictionary);
- return buffer;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement