Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - /* My tiny gzip decompressor without using zlib. - Joel Yliluoma
 - * http://iki.fi/bisqwit/ , http://youtube.com/user/Bisqwit
 - * Inspired and influenced by a 13th IOCCC winner program by Ron McFarland */
 - /* Fun fact: Contains zero new/delete, and no STL data structures */
 - #include <assert.h>
 - // This is the public method declared (later) in this file.
 - template<typename InputFunctor, typename OutputFunctor>
 - void Deflate(InputFunctor&& input, OutputFunctor&& output);
 - // The rest of the file is just for the curious about implementation.
 - namespace gunzip_ns
 - {
 - template<typename N, typename U=unsigned short>
 - struct PoolType
 - {
 - N* const ptr; // Pointer to the beginning of storage for nodes
 - U used; // How many nodes have been allocated
 - void clear() { used=0; }
 - N* get(U n) const { return n ? &ptr[n-1] : nullptr; }
 - N* allocate(U& n) { return n ? get(n) : get(n = ++used)->Reset(); }
 - };
 - struct huffnode
 - {
 - unsigned short branch[2], value,_;
 - huffnode* Reset() { branch[0]=0; branch[1]=0; return this; }
 - // Create a huffman tree for num_values, with given lengths.
 - // The tree will be put in branch[which]; other branch not touched.
 - // Length = how many bits to use for encoding this value.
 - void Create(unsigned which, unsigned num_values,
 - const unsigned char lengths[], PoolType<huffnode>& pool)
 - {
 - branch[which] = 0;
 - unsigned count[16] { 0 }, offs[16] { 0 };
 - for(unsigned a = 0; a < num_values; ++a) count[lengths[a]] += 1;
 - for(unsigned a = 0; a < 15; a++) offs[a + 1] = (offs[a] + count[a]) * 2u;
 - // Largest values seen in wild for offs[16]: 16777216
 - for(unsigned value = 0; value < num_values; ++value)
 - if(unsigned length = lengths[value])
 - {
 - huffnode* node = pool.allocate(branch[which]);
 - for(unsigned q = offs[lengths[value]]++; length--; )
 - node = pool.allocate(node->branch[ (q >> length) & 1 ]);
 - node->value = value;
 - }
 - }
 - };
 - // Length codes: base + number of bits (0-28)
 - // Distance codes: base + number of bits (0-29)
 - // Order of lengths for dynamic block decoding (0-18)
 - constexpr unsigned short
 - dbase[30] { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193 };
 - constexpr unsigned char
 - lbase[29] { 0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224,255 }, // +3
 - lbits[29] { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0 },
 - dbits[30] { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 },
 - order[19] { 16,17,18,0, 8,7,9,6, 10,5,11,4, 12,3,13,2, 14,1,15 },
 - fix[15] { 7,32,4, 8,35,2, 8,0,20, 9,18,14, 5,36,4 };
 - }
 - template<typename InputFunctor, typename OutputFunctor>
 - void Deflate(InputFunctor&& input, OutputFunctor&& output)
 - {
 - using namespace gunzip_ns;
 - // Bit-by-bit input routine
 - struct { unsigned long Cache; unsigned Count,_; } Bits {0,0,0};
 - auto GetBits = [&Bits,&input](unsigned l)
 - {
 - for (; Bits.Count < l; Bits.Count += 8)
 - Bits.Cache |= (input() & 0xFFul) << Bits.Count;
 - unsigned w = Bits.Cache & ((1ul << l) - 1);
 - Bits.Cache >>= l;
 - Bits.Count -= l;
 - return w;
 - };
 - // Read stream header
 - unsigned header = GetBits(16), winsize = 32768u;
 - // ^ Read deflate header: method[4] ; winsize[4] ; checksum[8]
 - if(header == 0x8B1F) // Is it actually a gzip header?
 - {
 - unsigned fmt = GetBits(8), o = GetBits(8); // Get format & flags
 - assert(fmt == 8);
 - GetBits(48); // MTIME (3 of 4); MTIME(1 of 4), XFL and OS
 - if(o&4) // Skip extra fields as indicated by FEXTRA
 - for(unsigned q = GetBits(16); q--; GetBits(8));
 - if(o&8) while(GetBits(8)) {} // NAME: Skip filename if FNAME was present
 - if(o&16) while(GetBits(8)) {} // COMMENT: Skip comment if FCOMMENT was present
 - if(o&2) GetBits(16); // HCRC: Skip FCRC if was present
 - }
 - else // No. Deflate header?
 - {
 - winsize = 256 << ((header >> 4) & 0xF);
 - assert((header & 0x200F) == 8 && !((((header<<8)+(header>>8))&0xFFFF)%31) && winsize <= 32768u);
 - }
 - // Output routine. Also updates the window for backward references.
 - unsigned char WindowData[32768u/*winsize*/], Lengths[288+32]; // Lengths are in 0..15 range
 - struct { unsigned Head,SizeMask; unsigned char* const Data; } Window {0,winsize-1, WindowData};
 - auto Put = [&Window,&output](unsigned char l) { output(Window.Data[Window.Head++ & Window.SizeMask] = l); };
 - // Function for reading a huffman-encoded value from bitstream.
 - constexpr unsigned pool1_reserved=638, pool2_reserved=642;
 - huffnode tables_fixed={{0,0},0,0}, tables_dyn, pool[pool1_reserved+pool2_reserved], *cur_table;
 - PoolType<huffnode> pool1{&pool[pool2_reserved],0}, pool2{&pool[0],0}, *cur_pool;
 - auto Read = [&GetBits](const huffnode& root, unsigned which, const PoolType<huffnode>& pool)
 - {
 - const auto* node = pool.get(root.branch[which]);
 - while(node->branch[1]) node = pool.get(node->branch[GetBits(1)]);
 - return node->value;
 - };
 - // Read compressed blocks
 - for(bool last=false; !last; )
 - {
 - switch(last=GetBits(1), header=GetBits(2))
 - {
 - case 0: // copy stored block data
 - GetBits(Bits.Count%8); // Go to byte boundary (discard a few bits)
 - {unsigned a = GetBits(16), b = GetBits(16);
 - assert(a == (b^0xFFFF));
 - // Note: It is valid for "a" to be 0 here.
 - // It is sometimes used for aligning the stream to byte boundary.
 - while(a--) Put(GetBits(8)); }
 - continue;
 - case 1: // fixed block
 - if(!tables_fixed.branch[0])
 - {
 - // Create fixed tables. Technically, this table could be generated
 - // at compile-time, but I don't see an easy way to do it.
 - for(auto n=0u; n<sizeof fix; )
 - for(unsigned what=fix[n++], p=8*fix[n++], end=p+8*fix[n++]; p<end; ++p)
 - Lengths[p] = what;
 - // Note: fix[] intentionally contains overlaps. It is designed
 - // such that clang generates rather optimal SSE assembly code.
 - tables_fixed.Create(0, 288, Lengths, pool1); // 575 used here
 - tables_fixed.Create(1, 32, Lengths+288, pool1); // 63 used here
 - assert(pool1.used == pool1_reserved);
 - // ^pool1 has always 638 elements. If this assertion fails,
 - // something is wrong. Maybe coincidentally same as (288+32-1)*2.
 - }
 - cur_table = &tables_fixed; cur_pool = &pool1;
 - break;
 - default: // dynamic block
 - assert(header==2);
 - unsigned nlen = GetBits(5) + 257; // 257..288
 - unsigned ndist = GetBits(5) + 1; // 1..32
 - unsigned ncode = GetBits(4) + 4; // 4..19
 - assert(nlen+ndist <= 288+32);
 - for(unsigned a=0; a<19; ++a) Lengths[order[a]] = a<ncode ? GetBits(3) : 0;
 - pool2.clear();
 - tables_dyn.Create(0, 19, Lengths+0, pool2); // length-lengths
 - assert(pool2.used <= pool2_reserved);
 - for(unsigned end,lencode,prev_lencode=0, code=0; code < nlen + ndist; )
 - {
 - switch(lencode = Read(tables_dyn, 0, pool2))
 - {
 - case 16: end = code+3+GetBits(2); lencode = prev_lencode; break;
 - case 17: end = code+3+GetBits(3); lencode = 0; break;
 - case 18: end = code+11+GetBits(7); lencode = 0; break;
 - default: end = code+1; prev_lencode = lencode; assert(lencode < 16);
 - }
 - assert(end <= nlen+ndist);
 - while(code < end) Lengths[code++] = lencode;
 - }
 - pool2.clear();
 - tables_dyn.Create(0, nlen, Lengths+0, pool2);
 - tables_dyn.Create(1, ndist, Lengths+nlen, pool2);
 - assert(pool2.used <= pool2_reserved);
 - // ^If this assertion fails, simply increase pool2_reserved.
 - // Try e.g. 2048-pool1_reserved.
 - // So far the largest value seen in the wild is 630.
 - //fprintf(stderr, "pool2 size%5u\n", pool2.used);
 - cur_table = &tables_dyn; cur_pool = &pool2;
 - }
 - // Do actual deflating.
 - for(;;)
 - {
 - auto code = Read(*cur_table, 0, *cur_pool);
 - if(!(code & -256)) Put(code); // 0..255: literal byte
 - else if(!(code & 0xFF)) break; // 256=end
 - else // 257..285: backwards reference
 - {
 - unsigned length = (lbase-257)[code] + GetBits((lbits-257)[code]) + 3;
 - code = Read(*cur_table, 1, *cur_pool); // Read distance code
 - unsigned distance = dbase[code] + GetBits(dbits[code]);
 - unsigned offs = Window.Head - distance;
 - do Put(Window.Data[offs++ & Window.SizeMask]); while(--length);
 - }
 - }
 - }
 - // Note: after this, may come a checksum, and a trailer. Ignoring them.
 - }
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment