Advertisement
Guest User

Untitled

a guest
Jan 21st, 2019
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 27.46 KB | None | 0 0
  1. namespace Pdf {
  2.  
  3. /* RFC 1951 compression ( https://www.ietf.org/rfc/rfc1951.txt ) aims to compress a stream of bytes using :
  4.  
  5. (1) LZ77 matching, where if an input sequences of at least 3 bytes re-occurs, it may be coded
  6. as a <length,distance> pointer.
  7.  
  8. (2) Huffman coding, where variable length codes are used, with more frequently used symbols encoded in less bits.
  9.  
  10. The input may be split into blocks, a new block introduces a new set of Huffman codes. The choice of block
  11. boundaries can affect compression. The method used to determine the block size is as follows:
  12.  
  13. (1) The size of the next block to be output is set to an initial value.
  14.  
  15. (2) A comparison is made between encoding two blocks of this size, or a double-length block.
  16.  
  17. (3) If the double-length encoding is better, that becomes the block size, and the process repeats.
  18.  
  19. LZ77 compression is implemented as suggested in the standard, although no attempt is made to
  20. truncate searches ( except searches terminate when the distance limit of 32k bytes is reached ).
  21.  
  22. Only dynamic huffman blocks are used, no attempt is made to use Fixed or Copy blocks.
  23.  
  24. Deflator ( this code) typically achieves better compression than ZLib ( http://www.componentace.com/zlib_.NET.htm
  25. via https://zlib.net/, default settings ) by a few percent, and is faster on small inputs, but slower
  26. on large inputs ( perhaps due to searches not being truncated ).
  27.  
  28. For example, compressing a font file FreeSans.ttf ( 264,072 bytes ), Zlib output is 148,324 bytes
  29. in 44 milliseconds, whereas Deflator output is 144,289 bytes, 4,035 bytes smaller, in 59 milliseconds.
  30.  
  31. Compressing a C# source file of 19,483 bytes, Zlib output size was 5,965 bytes in 27 milliseconds,
  32. whereas Deflator output was 5,890 bytes, 75 bytes smaller, in 16 milliseconds.
  33.  
  34. Sample usage:
  35.  
  36. byte [] data = { 1, 2, 3, 4 };
  37. var mbs = new MemoryBitStream();
  38. Deflator.Deflate( data, mbs, 1 );
  39. byte [] deflated_data = mbs.ToArray();
  40.  
  41. The MemoryBitStream may alternatively be copied to a stream, this may be useful when writing PDF files ( the intended use case ).
  42.  
  43. Auxiliary top level classes/structs ( included in this file ):
  44. * OutBitStream.
  45. * MemoryBitStream : an implementation of OutBitStream.
  46. * HuffmanCoding calculates Huffman codes.
  47. * Heap : used to implemnt HuffmanCoding.
  48. */
  49.  
  50. sealed class Deflator
  51. {
  52. public static void Deflate( byte [] input, OutBitStream output, int format )
  53. {
  54. Deflator d = new Deflator( input, output );
  55. if ( format == 1 ) output.WriteBits( 16, 0x9c78 ); // RFC 1950 bytes.
  56. d.FindMatches( input );
  57. d.Buffered = input.Length;
  58. while ( !d.OutputBlock( true ) );
  59. if ( format == 1 )
  60. {
  61. output.Pad( 8 );
  62. output.WriteBits( 32, Adler32( input ) ); // RFC 1950 checksum.
  63. }
  64. }
  65.  
  66. // Private constants.
  67.  
  68. // RFC 1951 limits.
  69. private const int MinMatch = 3;
  70. private const int MaxMatch = 258;
  71. private const int MaxDistance = 0x8000;
  72.  
  73. private const int StartBlockSize = 0x1000; // Initial blocksize, actual may be larger or smaller. Need not be power of two.
  74. private const bool DynamicBlockSize = true;
  75. private const int MaxBufferSize = 0x8000; // Must be power of 2.
  76.  
  77. // Instead of initialising LZ77 hashTable and link arrays to -(MaxDistance+1), EncodePosition
  78. // is added when storing a value and subtracted when retrieving a value.
  79. // This means a default value of 0 will always be more distant than MaxDistance.
  80. private const int EncodePosition = MaxDistance + 1;
  81.  
  82. // Private fields.
  83.  
  84. private byte [] Input;
  85. private OutBitStream Output;
  86.  
  87. private int Buffered; // How many Input bytes have been processed to intermediate buffer.
  88. private int Finished; // How many Input bytes have been written to Output.
  89.  
  90. // Intermediate circular buffer for storing LZ77 matches.
  91. private int [] PositionBuffer;
  92. private ushort [] LengthBuffer;
  93. private ushort [] DistanceBuffer;
  94. private int BufferMask;
  95. private int BufferWrite, BufferRead; // Indexes for writing and reading.
  96.  
  97. // Private functions and classes.
  98.  
  99. private Deflator( byte [] input, OutBitStream output )
  100. {
  101. Input = input;
  102. Output = output;
  103.  
  104. int bufferSize = CalcBufferSize( input.Length / 3, MaxBufferSize );
  105. PositionBuffer = new int[ bufferSize ];
  106. LengthBuffer = new ushort[ bufferSize ];
  107. DistanceBuffer = new ushort[ bufferSize ];
  108. BufferMask = bufferSize - 1;
  109. }
  110.  
  111. public static int CalcBufferSize( int n, int max )
  112. // Calculates a power of 2 >= n, but not more than max.
  113. {
  114. if ( n >= max ) return max;
  115. int result = 1;
  116. while ( result < n ) result = result << 1;
  117. return result;
  118. }
  119.  
  120. private void FindMatches( byte [] input ) // LZ77 compression.
  121. {
  122. if ( input.Length < MinMatch ) return;
  123. int limit = input.Length - 2;
  124.  
  125. int hashShift = CalcHashShift( limit * 2 );
  126. uint hashMask = ( 1u << ( MinMatch * hashShift ) ) - 1;
  127.  
  128. int [] hashTable = new int[ hashMask + 1 ];
  129. int [] link = new int[ limit ];
  130.  
  131. int position = 0; // position in input.
  132. uint hash = ( (uint)input[ 0 ] << hashShift ) + input[ 1 ];
  133.  
  134. while ( position < limit )
  135. {
  136. hash = ( ( hash << hashShift ) + input[ position + 2 ] ) & hashMask;
  137. int hashEntry = hashTable[ hash ];
  138. hashTable[ hash ] = position + EncodePosition;
  139. if ( position >= hashEntry ) // Equivalent to position - ( hashEntry - EncodePosition ) > MaxDistance.
  140. {
  141. position += 1;
  142. continue;
  143. }
  144. link[ position ] = hashEntry;
  145.  
  146. int distance, match = BestMatch( input, link, hashEntry - EncodePosition, position, out distance );
  147. position += 1;
  148. if ( match < MinMatch ) continue;
  149.  
  150. // "Lazy matching" RFC 1951 p.15 : if there are overlapping matches, there is a choice over which of the match to use.
  151. // Example: "abc012bc345.... abc345". Here abc345 can be encoded as either [abc][345] or as a[bc345].
  152. // Since a range typically needs more bits to encode than a single literal, choose the latter.
  153. while ( position < limit )
  154. {
  155. hash = ( ( hash << hashShift ) + input[ position + 2 ] ) & hashMask;
  156. hashEntry = hashTable[ hash ];
  157. hashTable[ hash ] = position + EncodePosition;
  158. if ( position >= hashEntry ) break;
  159. link[ position ] = hashEntry;
  160.  
  161. int distance2, match2 = BestMatch( input, link, hashEntry - EncodePosition, position, out distance2 );
  162. if ( match2 > match || match2 == match && distance2 < distance )
  163. {
  164. match = match2;
  165. distance = distance2;
  166. position += 1;
  167. }
  168. else break;
  169. }
  170.  
  171. int copyEnd = SaveMatch( position - 1, match, distance );
  172. if ( copyEnd > limit ) copyEnd = limit;
  173.  
  174. position += 1;
  175.  
  176. // Advance to end of copied section.
  177. while ( position < copyEnd )
  178. {
  179. hash = ( ( hash << hashShift ) + input[ position + 2 ] ) & hashMask;
  180. link[ position ] = hashTable[ hash ];
  181. hashTable[ hash ] = position + EncodePosition;
  182. position += 1;
  183. }
  184. }
  185. }
  186.  
  187. private static int BestMatch( byte [] input, int [] link, int oldPosition, int position, out int distance )
  188. {
  189. int avail = input.Length - position;
  190. if ( avail > MaxMatch ) avail = MaxMatch;
  191.  
  192. int bestMatch = 0, bestDistance = 0;
  193.  
  194. while ( true )
  195. {
  196. if ( input[ position + bestMatch ] == input[ oldPosition + bestMatch ] )
  197. {
  198. int match = MatchLength( input, position, oldPosition );
  199. if ( match > bestMatch )
  200. {
  201. bestMatch = match;
  202. bestDistance = position - oldPosition;
  203. if ( bestMatch == avail ) break;
  204. }
  205. }
  206. oldPosition = link[ oldPosition ];
  207. if ( position >= oldPosition ) break;
  208. oldPosition -= EncodePosition;
  209. }
  210. distance = bestDistance;
  211. return bestMatch;
  212. }
  213.  
  214. private static int MatchLength( byte [] input, int p, int q )
  215. {
  216. int end = input.Length;
  217. if ( end > p + MaxMatch ) end = p + MaxMatch;
  218. int pstart = p;
  219. while ( p < end && input[ p ] == input [ q ] )
  220. {
  221. p += 1;
  222. q += 1;
  223. }
  224. return p - pstart;
  225. }
  226.  
  227. private static int CalcHashShift( int n )
  228. {
  229. int p = 1;
  230. int result = 0;
  231. while ( n > p )
  232. {
  233. p = p << MinMatch;
  234. result += 1;
  235. if ( result == 6 ) break;
  236. }
  237. return result;
  238. }
  239.  
  240. private int SaveMatch ( int position, int length, int distance )
  241. // Called from FindMatches to save a <length,distance> match. Returns position + length.
  242. {
  243. // System.Console.WriteLine( "SaveMatch at " + position + " length=" + length + " distance=" + distance );
  244. int i = BufferWrite;
  245. PositionBuffer[ i ] = position;
  246. LengthBuffer[ i ] = (ushort) length;
  247. DistanceBuffer[ i ] = (ushort) distance;
  248. i = ( i + 1 ) & BufferMask;
  249. if ( i == BufferRead ) OutputBlock( false );
  250. BufferWrite = i;
  251. position += length;
  252. Buffered = position;
  253. return position;
  254. }
  255.  
  256. private bool OutputBlock( bool last )
  257. {
  258. int blockSize = Buffered - Finished; // Uncompressed size in bytes.
  259.  
  260. if ( blockSize > StartBlockSize )
  261. {
  262. blockSize = ( last && blockSize < StartBlockSize*2 ) ? blockSize >> 1 : StartBlockSize;
  263. }
  264.  
  265. Block b;
  266. int bits; // Compressed size in bits.
  267.  
  268. // While block construction fails, reduce blockSize.
  269. while ( true )
  270. {
  271. b = new Block( this, blockSize, null, out bits );
  272. if ( bits >= 0 ) break;
  273. blockSize -= blockSize / 3;
  274. }
  275.  
  276. // Investigate larger block size.
  277. while ( b.End < Buffered && DynamicBlockSize )
  278. {
  279. // b2 is a block which starts just after b.
  280. int bits2; Block b2 = new Block( this, blockSize, b, out bits2 );
  281. if ( bits2 < 0 ) break;
  282.  
  283. // b3 is the block which encodes b and b2 together.
  284. int bits3; Block b3 = new Block( this, b2.End - b.Start, null, out bits3 );
  285. if ( bits3 < 0 ) break;
  286.  
  287. if ( bits3 > bits + bits2 ) break;
  288.  
  289. bits = bits3;
  290. b = b3;
  291. blockSize += blockSize;
  292. }
  293.  
  294. // Output the block.
  295. if ( b.End < Buffered ) last = false;
  296. b.WriteBlock( this, last );
  297. return last;
  298. }
  299.  
  300. public static uint Adler32( byte [] b ) // Checksum function per RFC 1950.
  301. {
  302. uint s1 = 1, s2 = 0;
  303. for ( int i = 0; i < b.Length; i += 1 )
  304. {
  305. s1 = ( s1 + b[ i ] ) % 65521;
  306. s2 = ( s2 + s1 ) % 65521;
  307. }
  308. return s2 * 65536 + s1;
  309. }
  310.  
  311. private class Block
  312. {
  313. public readonly int Start, End; // Range of input encoded.
  314.  
  315. public Block( Deflator d, int blockSize, Block previous, out int bits )
  316. // The block is not immediately output, to allow caller to try different block sizes.
  317. // Instead, the number of bits neeed to encoded the block is returned ( excluding "extra" bits ).
  318. {
  319. Output = d.Output;
  320. bits = -1;
  321.  
  322. if ( previous == null )
  323. {
  324. Start = d.Finished;
  325. BufferStart = d.BufferRead;
  326. }
  327. else
  328. {
  329. Start = previous.End;
  330. BufferStart = previous.BufferEnd;
  331. }
  332.  
  333. int avail = d.Buffered - Start;
  334. if ( blockSize > avail ) blockSize = avail;
  335.  
  336. End = TallyFrequencies( d, blockSize );
  337. Lit.Used[ 256 ] += 1; // End of block code.
  338.  
  339. if ( Lit.ComputeCodes() || Dist.ComputeCodes() ) return;
  340.  
  341. if ( Dist.Count == 0 ) Dist.Count = 1;
  342.  
  343. // Compute length encoding.
  344. DoLengthPass( 1 );
  345. if ( Len.ComputeCodes() ) return;
  346.  
  347. // The length codes are permuted before being stored ( so that # of trailing zeroes is likely to be more ).
  348. Len.Count = 19; while ( Len.Count > 4 && Len.Bits[ ClenAlphabet[ Len.Count - 1 ] ] == 0 ) Len.Count -= 1;
  349.  
  350. bits = 17 + 3 * Len.Count + Len.Total() + Lit.Total() + Dist.Total();
  351. }
  352.  
  353. public void WriteBlock( Deflator d, bool last )
  354. {
  355. OutBitStream output = Output;
  356. output.WriteBits( 1, last ? 1u : 0u );
  357. output.WriteBits( 2, 2 );
  358. output.WriteBits( 5, (uint)( Lit.Count - 257 ) );
  359. output.WriteBits( 5, (uint)( Dist.Count - 1 ) );
  360. output.WriteBits( 4, (uint)( Len.Count - 4 ) );
  361.  
  362. for ( int i = 0; i < Len.Count; i += 1 )
  363. output.WriteBits( 3, Len.Bits[ ClenAlphabet[ i ] ] );
  364.  
  365. DoLengthPass( 2 );
  366. PutCodes( d );
  367. output.WriteBits( Lit.Bits[ 256 ], Lit.Codes[ 256 ] ); // End of block code
  368. }
  369.  
  370. // Block private fields and constants.
  371.  
  372. private OutBitStream Output;
  373. private int BufferStart, BufferEnd;
  374.  
  375. // Huffman codings : Lit = Literal or Match Code, Dist = Distance code, Len = Length code.
  376. HuffmanCoding Lit = new HuffmanCoding(15,288), Dist = new HuffmanCoding(15,32), Len = new HuffmanCoding(7,19);
  377.  
  378. // Counts for code length encoding.
  379. private int LengthPass, PreviousLength, ZeroRun, Repeat;
  380.  
  381. // RFC 1951 constants.
  382. private readonly static byte [] ClenAlphabet = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
  383. private readonly static byte [] MatchExtra = { 0,0,0,0, 0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3, 4,4,4,4, 5,5,5,5, 0 };
  384. private readonly static ushort [] MatchOff = { 3,4,5,6, 7,8,9,10, 11,13,15,17, 19,23,27,31, 35,43,51,59, 67,83,99,115,
  385. 131,163,195,227, 258, 0xffff };
  386. private readonly static byte [] DistExtra = { 0,0,0,0, 1,1,2,2, 3,3,4,4, 5,5,6,6, 7,7,8,8, 9,9,10,10, 11,11,12,12, 13,13 };
  387. private readonly static ushort [] DistOff = { 1,2,3,4, 5,7,9,13, 17,25,33,49, 65,97,129,193, 257,385,513,769,
  388. 1025,1537,2049,3073, 4097,6145,8193,12289, 16385,24577, 0xffff };
  389.  
  390. // Block private functions.
  391.  
  392. private int TallyFrequencies( Deflator d, int blockSize )
  393. {
  394. int position = Start;
  395. int end = position + blockSize;
  396.  
  397. int bufferRead = BufferStart;
  398. while ( position < end && bufferRead != d.BufferWrite )
  399. {
  400. int matchPosition = d.PositionBuffer[ bufferRead ];
  401. if ( matchPosition >= end ) break;
  402.  
  403. int length = d.LengthBuffer[ bufferRead ];
  404. int distance = d.DistanceBuffer[ bufferRead ];
  405. bufferRead = ( bufferRead + 1 ) & d.BufferMask;
  406.  
  407. byte [] input = d.Input;
  408. while ( position < matchPosition )
  409. {
  410. Lit.Used[ input[ position ] ] += 1;
  411. position += 1;
  412. }
  413.  
  414. position += length;
  415.  
  416. // Compute match and distance codes.
  417. int mc = 0; while ( length >= MatchOff[ mc ] ) mc += 1; mc -= 1;
  418. int dc = 29; while ( distance < DistOff[ dc ] ) dc -= 1;
  419.  
  420. Lit.Used[ 257 + mc ] += 1;
  421. Dist.Used[ dc ] += 1;
  422. }
  423.  
  424. while ( position < end )
  425. {
  426. Lit.Used[ d.Input[ position ] ] += 1;
  427. position += 1;
  428. }
  429.  
  430. BufferEnd = bufferRead;
  431. return position;
  432. }
  433.  
  434. private void PutCodes( Deflator d )
  435. {
  436. byte [] input = d.Input;
  437. OutBitStream output = d.Output;
  438.  
  439. int position = Start;
  440. int bufferRead = BufferStart;
  441.  
  442. while ( position < End && bufferRead != d.BufferWrite )
  443. {
  444. int matchPosition = d.PositionBuffer[ bufferRead ];
  445.  
  446. if ( matchPosition >= End ) break;
  447.  
  448. int length = d.LengthBuffer[ bufferRead ];
  449. int distance = d.DistanceBuffer[ bufferRead ];
  450.  
  451. bufferRead = ( bufferRead + 1 ) & d.BufferMask;
  452.  
  453. while ( position < matchPosition )
  454. {
  455. byte b = d.Input[ position ];
  456. output.WriteBits( Lit.Bits[ b ], Lit.Codes[ b ] );
  457. position += 1;
  458. }
  459. position += length;
  460.  
  461. // Compute match and distance codes.
  462. int mc = 0; while ( length >= MatchOff[ mc ] ) mc += 1; mc -= 1;
  463. int dc = 29; while ( distance < DistOff[ dc ] ) dc -= 1;
  464.  
  465. output.WriteBits( Lit.Bits[ 257 + mc ], Lit.Codes[ 257 + mc ] );
  466. output.WriteBits( MatchExtra[ mc ], (uint)(length-MatchOff[ mc ] ) );
  467. output.WriteBits( Dist.Bits[ dc ], Dist.Codes[ dc ] );
  468. output.WriteBits( DistExtra[ dc ], (uint)(distance-DistOff[ dc ] ) );
  469. }
  470.  
  471. while ( position < End )
  472. {
  473. byte b = input[ position ];
  474. output.WriteBits( Lit.Bits[ b ], Lit.Codes[ b ] );
  475. position += 1;
  476. }
  477.  
  478. d.BufferRead = bufferRead;
  479. d.Finished = position;
  480. }
  481.  
  482. // Run length encoding of code lengths - RFC 1951, page 13.
  483.  
  484. private void DoLengthPass( int pass )
  485. {
  486. LengthPass = pass;
  487. EncodeLengths( Lit.Count, Lit.Bits, true );
  488. EncodeLengths( Dist.Count, Dist.Bits, false );
  489. }
  490.  
  491. private void PutLength( int code )
  492. {
  493. if ( LengthPass == 1 )
  494. Len.Used[ code ] += 1;
  495. else
  496. Output.WriteBits( Len.Bits[ code ], Len.Codes[ code ] );
  497. }
  498.  
  499. private void EncodeLengths( int n, byte [] lengths, bool isLit )
  500. {
  501. if ( isLit )
  502. {
  503. PreviousLength = 0;
  504. ZeroRun = 0;
  505. Repeat = 0;
  506. }
  507. for ( int i = 0; i < n; i += 1 )
  508. {
  509. int length = lengths[ i ];
  510. if ( length == 0 )
  511. {
  512. EncodeRepeat();
  513. ZeroRun += 1;
  514. PreviousLength = 0;
  515. }
  516. else if ( length == PreviousLength )
  517. {
  518. Repeat += 1;
  519. }
  520. else
  521. {
  522. EncodeZeroRun();
  523. EncodeRepeat();
  524. PutLength( length );
  525. PreviousLength = length;
  526. }
  527. }
  528. if ( !isLit )
  529. {
  530. EncodeZeroRun();
  531. EncodeRepeat();
  532. }
  533. }
  534.  
  535. private void EncodeRepeat()
  536. {
  537. while ( Repeat > 0 )
  538. {
  539. if ( Repeat < 3 )
  540. {
  541. PutLength( PreviousLength );
  542. Repeat -= 1;
  543. }
  544. else
  545. {
  546. int x = Repeat;
  547. if ( x > 6 ) x = 6;
  548. PutLength( 16 );
  549. if ( LengthPass == 2 )
  550. {
  551. Output.WriteBits( 2, (uint)( x - 3 ) );
  552. }
  553. Repeat -= x;
  554. }
  555. }
  556. }
  557.  
  558. private void EncodeZeroRun()
  559. {
  560. while ( ZeroRun > 0 )
  561. {
  562. if ( ZeroRun < 3 )
  563. {
  564. PutLength( 0 );
  565. ZeroRun -= 1;
  566. }
  567. else if ( ZeroRun < 11 )
  568. {
  569. PutLength( 17 );
  570. if ( LengthPass == 2 ) Output.WriteBits( 3, (uint)( ZeroRun - 3 ) );
  571. ZeroRun = 0;
  572. }
  573. else
  574. {
  575. int x = ZeroRun;
  576. if ( x > 138 ) x = 138;
  577. PutLength( 18 );
  578. if ( LengthPass == 2 ) Output.WriteBits( 7, (uint)( x - 11 ) );
  579. ZeroRun -= x;
  580. }
  581. }
  582. }
  583. } // end class Block
  584.  
  585. } // end class Deflator
  586.  
  587.  
  588. // ******************************************************************************
  589.  
  590. struct HuffmanCoding // Variable length coding.
  591. {
  592. public int Count; // Number of used symbols.
  593. public int [] Used; // Count of how many times a symbol is used in the block being encoded.
  594. public byte [] Bits; // Number of bits used to encode a symbol.
  595. public ushort [] Codes; // Huffman code for a symbol ( bit 0 is most significant ).
  596. private int Limit; // Maxiumum number of bits for a code.
  597.  
  598. public HuffmanCoding( int limit, int symbols )
  599. {
  600. Limit = limit;
  601. Count = symbols;
  602. Used = new int[ symbols ];
  603. Bits = new byte[ symbols ];
  604. Codes = new ushort[ symbols ];
  605. }
  606.  
  607. public int Total()
  608. {
  609. int result = 0, count = Count;
  610. for ( int i = 0; i < count; i += 1 )
  611. result += Used[i] * Bits[i];
  612. return result;
  613. }
  614.  
  615. public bool ComputeCodes() // returns true if Limit is exceeded.
  616. {
  617. int count = Count;
  618.  
  619. Heap<TreeNode> heap = new Heap<TreeNode>( count, TreeNode.LessThan );
  620.  
  621. for ( int i = 0; i < Count; i += 1 )
  622. {
  623. int used = Used[ i ];
  624. if ( used > 0 ) heap.Insert( new Leaf( (ushort)i, used ) );
  625. }
  626.  
  627. int maxBits = 0;
  628.  
  629. if ( heap.Count == 1 )
  630. {
  631. heap.Remove().GetBits( Bits, 1 );
  632. maxBits = 1;
  633. }
  634. else if ( heap.Count > 1 )
  635. {
  636. do // Keep pairing the lowest frequency TreeNodes.
  637. {
  638. heap.Insert( new Branch( heap.Remove(), heap.Remove() ) );
  639. } while ( heap.Count > 1 );
  640.  
  641. TreeNode root = heap.Remove();
  642. maxBits = root.Depth;
  643. if ( maxBits > Limit ) return true;
  644. root.GetBits( Bits, 0 ); // Walk the tree to find the code lengths (Bits).
  645. }
  646.  
  647. // Compute codes, code below is from RFC 1951 page 7.
  648.  
  649. int [] bl_count = new int[ maxBits + 1 ];
  650. for ( int i = 0; i < count; i += 1 ) bl_count[ Bits[ i ] ] += 1;
  651.  
  652. int [] next_code = new int[ maxBits + 1 ];
  653. int code = 0; bl_count[ 0 ] = 0;
  654. for ( int i = 0; i < maxBits; i += 1 )
  655. {
  656. code = ( code + bl_count[ i ] ) << 1;
  657. next_code[ i+1 ] = code;
  658. }
  659.  
  660. for ( int i = 0; i < count; i += 1 )
  661. {
  662. int length = Bits[ i ];
  663. if ( length != 0 )
  664. {
  665. Codes[ i ] = (ushort)Reverse( next_code[ length ], length );
  666. next_code[ length ] += 1;
  667. }
  668. }
  669.  
  670. // Reduce count if there are unused symbols.
  671. while ( count > 0 && Bits[ count - 1 ] == 0 ) count -= 1;
  672. Count = count;
  673.  
  674. //System.Console.WriteLine( "HuffEncoder.ComputeCodes" );
  675. // for ( int i = 0; i < count; i += 1 ) if ( Bits[ i ] > 0 )
  676. // System.Console.WriteLine( "i=" + i + " len=" + Bits[ i ] + " tc=" + Codes[ i ].ToString("X") + " freq=" + Used[ i ] );
  677.  
  678. return false;
  679. }
  680.  
  681. private static int Reverse( int x, int bits )
  682. // Reverse a string of bits ( ready to be output as Huffman code ).
  683. {
  684. int result = 0;
  685. for ( int i = 0; i < bits; i += 1 )
  686. {
  687. result <<= 1;
  688. result |= x & 1;
  689. x >>= 1;
  690. }
  691. return result;
  692. }
  693.  
  694. private abstract class TreeNode
  695. {
  696. public int Used;
  697. public byte Depth;
  698.  
  699. public static bool LessThan( TreeNode a, TreeNode b )
  700. {
  701. return a.Used < b.Used || a.Used == b.Used && a.Depth < b.Depth;
  702. }
  703.  
  704. public abstract void GetBits( byte [] nbits, int length );
  705.  
  706. }
  707.  
  708. private class Leaf : TreeNode
  709. {
  710. public ushort Code;
  711.  
  712. public Leaf( ushort code, int used )
  713. {
  714. Code = code;
  715. Used = used;
  716. }
  717.  
  718. public override void GetBits( byte [] nbits, int length )
  719. {
  720. nbits[ Code ] = (byte)length;
  721. }
  722. } // end class Leaf
  723.  
  724. private class Branch : TreeNode
  725. {
  726. TreeNode Left, Right;
  727.  
  728. public Branch( TreeNode left, TreeNode right )
  729. {
  730. Left = left;
  731. Right = right;
  732. Used = left.Used + right.Used;
  733. Depth = (byte)( 1 + ( left.Depth > right.Depth ? left.Depth : right.Depth ) );
  734. }
  735.  
  736. public override void GetBits( byte [] nbits, int length )
  737. {
  738. Left.GetBits( nbits, length + 1 );
  739. Right.GetBits( nbits, length + 1 );
  740. }
  741. } // end class Branch
  742.  
  743. } // end struct HuffmanCoding
  744.  
  745.  
  746. // ******************************************************************************
  747.  
  748.  
  749. sealed class Heap<T> // An array organised so the smallest element can be efficiently removed.
  750. {
  751. public delegate bool DLessThan( T a, T b );
  752.  
  753. public int Count { get{ return _Count; } }
  754. private int _Count;
  755. private T [] Array;
  756. private DLessThan LessThan;
  757.  
  758. public Heap ( int capacity, DLessThan lessThan )
  759. {
  760. Array = new T[ capacity ];
  761. LessThan = lessThan;
  762. }
  763.  
  764. public void Insert( T e )
  765. {
  766. int j = _Count++;
  767. while ( j > 0 )
  768. {
  769. int p = ( j - 1 ) / 2; // Index of parent.
  770. T pe = Array[ p ];
  771. if ( !LessThan( e, pe ) ) break;
  772. Array[ j ] = pe; // Demote parent.
  773. j = p;
  774. }
  775. Array[ j ] = e;
  776. }
  777.  
  778. public T Remove() // Returns the smallest element.
  779. {
  780. T result = Array[ 0 ];
  781. _Count -= 1;
  782. T e = Array[ _Count ];
  783. Array[ _Count ] = default(T);
  784. int j = 0;
  785. while ( true )
  786. {
  787. int c = j * 2 + 1; if ( c >= _Count ) break;
  788. T ce = Array[ c ];
  789. if ( c + 1 < _Count )
  790. {
  791. T ce2 = Array[ c + 1 ];
  792. if ( LessThan( ce2, ce ) ) { c += 1; ce = ce2; }
  793. }
  794. if ( !LessThan( ce, e ) ) break;
  795. Array[ j ] = ce; j = c;
  796. }
  797. Array[ j ] = e;
  798. return result;
  799. }
  800.  
  801. } // end class Heap
  802.  
  803.  
  804. // ******************************************************************************
  805.  
  806.  
  807. abstract class OutBitStream
  808. {
  809. public void WriteBits( int n, ulong value )
  810. // Write first n ( 0 <= n <= 64 ) bits of value to stream, least significant bit is written first.
  811. // Unused bits of value must be zero, i.e. value must be in range 0 .. 2^n-1.
  812. {
  813. if ( n + BitsInWord >= WordCapacity )
  814. {
  815. Save( Word | value << BitsInWord );
  816. int space = WordCapacity - BitsInWord;
  817. value >>= space;
  818. n -= space;
  819. Word = 0;
  820. BitsInWord = 0;
  821. }
  822. Word |= value << BitsInWord;
  823. BitsInWord += n;
  824. }
  825.  
  826. public void Pad( int n )
  827. // Pad with zero bits to n bit boundary where n is power of 2 in range 1,2,4..64, typically n=8.
  828. {
  829. int w = BitsInWord % n;
  830. if ( w > 0 ) WriteBits( n - w, 0 );
  831. }
  832.  
  833. public abstract void Save( ulong word );
  834.  
  835. protected const int WordSize = sizeof(ulong); // Size of Word in bytes.
  836. protected const int WordCapacity = WordSize * 8; // Number of bits that can be stored Word
  837.  
  838. protected ulong Word; // Bits are first stored in Word, when full, Word is saved.
  839. protected int BitsInWord; // Number of bits currently stored in Word.
  840. }
  841.  
  842.  
  843. // ******************************************************************************
  844.  
  845.  
  846. sealed class MemoryBitStream : OutBitStream
  847. {
  848. // ByteSize returns the current size in bytes.
  849. // CopyTo copies the contents to a Stream.
  850. // ToArray returns the contents as an array of bytes.
  851.  
  852. public int ByteSize()
  853. {
  854. return ( CompleteChunks * Chunk.Capacity + WordsInCurrentChunk ) * WordSize + ( BitsInWord + 7 ) / 8;
  855. }
  856.  
  857. public void CopyTo( System.IO.Stream s )
  858. {
  859. byte [] buffer = new byte [ WordSize ];
  860. for ( Chunk c = FirstChunk; c != null; c = c.Next )
  861. {
  862. int n = ( c == CurrentChunk ) ? WordsInCurrentChunk : Chunk.Capacity;
  863. for ( int j = 0; j < n; j += 1 )
  864. {
  865. ulong w = c.Words[ j ];
  866. unchecked
  867. {
  868. buffer[0] = (byte) w;
  869. buffer[1] = (byte)( w >> 8 );
  870. buffer[2] = (byte)( w >> 16 );
  871. buffer[3] = (byte)( w >> 24 );
  872. buffer[4] = (byte)( w >> 32 );
  873. buffer[5] = (byte)( w >> 40 );
  874. buffer[6] = (byte)( w >> 48 );
  875. buffer[7] = (byte)( w >> 56 );
  876. }
  877. s.Write( buffer, 0, 8 );
  878. }
  879. }
  880.  
  881. int biw = BitsInWord;
  882. ulong word = Word;
  883. while ( biw > 0 )
  884. {
  885. s.WriteByte( unchecked( (byte) word ) );
  886. word >>= 8;
  887. biw -= 8;
  888. }
  889. }
  890.  
  891. public byte [] ToArray()
  892. {
  893. byte [] buffer = new byte[ ByteSize() ];
  894. int i = 0;
  895.  
  896. for ( Chunk c = FirstChunk; c != null; c = c.Next )
  897. {
  898. int n = ( c == CurrentChunk ) ? WordsInCurrentChunk : Chunk.Capacity;
  899. for ( int j = 0; j < n; j += 1 )
  900. {
  901. ulong w = c.Words[ j ];
  902. unchecked
  903. {
  904. buffer[i++] = (byte) w;
  905. buffer[i++] = (byte)( w >> 8 );
  906. buffer[i++] = (byte)( w >> 16 );
  907. buffer[i++] = (byte)( w >> 24 );
  908. buffer[i++] = (byte)( w >> 32 );
  909. buffer[i++] = (byte)( w >> 40 );
  910. buffer[i++] = (byte)( w >> 48 );
  911. buffer[i++] = (byte)( w >> 56 );
  912. }
  913. }
  914. }
  915.  
  916. int biw = BitsInWord;
  917. ulong word = Word;
  918. while ( biw > 0 )
  919. {
  920. buffer[ i++ ] = unchecked( (byte) word );
  921. word >>= 8;
  922. biw -= 8;
  923. }
  924. return buffer;
  925. }
  926.  
  927. public MemoryBitStream()
  928. {
  929. FirstChunk = new Chunk();
  930. CurrentChunk = FirstChunk;
  931. }
  932.  
  933. public override void Save( ulong word )
  934. {
  935. if ( WordsInCurrentChunk == Chunk.Capacity )
  936. {
  937. Chunk nc = new Chunk();
  938. CurrentChunk.Next = nc;
  939. CurrentChunk = nc;
  940. CompleteChunks += 1;
  941. WordsInCurrentChunk = 0;
  942. }
  943. CurrentChunk.Words[ WordsInCurrentChunk++ ] = word;
  944. }
  945.  
  946. private int WordsInCurrentChunk; // Number of words stored in CurrentChunk.
  947. private int CompleteChunks; // Number of complete Chunks.
  948. private Chunk FirstChunk, CurrentChunk;
  949.  
  950. private class Chunk
  951. {
  952. public const int Capacity = 256;
  953. public ulong [] Words = new ulong[ Capacity ];
  954. public Chunk Next;
  955. }
  956.  
  957. } // end class MemoryBitStream
  958.  
  959. } // namespace
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement