Anophoo

Huffman

Jun 26th, 2016
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.36 KB | None | 0 0
  1. /**********************************************************
  2. * File: HuffmanEncoding.cpp
  3. *
  4. * Implementation of the functions from HuffmanEncoding.h.
  5. * Most (if not all) of the code that you write for this
  6. * assignment will go into this file.
  7. */
  8.  
  9. #include "HuffmanEncoding.h"
  10. #include "error.h"
  11.  
  12. /* Function: getFrequencyTable
  13. * Usage: Map<ext_char, int> freq = getFrequencyTable(file);
  14. * --------------------------------------------------------
  15. * Given an input stream containing text, calculates the
  16. * frequencies of each character within that text and stores
  17. * the result as a Map from ext_chars to the number of times
  18. * that the character appears.
  19. *
  20. * This function will also set the frequency of the PSEUDO_EOF
  21. * character to be 1, which ensures that any future encoding
  22. * tree built from these frequencies will have an encoding for
  23. * the PSEUDO_EOF character.
  24. */
  25. Map<ext_char, int> getFrequencyTable(istream& file) {
  26. Map<ext_char, int> result;
  27. char ch;
  28. while(file.get(ch)){
  29. if (result.containsKey(ch)) {
  30. int k = result.get(ch) + 1;
  31. result.put(ch, k);
  32. } else {
  33. result.put(ch, 1);
  34. }
  35. }
  36. result.put(PSEUDO_EOF, 1);
  37. return result;
  38. }
  39.  
  40. PriorityQueue<Node*> getPQueue(Map<ext_char, int> frequencies) {
  41. PriorityQueue<Node*> result;
  42. foreach (char ch in frequencies) {
  43. Node* tmp = new Node;
  44. tmp ->one = NULL;
  45. tmp ->zero = NULL;
  46. tmp ->character = ch;
  47. tmp ->weight = frequencies.get(ch);
  48. result.enqueue(tmp, frequencies.get(ch));
  49. }
  50. return result;
  51. }
  52.  
  53.  
  54. /* Function: buildEncodingTree
  55. * Usage: Node* tree = buildEncodingTree(frequency);
  56. * --------------------------------------------------------
  57. * Given a map from extended characters to frequencies,
  58. * constructs a Huffman encoding tree from those frequencies
  59. * and returns a pointer to the root.
  60. *
  61. * This function can assume that there is always at least one
  62. * entry in the map, since the PSEUDO_EOF character will always
  63. * be present.
  64. */
  65. Node* buildEncodingTree(Map<ext_char, int>& frequencies) {
  66. PriorityQueue<Node*> pq = getPQueue(frequencies);
  67. while (pq.size() >= 2) {
  68. Node* root = new Node;
  69. Node* tmp1 = pq.dequeue();
  70. Node* tmp2;
  71. if (pq.isEmpty()) {
  72. tmp2 = NULL;
  73. } else {
  74. tmp2 = pq.dequeue();
  75. }
  76. root -> one = tmp1;
  77. root -> zero = tmp2;
  78. root -> character = NOT_A_CHAR;
  79. root -> weight = tmp1 -> weight + tmp2 -> weight;
  80. pq.enqueue(root, root -> weight);
  81. }
  82. return pq.dequeue();
  83. }
  84.  
  85. /* Function: freeTree
  86. * Usage: freeTree(encodingTree);
  87. * --------------------------------------------------------
  88. * Deallocates all memory allocated for a given encoding
  89. * tree.
  90. */
  91. void freeTree(Node* root) {
  92. if (root != NULL) {
  93. freeTree(root -> one);
  94. freeTree(root -> zero);
  95. delete root;
  96. }
  97. }
  98.  
  99. /* Function: encodeFile
  100. * Usage: encodeFile(source, encodingTree, output);
  101. * --------------------------------------------------------
  102. * Encodes the given file using the encoding specified by the
  103. * given encoding tree, then writes the result one bit at a
  104. * time to the specified output file.
  105. *
  106. * This function can assume the following:
  107. *
  108. * - The encoding tree was constructed from the given file,
  109. * so every character appears somewhere in the encoding
  110. * tree.
  111. *
  112. * - The output file already has the encoding table written
  113. * to it, and the file cursor is at the end of the file.
  114. * This means that you should just start writing the bits
  115. * without seeking the file anywhere.
  116. */
  117. void encodeFile(istream& infile, Node* encodingTree, obstream& outfile) {
  118. // TODO: Implement this!
  119. }
  120.  
  121. /* Function: decodeFile
  122. * Usage: decodeFile(encodedFile, encodingTree, resultFile);
  123. * --------------------------------------------------------
  124. * Decodes a file that has previously been encoded using the
  125. * encodeFile function. You can assume the following:
  126. *
  127. * - The encoding table has already been read from the input
  128. * file, and the encoding tree parameter was constructed from
  129. * this encoding table.
  130. *
  131. * - The output file is open and ready for writing.
  132. */
  133. void decodeFile(ibstream& infile, Node* encodingTree, ostream& file) {
  134. // TODO: Implement this!
  135. }
  136.  
  137. /* Function: writeFileHeader
  138. * Usage: writeFileHeader(output, frequencies);
  139. * --------------------------------------------------------
  140. * Writes a table to the front of the specified output file
  141. * that contains information about the frequencies of all of
  142. * the letters in the input text. This information can then
  143. * be used to decompress input files once they've been
  144. * compressed.
  145. *
  146. * This function is provided for you. You are free to modify
  147. * it if you see fit, but if you do you must also update the
  148. * readFileHeader function defined below this one so that it
  149. * can properly read the data back.
  150. */
  151. void writeFileHeader(obstream& outfile, Map<ext_char, int>& frequencies) {
  152. /* The format we will use is the following:
  153. *
  154. * First number: Total number of characters whose frequency is being
  155. * encoded.
  156. * An appropriate number of pairs of the form [char][frequency][space],
  157. * encoding the number of occurrences.
  158. *
  159. * No information about PSEUDO_EOF is written, since the frequency is
  160. * always 1.
  161. */
  162.  
  163. /* Verify that we have PSEUDO_EOF somewhere in this mapping. */
  164. if (!frequencies.containsKey(PSEUDO_EOF)) {
  165. error("No PSEUDO_EOF defined.");
  166. }
  167.  
  168. /* Write how many encodings we're going to have. Note the space after
  169. * this number to ensure that we can read it back correctly.
  170. */
  171. outfile << frequencies.size() - 1 << ' ';
  172.  
  173. /* Now, write the letter/frequency pairs. */
  174. foreach (ext_char ch in frequencies) {
  175. /* Skip PSEUDO_EOF if we see it. */
  176. if (ch == PSEUDO_EOF) continue;
  177.  
  178. /* Write out the letter and its frequency. */
  179. outfile << char(ch) << frequencies[ch] << ' ';
  180. }
  181. }
  182.  
  183. /* Function: readFileHeader
  184. * Usage: Map<ext_char, int> freq = writeFileHeader(input);
  185. * --------------------------------------------------------
  186. * Reads a table to the front of the specified input file
  187. * that contains information about the frequencies of all of
  188. * the letters in the input text. This information can then
  189. * be used to reconstruct the encoding tree for that file.
  190. *
  191. * This function is provided for you. You are free to modify
  192. * it if you see fit, but if you do you must also update the
  193. * writeFileHeader function defined before this one so that it
  194. * can properly write the data.
  195. */
  196. Map<ext_char, int> readFileHeader(ibstream& infile) {
  197. /* This function inverts the mapping we wrote out in the
  198. * writeFileHeader function before. If you make any
  199. * changes to that function, be sure to change this one
  200. * too!
  201. */
  202. Map<ext_char, int> result;
  203.  
  204. /* Read how many values we're going to read in. */
  205. int numValues;
  206. infile >> numValues;
  207.  
  208. /* Skip trailing whitespace. */
  209. infile.get();
  210.  
  211. /* Read those values in. */
  212. for (int i = 0; i < numValues; i++) {
  213. /* Get the character we're going to read. */
  214. ext_char ch = infile.get();
  215.  
  216. /* Get the frequency. */
  217. int frequency;
  218. infile >> frequency;
  219.  
  220. /* Skip the space character. */
  221. infile.get();
  222.  
  223. /* Add this to the encoding table. */
  224. result[ch] = frequency;
  225. }
  226.  
  227. /* Add in 1 for PSEUDO_EOF. */
  228. result[PSEUDO_EOF] = 1;
  229. return result;
  230. }
  231.  
  232. /* Function: compress
  233. * Usage: compress(infile, outfile);
  234. * --------------------------------------------------------
  235. * Main entry point for the Huffman compressor. Compresses
  236. * the file whose contents are specified by the input
  237. * ibstream, then writes the result to outfile. Your final
  238. * task in this assignment will be to combine all of the
  239. * previous functions together to implement this function,
  240. * which should not require much logic of its own and should
  241. * primarily be glue code.
  242. */
  243. void compress(ibstream& infile, obstream& outfile) {
  244. // TODO: Implement this!
  245. }
  246.  
  247. /* Function: decompress
  248. * Usage: decompress(infile, outfile);
  249. * --------------------------------------------------------
  250. * Main entry point for the Huffman decompressor.
  251. * Decompresses the file whose contents are specified by the
  252. * input ibstream, then writes the decompressed version of
  253. * the file to the stream specified by outfile. Your final
  254. * task in this assignment will be to combine all of the
  255. * previous functions together to implement this function,
  256. * which should not require much logic of its own and should
  257. * primarily be glue code.
  258. */
  259. void decompress(ibstream& infile, ostream& outfile) {
  260. // TODO: Implement this!
  261. }
Advertisement
Add Comment
Please, Sign In to add comment