Advertisement
Guest User

Untitled

a guest
Apr 21st, 2015
233
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.60 KB | None | 0 0
  1. package assignment12;
  2.  
  3. import java.io.DataInputStream;
  4. import java.io.DataOutputStream;
  5. import java.io.File;
  6. import java.io.FileInputStream;
  7. import java.io.FileOutputStream;
  8. import java.io.IOException;
  9. import java.io.InputStream;
  10. import java.io.PrintWriter;
  11. import java.util.ArrayList;
  12. import java.util.HashMap;
  13. import java.util.List;
  14. import java.util.Map;
  15. import java.util.PriorityQueue;
  16.  
  17. /**
  18. * Implements file compression and decompression using Huffman's algorithm,
  19. * which encodes and decodes each character of a file using a binary trie.
  20. *
  21. * @author Christian Luciani and Jiwon Nam
  22. */
  23. public class HuffmanTree {
  24.  
  25. /**
  26. * Represents a single node of a Huffman Tree.
  27. */
  28. private class Node implements Comparable<Node> {
  29.  
  30. public int symbol; // character to be encoded/decoded
  31.  
  32. public int weight; // number of occurrences of the character
  33.  
  34. public Node leftChild, rightChild, parent;
  35.  
  36. /**
  37. * Constructs a leaf node.
  38. */
  39. public Node(int _symbol, int _weight) {
  40. symbol = _symbol;
  41. weight = _weight;
  42. leftChild = rightChild = parent = null;
  43. }
  44.  
  45. /**
  46. * Constructs an internal node. Note that an internal node has a weight (sum
  47. * of the weights of its children) but no character (INCOMPLETE_CODE).
  48. *
  49. * @param left
  50. * - left child of the new node
  51. * @param right
  52. * - right child of the new node
  53. */
  54. public Node(Node left, Node right) {
  55. symbol = INCOMPLETE_CODE;
  56. weight = left.weight + right.weight;
  57. leftChild = left;
  58. rightChild = right;
  59. parent = null;
  60. }
  61.  
  62. /**
  63. * Returns a string containing all of the edges in the tree rooted at "this"
  64. * node, in DOT format.
  65. */
  66. public String generateDot() {
  67. String atNode = Character.toString((char) symbol);
  68. if (symbol == INCOMPLETE_CODE)
  69. atNode = " ";
  70. else if (symbol == EOF)
  71. atNode = "EOF";
  72. else if (symbol == '\n')
  73. atNode = "newline";
  74. else if (symbol == '\t')
  75. atNode = "tab";
  76. else if (symbol == ' ')
  77. atNode = "space";
  78. atNode += " " + weight;
  79. String ret = "\tnode" + hashCode() + " [label = \"<f0> |<f1> " + atNode
  80. + "|<f2> \"]\n";
  81. if (leftChild != null)
  82. ret += "\tnode" + hashCode() + ":f0 -- node" + leftChild.hashCode()
  83. + ":f1\n" + leftChild.generateDot();
  84. if (rightChild != null)
  85. ret += "\tnode" + hashCode() + ":f2 -- node" + rightChild.hashCode()
  86. + ":f1\n" + rightChild.generateDot();
  87.  
  88. return ret;
  89. }
  90.  
  91. /**
  92. * Compares two Huffman nodes, using weight.
  93. *
  94. * @param rhs
  95. * - right-hand side node
  96. * @return a value > 0 if this node is larger than rhs, a value < 0 if this
  97. * node is smaller than rhs, 0 if the nodes are equal
  98. */
  99. public int compareTo(Node rhs) {
  100.  
  101. int difference = weight - rhs.weight;
  102.  
  103. if(difference != 0){
  104. return difference;
  105. }
  106. else{
  107. return getLeft().symbol - rhs.getLeft().symbol;
  108. }
  109. }
  110.  
  111. private Node getLeft() {
  112. Node left = this;
  113.  
  114. while(left.leftChild != null){
  115. left = left.leftChild;
  116. }
  117. return left;
  118. }
  119. }
  120.  
  121. private Node root; // root of the Huffman tree
  122.  
  123. private Map<Integer, Node> symbols; // set of characters in the file mapped to
  124. // their corresponding Huffman nodes
  125.  
  126. private List<Integer> data; // list of characters in the file
  127.  
  128. private final static int ERROR = -3; // used to detect errors in Huffman
  129. // encoding
  130.  
  131. private final static int INCOMPLETE_CODE = -2; // used to detect internal
  132. // Huffman nodes (i.e., not a
  133. // leaf node and has no
  134. // character)
  135.  
  136. private final static int EOF = 0; // used to detect the end of a file
  137. // compressed using Huffman's algorithm
  138.  
  139. /**
  140. * Generates a compressed version of the input file.
  141. *
  142. * @param infile
  143. * - input file of (uncompressed) data
  144. * @param outfile
  145. * - output file of compressed data
  146. */
  147. public void compressFile(File infile, File outfile) {
  148. symbols = new HashMap<Integer, Node>();
  149. data = new ArrayList<Integer>();
  150.  
  151. try {
  152. DataOutputStream out = new DataOutputStream(new FileOutputStream(outfile));
  153.  
  154. // read characters and counts frequency of each
  155. readData(new FileInputStream(infile));
  156.  
  157. // build Huffman tree using frequency information
  158. createTree();
  159.  
  160. // write character and frequencies to beginning of compressed file
  161. writeEncodingInfo(out);
  162.  
  163. // for each character in the input file, encodes it using the Huffman tree
  164. // and writes the bit code
  165. BitOutputStream bout = new BitOutputStream(out);
  166. for (int ch : data)
  167. bout.writeBits(getCode(ch & 0xff));
  168. bout.close();
  169.  
  170. } catch (IOException e) {
  171. System.err.println(e);
  172. }
  173. }
  174.  
  175. /**
  176. * Generates a decompressed version of the input file
  177. *
  178. * @param infile
  179. * - input file of (compressed) data
  180. * @param outfile
  181. * - output file of decompressed data
  182. */
  183. public void decompressFile(File infile, File outfile) {
  184. symbols = new HashMap<Integer, Node>();
  185. data = new ArrayList<Integer>();
  186.  
  187. try {
  188. DataInputStream in = new DataInputStream(new FileInputStream(infile));
  189.  
  190. // read characters and frequency information
  191. readEncodingInfo(in);
  192.  
  193. // build Huffman tree using frequency information
  194. createTree();
  195.  
  196. // read Huffman codes corresponding to each character in original file
  197. BitInputStream bin = new BitInputStream(in);
  198. readCompressedData(bin);
  199. bin.close();
  200.  
  201. // use the codes to find each character in the Huffman tree and print the
  202. // characters
  203. FileOutputStream out = new FileOutputStream(outfile);
  204. for (int i : data)
  205. out.write(i);
  206. out.close();
  207. } catch (IOException e) {
  208. System.err.println(e);
  209. }
  210. }
  211.  
  212. /**
  213. * COMPRESSION
  214. *
  215. * Reads all characters, while maintaining a count of the occurrences of each
  216. * character (AKA the character's weight).
  217. *
  218. * @param in
  219. * - stream for the input file
  220. * @throws IOException
  221. */
  222. private void readData(InputStream in) throws IOException {
  223. int ch;
  224. // read characters until end of file
  225. while ((ch = in.read()) != -1) {
  226. // if character had not yet been seen, put it in the tree (weight = 1)
  227. if (symbols.get(ch) == null)
  228. symbols.put(ch, new Node(ch, 1));
  229. // if character has already been seen, increment weight
  230. else
  231. symbols.get(ch).weight++;
  232. // keep track of all characters (in order)
  233. data.add(ch);
  234. }
  235.  
  236. // add end-of-file marker to tree and list of characters
  237. // this lets use know when to stop during decompression
  238. symbols.put(EOF, new Node(EOF, 1));
  239. data.add(EOF);
  240. }
  241.  
  242. /**
  243. * DECOMPRESSION
  244. *
  245. * Reads the Huffman codes corresponding to each character in original file.
  246. *
  247. * @param in
  248. * - stream for the input file
  249. * @throws IOException
  250. */
  251. private void readCompressedData(BitInputStream in) throws IOException {
  252. String bits = "";
  253. int bit;
  254. int decode = 0;
  255.  
  256. // read bits until end of file
  257. while ((bit = in.readBit()) != -1) {
  258. if (bit == 0)
  259. bits += "0";
  260. else
  261. bits += "1";
  262.  
  263. // follow the path in the Huffman tree indicated by the bit code and get
  264. // the character encoded
  265. decode = getChar(bits);
  266.  
  267. if (decode == INCOMPLETE_CODE) // if the path leads to an internal node,
  268. // not a complete code; get next bit
  269. continue;
  270. else if (decode == ERROR)
  271. throw new IOException("Decoding error");
  272. else if (decode == EOF)
  273. return;
  274.  
  275. // keep track of each character decoded
  276. data.add(decode);
  277. bits = "";
  278. }
  279. }
  280.  
  281. /**
  282. * COMPRESSION
  283. *
  284. * Writes the character and weight information to the output file, so that the
  285. * Huffman tree can reconstructed at the time of decompression.
  286. *
  287. * @param out
  288. * - stream for the output file
  289. * @throws IOException
  290. */
  291. private void writeEncodingInfo(DataOutputStream out) throws IOException {
  292. for (Map.Entry<Integer, Node> e : symbols.entrySet()) {
  293. out.writeByte(e.getKey());
  294. out.writeInt(e.getValue().weight);
  295. }
  296.  
  297. // special code to indicate end of file
  298. out.writeByte(0);
  299. out.writeInt(0);
  300. }
  301.  
  302. /**
  303. * DECOMPRESSION
  304. *
  305. * Reads the character and weight information at the beginning of the
  306. * compressed file, so that the Huffman tree can reconstructed.
  307. *
  308. * @param in
  309. * - stream for the input file
  310. * @throws IOException
  311. */
  312. private void readEncodingInfo(DataInputStream in) throws IOException {
  313. int ch;
  314. int num;
  315. while (true) {
  316. ch = in.readByte();
  317. num = in.readInt();
  318. if (num == 0) // EOF
  319. return;
  320. symbols.put(ch, new Node(ch, num));
  321. }
  322. }
  323.  
  324. /**
  325. * COMPRESSION and DECOMPRESSION
  326. *
  327. * Constructs a Huffman tree to represent bit codes for each character. (See
  328. * algorithm and examples in Lecture 22.)
  329. */
  330. private void createTree() {
  331. PriorityQueue<Node> pq = new PriorityQueue<Node>();
  332. pq.addAll(symbols.values());
  333.  
  334. while(pq.size() > 1){
  335. Node rightNode = pq.poll();
  336. Node leftNode = pq.poll();
  337. Node tempNode = new Node(rightNode, leftNode);
  338.  
  339. leftNode.parent = tempNode;
  340. rightNode.parent = tempNode;
  341. pq.offer(tempNode);
  342. }
  343.  
  344. root = pq.poll();
  345. // FILL IN -- use "pq" to construct a Huffman tree.
  346.  
  347. // Repeatedly merge the lowest weight trees and add the new tree
  348. // with new weight to the priority queue.
  349. // At the end, set the variable "root" (already declared in this class)
  350. // to be the full tree.
  351.  
  352. // TO VISUALIZE the binary trie, use the huffmanToDot method
  353. }
  354.  
  355. /**
  356. * COMPRESSION
  357. *
  358. * Returns the bit code for a character, by traversing the path from the
  359. * character's leaf node up to the root of the tree. Encountering a left child
  360. * causes a 0 to be pre-appended to the bit code, and encountering a right
  361. * child causes a 1 to be pre-appended. (See algorithm and examples in Lecture
  362. * 22.)
  363. *
  364. * @param ch
  365. * - character to be encoded
  366. */
  367. private int[] getCode(int ch) {
  368.  
  369. Node tempNode = symbols.get(ch);
  370. Node parentNode = tempNode;
  371. String bitcode = "";
  372.  
  373. while(tempNode.parent != null){
  374.  
  375. parentNode = tempNode.parent;
  376.  
  377. if(parentNode.getLeft() == tempNode){
  378. bitcode = 0 + bitcode;
  379. }
  380. else{
  381. bitcode = 1 + bitcode;
  382. }
  383. tempNode = tempNode.parent;
  384. }
  385.  
  386. char[] bit = bitcode.toCharArray();
  387. int[] code = new int[bit.length];
  388.  
  389. for(int i=0; i < bit.length; i++){
  390. code[i] = bit[i] - '0';
  391. }
  392.  
  393. return code;
  394. }
  395.  
  396. /**
  397. * DECOMPRESSION
  398. *
  399. * Returns the character for the bit code, by traversing the path from the
  400. * root of the tree to the character's leaf node. A 0 in the code causes the
  401. * path to go through a left child, and a 1 in the code causes the path to go
  402. * through a right child.
  403. *
  404. * @param code
  405. * - the bit code indicating the path from the root
  406. * @return the character encoded or ERROR if the path is not valid
  407. */
  408. private int getChar(String code) {
  409. Node curr = root;
  410.  
  411. for (int i = 0; curr != null && i < code.length(); i++)
  412. if (code.charAt(i) == '0')
  413. curr = curr.leftChild;
  414. else
  415. curr = curr.rightChild;
  416.  
  417. if (curr == null)
  418. return ERROR;
  419.  
  420. return curr.symbol;
  421. }
  422.  
  423. /**
  424. * Generates a DOT file for visualizing the Huffman tree.
  425. *
  426. * @param dotFilename
  427. * - filename of DOT file to generate
  428. */
  429. public void huffmanToDot(String dotFilename) {
  430. try {
  431. PrintWriter out = new PrintWriter(dotFilename);
  432. out.println("graph Tree {\n\tnode [shape=record]\n");
  433.  
  434. if (root == null)
  435. out.println("");
  436. else
  437. out.print(root.generateDot());
  438.  
  439. out.println("}");
  440. out.close();
  441. } catch (IOException e) {
  442. System.out.println(e);
  443. }
  444. }
  445. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement