 # Untitled

Oct 18th, 2020
740
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. from ordered_list import *
2. from huffman_bit_writer import *
3.
4. class HuffmanNode:
5.     def __init__(self, char, freq):
6.         self.char = char   # stored as an integer - the ASCII character code value
7.         self.freq = freq   # the freqency associated with the node
8.         self.left = None   # Huffman tree (node) to the left
9.         self.right = None  # Huffman tree (node) to the right
10.
11.     def __eq__(self, other):
12.         '''Needed in order to be inserted into OrderedList'''
13.         return other is not None and self.char == other.char and self.freq == other.freq
14.
15.     def __lt__(self, other):
16.         '''Needed in order to be inserted into OrderedList'''
17.         if self.freq == other.freq:
18.             return self.char < other.char
19.         return self.freq < other.freq
20.
21.
22. def cnt_freq(filename):
23.     '''Opens a text file with a given file name (passed as a string) and counts the
24.    frequency of occurrences of all the characters within that file'''
25.     f = open(filename)
26.     cnt =  * 256
27.     while True:
29.         if not c:
30.             break
31.         cnt[ord(c)] += 1
32.     f.close()
33.     return cnt
34.
35.
36. def create_huff_tree(freq):
37.     '''Create a Huffman tree for characters with non-zero frequency
38.    Returns the root node of the Huffman tree'''
39.
40.     o = OrderedList()
41.     for i in range(0, 256):
42.         if freq[i] != 0:
44.
45.     while o.size() > 1:
46.         x, y = o.pop(0), o.pop(0)
47.         node = HuffmanNode(min(x.char, y.char), x.freq + y.freq)
48.         node.left, node.right = x, y
50.
51.     return o.pop()
52.
53. def create_code(node):
54.     '''Returns an array (Python list) of Huffman codes. For each character, use the integer ASCII representation
55.    as the index into the arrary, with the resulting Huffman code for that character stored at that location'''
56.     codes = [""] * 256
57.     code_helper(node, "", codes)
58.     return codes
59.
60. def code_helper(node, code, codes):
61.     if node is None:
62.         return
63.     if node.left is None and node.right is None:
64.         codes[node.char] = code
65.     else:
66.         code_helper(node.left, code + "0", codes)
67.         code_helper(node.right, code + "1", codes)
68.
70.     '''Input is the list of frequencies. Creates and returns a header for the output file
71.    Example: For the frequency list asscoaied with "aaabbbbcc, would return “97 3 98 4 99 2” '''
72.     ans = ""
73.     for ind, cnt in enumerate(freq):
74.         if cnt != 0:
75.             ans += str(ind) + " " + str(cnt) + " "
76.     return ans.strip()
77.
78. def huffman_encode(in_file, out_file):
79.     '''Takes inout file name and output file name as parameters - both files will have .txt extensions
80.    Uses the Huffman coding process on the text from the input file and writes encoded text to output file
81.    Also creates a second output file which adds _compressed before the .txt extension to the name of the file.
82.    This second file is actually compressed by writing individual 0 and 1 bits to the file using the utility methods
83.    provided in the huffman_bits_io module to write both the header and bits.
84.    Take not of special cases - empty file and file with only one unique character'''
85.
86.     i = open(in_file, "r")
87.     o = open(out_file, "w")
88.     hbw = HuffmanBitWriter(out_file[:-4] + "_compressed.txt")
89.
90.     freq = cnt_freq(in_file)
91.     s = create_header(freq) + "\n"
92.     o.write(s)
93.     hbw.write_str(s)
94.
95.     t = create_huff_tree(freq)
96.     codes = create_code(t)
97.
98.     while True: