benjaminliu

Untitled

Oct 18th, 2020
740
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from ordered_list import *
  2. from huffman_bit_writer import *
  3.  
  4. class HuffmanNode:
  5.     def __init__(self, char, freq):
  6.         self.char = char   # stored as an integer - the ASCII character code value
  7.         self.freq = freq   # the freqency associated with the node
  8.         self.left = None   # Huffman tree (node) to the left
  9.         self.right = None  # Huffman tree (node) to the right
  10.  
  11.     def __eq__(self, other):
  12.         '''Needed in order to be inserted into OrderedList'''
  13.         return other is not None and self.char == other.char and self.freq == other.freq
  14.  
  15.     def __lt__(self, other):
  16.         '''Needed in order to be inserted into OrderedList'''
  17.         if self.freq == other.freq:
  18.             return self.char < other.char
  19.         return self.freq < other.freq
  20.  
  21.  
  22. def cnt_freq(filename):
  23.     '''Opens a text file with a given file name (passed as a string) and counts the
  24.    frequency of occurrences of all the characters within that file'''
  25.     f = open(filename)
  26.     cnt = [0] * 256
  27.     while True:
  28.         c = f.read(1)
  29.         if not c:
  30.             break
  31.         cnt[ord(c)] += 1
  32.     f.close()
  33.     return cnt
  34.  
  35.  
  36. def create_huff_tree(freq):
  37.     '''Create a Huffman tree for characters with non-zero frequency
  38.    Returns the root node of the Huffman tree'''
  39.  
  40.     o = OrderedList()
  41.     for i in range(0, 256):
  42.         if freq[i] != 0:
  43.             o.add(HuffmanNode(i, freq[i]))
  44.  
  45.     while o.size() > 1:
  46.         x, y = o.pop(0), o.pop(0)
  47.         node = HuffmanNode(min(x.char, y.char), x.freq + y.freq)
  48.         node.left, node.right = x, y
  49.         o.add(node)
  50.  
  51.     return o.pop()
  52.  
  53. def create_code(node):
  54.     '''Returns an array (Python list) of Huffman codes. For each character, use the integer ASCII representation
  55.    as the index into the arrary, with the resulting Huffman code for that character stored at that location'''
  56.     codes = [""] * 256
  57.     code_helper(node, "", codes)
  58.     return codes
  59.  
  60. def code_helper(node, code, codes):
  61.     if node is None:
  62.         return
  63.     if node.left is None and node.right is None:
  64.         codes[node.char] = code
  65.     else:
  66.         code_helper(node.left, code + "0", codes)
  67.         code_helper(node.right, code + "1", codes)
  68.  
  69. def create_header(freq):
  70.     '''Input is the list of frequencies. Creates and returns a header for the output file
  71.    Example: For the frequency list asscoaied with "aaabbbbcc, would return “97 3 98 4 99 2” '''
  72.     ans = ""
  73.     for ind, cnt in enumerate(freq):
  74.         if cnt != 0:
  75.             ans += str(ind) + " " + str(cnt) + " "
  76.     return ans.strip()
  77.  
  78. def huffman_encode(in_file, out_file):
  79.     '''Takes inout file name and output file name as parameters - both files will have .txt extensions
  80.    Uses the Huffman coding process on the text from the input file and writes encoded text to output file
  81.    Also creates a second output file which adds _compressed before the .txt extension to the name of the file.
  82.    This second file is actually compressed by writing individual 0 and 1 bits to the file using the utility methods
  83.    provided in the huffman_bits_io module to write both the header and bits.
  84.    Take not of special cases - empty file and file with only one unique character'''
  85.  
  86.     i = open(in_file, "r")
  87.     o = open(out_file, "w")
  88.     hbw = HuffmanBitWriter(out_file[:-4] + "_compressed.txt")
  89.  
  90.     freq = cnt_freq(in_file)
  91.     s = create_header(freq) + "\n"
  92.     o.write(s)
  93.     hbw.write_str(s)
  94.  
  95.     t = create_huff_tree(freq)
  96.     codes = create_code(t)
  97.  
  98.     while True:
  99.         c = i.read(1)
  100.         if not c:
  101.             break
  102.         o.write(codes[ord(c)])
  103.         hbw.write_code(codes[ord(c)])
  104.  
  105.     o.write("\n")
  106.  
  107.     i.close()
  108.     o.close()
  109.     hbw.close()
  110.  
RAW Paste Data