Advertisement
Guest User

Untitled

a guest
Jan 18th, 2017
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.04 KB | None | 0 0
  1. import heapq
  2. import os
  3.  
  4. class HeapNode:
  5. def __init__(self, char, freq):
  6. self.char = char
  7. self.freq = freq
  8. self.left = None
  9. self.right = None
  10.  
  11. def __cmp__(self, other):
  12. if(other == None):
  13. return -1
  14. if(not isinstance(other, HeapNode)):
  15. return -1
  16. return self.freq > other.freq
  17.  
  18.  
  19. class HuffmanCoding:
  20. def __init__(self, path):
  21. self.path = path
  22. self.heap = []
  23. self.codes = {}
  24. self.reverse_mapping = {}
  25.  
  26. # functions for compression:
  27.  
  28. def make_frequency_dict(self, text):
  29. frequency = {}
  30. for character in text:
  31. if not character in frequency:
  32. frequency[character] = 0
  33. frequency[character] += 1
  34. return frequency
  35.  
  36. def make_heap(self, frequency):
  37. for key in frequency:
  38. node = HeapNode(key, frequency[key])
  39. heapq.heappush(self.heap, node)
  40.  
  41. def merge_nodes(self):
  42. while(len(self.heap)>1):
  43. node1 = heapq.heappop(self.heap)
  44. node2 = heapq.heappop(self.heap)
  45.  
  46. merged = HeapNode(None, node1.freq + node2.freq)
  47. merged.left = node1
  48. merged.right = node2
  49.  
  50. heapq.heappush(self.heap, merged)
  51.  
  52.  
  53. def make_codes_helper(self, root, current_code):
  54. if(root == None):
  55. return
  56.  
  57. if(root.char != None):
  58. self.codes[root.char] = current_code
  59. self.reverse_mapping[current_code] = root.char
  60. return
  61.  
  62. self.make_codes_helper(root.left, current_code + "0")
  63. self.make_codes_helper(root.right, current_code + "1")
  64.  
  65.  
  66. def make_codes(self):
  67. root = heapq.heappop(self.heap)
  68. current_code = ""
  69. self.make_codes_helper(root, current_code)
  70.  
  71.  
  72. def get_encoded_text(self, text):
  73. encoded_text = ""
  74. for character in text:
  75. encoded_text += self.codes[character]
  76. return encoded_text
  77.  
  78.  
  79. def pad_encoded_text(self, encoded_text):
  80. extra_padding = 8 - len(encoded_text) % 8
  81. for i in range(extra_padding):
  82. encoded_text += "0"
  83.  
  84. padded_info = "{0:08b}".format(extra_padding)
  85. encoded_text = padded_info + encoded_text
  86. return encoded_text
  87.  
  88.  
  89. def get_byte_array(self, padded_encoded_text):
  90. if(len(padded_encoded_text) % 8 != 0):
  91. print("Encoded text not padded properly")
  92. exit(0)
  93.  
  94. b = bytearray()
  95. for i in range(0, len(padded_encoded_text), 8):
  96. byte = padded_encoded_text[i:i+8]
  97. b.append(int(byte, 2))
  98. return b
  99.  
  100.  
  101. def compress(self):
  102. filename, file_extension = os.path.splitext(self.path)
  103. output_path = filename + ".bin"
  104.  
  105. with open(self.path, 'r+') as file, open(output_path, 'wb') as output:
  106. text = file.read()
  107. text = text.rstrip()
  108.  
  109. frequency = self.make_frequency_dict(text)
  110. self.make_heap(frequency)
  111. self.merge_nodes()
  112. self.make_codes()
  113.  
  114. encoded_text = self.get_encoded_text(text)
  115. padded_encoded_text = self.pad_encoded_text(encoded_text)
  116.  
  117. b = self.get_byte_array(padded_encoded_text)
  118. output.write(bytes(b))
  119.  
  120. print("Compressed")
  121. return output_path
  122.  
  123.  
  124. """ functions for decompression: """
  125.  
  126. def text_to_binary(self, text):
  127. encoded_text = ""
  128. for character in text:
  129. ascii = ord(character)
  130. binary = "{0:08b}".format(ascii)
  131. encoded_text += binary
  132. return encoded_text
  133.  
  134. def remove_padding(self, padded_encoded_text):
  135. padded_info = padded_encoded_text[:8]
  136. extra_padding = int(padded_info, 2)
  137.  
  138. padded_encoded_text = padded_encoded_text[8:]
  139. encoded_text = padded_encoded_text[:-1*extra_padding]
  140.  
  141. return encoded_text
  142.  
  143. def decode_text(self, encoded_text):
  144. current_code = ""
  145. decoded_text = ""
  146.  
  147. for bit in encoded_text:
  148. current_code += bit
  149. if(current_code in self.reverse_mapping):
  150. character = self.reverse_mapping[current_code]
  151. decoded_text += character
  152. current_code = ""
  153.  
  154. return decoded_text
  155.  
  156.  
  157. def decompress(self, input_path):
  158. filename, file_extension = os.path.splitext(self.path)
  159. output_path = filename + "_decompressed" + ".txt"
  160.  
  161. with open(input_path, 'rb') as file, open(output_path, 'w') as output:
  162. bit_string = ""
  163.  
  164. byte = file.read(1)
  165. while(byte != ""):
  166. byte = ord(byte)
  167. bits = bin(byte)[2:].rjust(8, '0')
  168. bit_string += bits
  169. byte = file.read(1)
  170.  
  171. encoded_text = self.remove_padding(bit_string)
  172.  
  173. decompressed_text = self.decode_text(encoded_text)
  174.  
  175. output.write(decompressed_text)
  176.  
  177. print("Decompressed")
  178. return output_path
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement