Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from heapq import heappush, heappop, heapify
- from collections import defaultdict
- import os
- def encode(symb2freq):
- heap = [[wt, [sym, ""]] for sym, wt in symb2freq.items()]
- heapify(heap)
- while len(heap) > 1:
- lo = heappop(heap)
- hi = heappop(heap)
- for pair in lo[1:]:
- pair[1] = '0' + pair[1]
- for pair in hi[1:]:
- pair[1] = '1' + pair[1]
- heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
- return sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p))
- with open('sample.txt', 'r') as myfile:
- txt=myfile.read().replace('\n', '')
- print(txt)
- ##txt = "this is an example for huffman encoding"
- symb2freq = defaultdict(int)
- for ch in txt:
- symb2freq[ch] += 1
- huff = encode(symb2freq)
- a=0
- print ("Symbol\tWeight\tHuffman Code")
- for p in huff:
- print ("%s\t%s\t%s" % (p[0], symb2freq[p[0]], p[1]))
- a=a+len(p[1])
- print("Without compression: ",(os.path.getsize('sample.txt')*8),"bits")
- print("Huffman Coding: ",a,"bits")
- print("Difference in size: ",100-int((a*100)/(os.path.getsize('sample.txt')*8)),"%")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement