Advertisement
Guest User

Untitled

a guest
Dec 18th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.11 KB | None | 0 0
  1. from heapq import heappush, heappop, heapify
  2. from collections import defaultdict
  3. import os
  4. def encode(symb2freq):
  5.     heap = [[wt, [sym, ""]] for sym, wt in symb2freq.items()]
  6.     heapify(heap)
  7.     while len(heap) > 1:
  8.         lo = heappop(heap)
  9.         hi = heappop(heap)
  10.         for pair in lo[1:]:
  11.             pair[1] = '0' + pair[1]
  12.         for pair in hi[1:]:
  13.             pair[1] = '1' + pair[1]
  14.         heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
  15.     return sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p))
  16.  
  17.  
  18. with open('sample.txt', 'r') as myfile:
  19.     txt=myfile.read().replace('\n', '')
  20. print(txt)
  21.    
  22. ##txt = "this is an example for huffman encoding"
  23.  
  24. symb2freq = defaultdict(int)
  25. for ch in txt:
  26.     symb2freq[ch] += 1
  27.  
  28. huff = encode(symb2freq)
  29. a=0
  30.  
  31. print ("Symbol\tWeight\tHuffman Code")
  32. for p in huff:
  33.     print ("%s\t%s\t%s" % (p[0], symb2freq[p[0]], p[1]))
  34.     a=a+len(p[1])
  35.  
  36. print("Without compression: ",(os.path.getsize('sample.txt')*8),"bits")
  37. print("Huffman Coding: ",a,"bits")
  38. print("Difference in size: ",100-int((a*100)/(os.path.getsize('sample.txt')*8)),"%")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement