Advertisement
Guest User

Untitled

a guest
Dec 18th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.93 KB | None | 0 0
  1. import sys
  2. import os
  3.  
  4. def shannon_fano_encoder(iA, iB): # iA to iB : index interval
  5.     global tupleList
  6.     size = iB - iA + 1
  7.     if size > 1:
  8.         # Divide the list into 2 groups.
  9.         # Top group will get 0, bottom 1 as the new encoding bit.
  10.         mid = int(size / 2 + iA)
  11.         for i in range(iA, iB + 1):
  12.             tup = tupleList[i]
  13.             if i < mid: # top group
  14.  
  15.                 tupleList[i] = (tup[0], tup[1], tup[2] + '0')
  16.             else: # bottom group
  17.                 tupleList[i] = (tup[0], tup[1], tup[2] + '1')
  18.         # do recursive calls for both groups
  19.         shannon_fano_encoder(iA, mid - 1)
  20.         shannon_fano_encoder(mid, iB)
  21.  
  22.  
  23. with open('sample.txt', 'r') as myfile:
  24.     txt=myfile.read().replace('\n', '')
  25. print(txt)
  26.  
  27.  
  28.  
  29. inputFile = 'sample.txt'
  30.  
  31.  
  32. # read the whole input file into a byte array
  33. fileSize = os.path.getsize(inputFile)
  34.  
  35. fi = open(inputFile, 'rb')    
  36. # byteArr = map(ord, fi.read(fileSize))
  37. byteArr = bytearray(fi.read(fileSize))
  38. fi.close()
  39. fileSize = len(byteArr)
  40.  
  41.  
  42. # calculate the total number of each byte value in the file
  43. freqList = [0] * 256
  44. for b in byteArr:
  45.     freqList[b] += 1
  46.  
  47. # create a list of (frequency, byteValue, encodingBitStr) tuples
  48. tupleList = []
  49. for b in range(256):
  50.     if freqList[b] > 0:
  51.         tupleList.append((freqList[b], b, ''))
  52.  
  53. # sort the list according to the frequencies descending
  54. tupleList = sorted(tupleList, key=lambda tup: tup[0], reverse = True)
  55.  
  56. shannon_fano_encoder(0, len(tupleList) - 1)
  57.  
  58. # create a dictionary of byteValue : encodingBitStr pairs
  59. dic = dict([(tup[1], tup[2]) for tup in tupleList])
  60. del tupleList # unneeded anymore
  61.  
  62. a=0
  63. for i in dic.items():
  64.     print(chr(i[0]),i[1])
  65.     a=a+len(i[1])
  66.    
  67. print("Without compression: ",(os.path.getsize('sample.txt')*8),"bits")
  68. print("Shannon encoder: ",a,"bits")
  69. print("Difference in size: ",100-int((a*100)/(os.path.getsize('sample.txt')*8)),"%")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement