Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import os
- def shannon_fano_encoder(iA, iB): # iA to iB : index interval
- global tupleList
- size = iB - iA + 1
- if size > 1:
- # Divide the list into 2 groups.
- # Top group will get 0, bottom 1 as the new encoding bit.
- mid = int(size / 2 + iA)
- for i in range(iA, iB + 1):
- tup = tupleList[i]
- if i < mid: # top group
- tupleList[i] = (tup[0], tup[1], tup[2] + '0')
- else: # bottom group
- tupleList[i] = (tup[0], tup[1], tup[2] + '1')
- # do recursive calls for both groups
- shannon_fano_encoder(iA, mid - 1)
- shannon_fano_encoder(mid, iB)
- with open('sample.txt', 'r') as myfile:
- txt=myfile.read().replace('\n', '')
- print(txt)
- inputFile = 'sample.txt'
- # read the whole input file into a byte array
- fileSize = os.path.getsize(inputFile)
- fi = open(inputFile, 'rb')
- # byteArr = map(ord, fi.read(fileSize))
- byteArr = bytearray(fi.read(fileSize))
- fi.close()
- fileSize = len(byteArr)
- # calculate the total number of each byte value in the file
- freqList = [0] * 256
- for b in byteArr:
- freqList[b] += 1
- # create a list of (frequency, byteValue, encodingBitStr) tuples
- tupleList = []
- for b in range(256):
- if freqList[b] > 0:
- tupleList.append((freqList[b], b, ''))
- # sort the list according to the frequencies descending
- tupleList = sorted(tupleList, key=lambda tup: tup[0], reverse = True)
- shannon_fano_encoder(0, len(tupleList) - 1)
- # create a dictionary of byteValue : encodingBitStr pairs
- dic = dict([(tup[1], tup[2]) for tup in tupleList])
- del tupleList # unneeded anymore
- a=0
- for i in dic.items():
- print(chr(i[0]),i[1])
- a=a+len(i[1])
- print("Without compression: ",(os.path.getsize('sample.txt')*8),"bits")
- print("Shannon encoder: ",a,"bits")
- print("Difference in size: ",100-int((a*100)/(os.path.getsize('sample.txt')*8)),"%")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement