Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #
- #
- #
- #Alice Zhang
- #Huffman encoding
- #HW#9
- debug = True
- """
- This program follows the design with the exception of addition of helper functions
- such as findChar.
- """
- def main():
- """starts the program, calls all functions, and prints necessary outputs"""
- fileName = intro()
- original = read_file(fileName)
- dict={}
- total_char=len(original)
- if debug: print "Enter findFrequency function"
- dict = findFrequency(dict, total_char, original)
- if debug: print "The dict is ", dict
- list2 = dict.keys()
- if debug: print list2
- print " Distinct characters: " + str(len(list2))
- total0 = total_char *8
- print " Total bytes: " + str(total0)
- print "\n"
- if debug: print "Enter treeBuilder function"
- tree = treeBuilder(dict)[0]
- if debug: print "The tree is ", tree
- if debug: print "Enter makeKey function"
- listKey = map(lambda x: makeKey(tree,x), list2)
- if debug: print "The list of keys is ", listKey
- binaryString = encode(original, list2, listKey)
- if debug: print "The encoded binary string is ", binaryString
- if debug: print "Enter compiler function"
- string = compiler(binaryString)
- print string
- string2 = addStuff(string, listKey, list2)
- string2 = str(8-(total_char%8))+string2
- f = open(fileName+ ".HUFFMAN", 'w')
- f.write(string2)
- print "COmpressed file: " + fileName + '.HUFFMAN'
- dicOverhead = len(list2)*8 + len(listKey)*8
- print " Dictionary overhead in bytes: " + str(dicOverhead)
- compressed = len(string)*8
- print " Compressed text length in bytes: "+ str(compressed)
- total = len(string2)
- print " Total length in bytes: " +str(total)
- print " Actual compression ratio: " + str((total0-total)*1.00/total0)
- print " Asymptotic compression ratio: " +str(compressed*1.00/total0)
- def intro():
- """Ask for input. Output filename"""
- if debug: print "Enter intro function"
- fileName = raw_input("Enter name of file to be compressed: ")
- print "Original file: " + fileName
- return fileName
- def read_file(fileName):
- """Open the file and outputs a string containing the content of the file"""
- if debug: print "Enter read_file function"
- text_file = open(str(fileName))
- original = text_file.read()
- text_file.close()
- return original
- def findFrequency(dict,total_char, original):
- """ Take the file, counts the number of occurrences of each symbol in the file,
- computes the frequencies, and adds to a dictionary.
- Outputs a dictionary containing keys and corresponding frequencies"""
- if original == "": return dict
- elif dict.has_key(original[0]):
- previous_count=dict[original[0]] * total_char
- del dict[original[0]]
- dict[original[0]] = (previous_count+1)/total_char*1.00
- return findFrequency(dict, total_char, original[1:])
- else:
- dict[original[0]] = 1/(total_char*1.00)
- return findFrequency(dict, total_char, original[1:])
- def leastFrequency(dict):
- """Takes the dictionary with keys and frequencies and outputs the least
- frequent key and its frequency"""
- list1 = []
- list2 = dict.keys()
- total = len(list2)
- list1 = map(lambda x: dict[x], list2)
- minFrequency = min(list1)
- key = list2[list1.index(minFrequency)]
- return [key, minFrequency]
- def treeBuilder(dict):
- """Takes the dictionary with keys and frequencies and outputs a tree based on
- the frequencies of the keys"""
- if len(dict)==1: return dict.keys()
- else:
- min1 = leastFrequency(dict)
- del dict[min1[0]]
- min2 = leastFrequency(dict)
- del dict[min2[0]]
- dict[(min1[0],min2[0])]= min1[1] + min2[1]
- return treeBuilder(dict)
- def makeKey(tree, key):
- """Takes tree and a key. Outputs the binary code of the key"""
- if tree[0]==key :
- return '0'
- elif tree[1] == key:
- return '1'
- elif findChar(tree[0], key):
- return '0' + makeKey(tree[0], key)
- else:
- return '1' + makeKey(tree[1], key)
- def encode(original, list2, listKey):
- """Takes the original string, list of codes, and list of keys, and outputs an
- encoded binary string"""
- binaryString = ""
- while original != "":
- binaryString += listKey[list2.index(original[0])]
- original = original[1:]
- return binaryString
- def findChar(tree, key):
- """Takes tree and a key, and outputs if the key is in the tree"""
- if key in tree: return True
- elif type(tree[0]) == tuple:
- if findChar(tree[0], key): return True
- return findChar(tree[0], key) or findChar(tree[1], key)
- elif len(tree)!= 1 and type(tree[1]) == tuple:
- if findChar(tree[1], key): return True
- return findChar(tree[0], key) or findChar(tree[1], key)
- else:
- return False
- def compiler(binaryString):
- """Takes the encoded binary string and outputs a string of characters from
- decoding the string based on ASCII"""
- if len(binaryString) <= 8:
- binaryString = binaryString + '0'*(8-len(binaryString))
- return chr(binaryToNum(binaryString))
- else:
- return chr(binaryToNum(binaryString[:8]))+compiler(binaryString[8:])
- def addStuff(string, listKey, list2):
- """Takes the encoded string and adds listKey and listCode to the top of the
- file for decoding purpose"""
- if debug: print "Enter addStuff function"
- return str(listKey) + '\n'+ str(list2) + '\n'+ string
- """
- The addStuff function adds the list of all unique characters, and a
- list with corresponding binary codes of each. These two lists will be at
- the top of the compressed file, along with the number of filler bits that is
- attached to the end of the file(added in main).
- """
- def binaryToNum(S):
- """convert binary S to base-10"""
- if S=='': return 0
- elif S[-1] =='1': return 2*binaryToNum(S[:-1])+1
- else:
- return 2*binaryToNum(S[:-1])
- """if __name__ == "__main__" : main()
- """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement