Advertisement
Guest User

Untitled

a guest
Jun 26th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.95 KB | None | 0 0
  1. #
  2. #
  3. #
  4. #Alice Zhang
  5. #Huffman encoding
  6. #HW#9
  7.  
  8. debug = True
  9.  
  10. """
  11. This program follows the design with the exception of addition of helper functions
  12. such as findChar.
  13. """
  14.  
  15. def main():
  16.     """starts the program, calls all functions, and prints necessary outputs"""
  17.     fileName = intro()
  18.     original = read_file(fileName)
  19.     dict={}
  20.     total_char=len(original)
  21.     if debug: print "Enter findFrequency function"
  22.     dict = findFrequency(dict, total_char, original)
  23.     if debug: print "The dict is ", dict
  24.     list2 = dict.keys()
  25.     if debug: print list2
  26.     print "   Distinct characters: " + str(len(list2))
  27.     total0 = total_char *8
  28.     print "   Total bytes: " + str(total0)
  29.     print "\n"
  30.     if debug: print "Enter treeBuilder function"
  31.     tree = treeBuilder(dict)[0]
  32.     if debug: print "The tree is ", tree
  33.     if debug: print "Enter makeKey function"
  34.     listKey = map(lambda x: makeKey(tree,x), list2)
  35.     if debug: print "The list of keys is ", listKey
  36.     binaryString = encode(original, list2, listKey)
  37.     if debug: print "The encoded binary string is ", binaryString
  38.     if debug: print "Enter compiler function"
  39.     string =  compiler(binaryString)
  40.     print string
  41.     string2 = addStuff(string, listKey, list2)
  42.     string2 = str(8-(total_char%8))+string2
  43.     f = open(fileName+ ".HUFFMAN", 'w')
  44.     f.write(string2)
  45.     print "COmpressed file: " + fileName + '.HUFFMAN'
  46.     dicOverhead = len(list2)*8 + len(listKey)*8
  47.     print "   Dictionary overhead in bytes: " + str(dicOverhead)
  48.     compressed = len(string)*8
  49.     print "   Compressed text length in bytes: "+ str(compressed)
  50.     total = len(string2)
  51.     print "   Total length in bytes: " +str(total)
  52.     print "   Actual compression ratio: " + str((total0-total)*1.00/total0)
  53.     print "   Asymptotic compression ratio: " +str(compressed*1.00/total0)
  54.    
  55.    
  56.  
  57. def intro():
  58.     """Ask for input. Output filename"""
  59.     if debug: print "Enter intro function"
  60.     fileName = raw_input("Enter name of file to be compressed: ")
  61.     print "Original file: " + fileName
  62.     return fileName
  63.  
  64. def read_file(fileName):
  65.     """Open the file and outputs a string containing the content of the file"""
  66.     if debug: print "Enter read_file function"
  67.     text_file = open(str(fileName))
  68.     original = text_file.read()
  69.     text_file.close()
  70.     return original
  71.  
  72. def findFrequency(dict,total_char, original):
  73.     """ Take the file, counts the number of occurrences of each symbol in the file,
  74. computes the frequencies, and adds to a dictionary.
  75. Outputs a dictionary containing keys and corresponding frequencies"""
  76.     if original == "": return dict
  77.     elif dict.has_key(original[0]):
  78.         previous_count=dict[original[0]] * total_char
  79.         del dict[original[0]]
  80.         dict[original[0]] = (previous_count+1)/total_char*1.00
  81.         return findFrequency(dict, total_char, original[1:])
  82.     else:
  83.         dict[original[0]] = 1/(total_char*1.00)
  84.         return findFrequency(dict, total_char, original[1:])
  85.  
  86. def leastFrequency(dict):
  87.     """Takes the dictionary with keys and frequencies and outputs the least
  88. frequent key and its frequency"""
  89.     list1 = []
  90.     list2 = dict.keys()
  91.     total = len(list2)
  92.     list1 = map(lambda x: dict[x], list2)
  93.     minFrequency = min(list1)
  94.     key = list2[list1.index(minFrequency)]
  95.     return [key, minFrequency]
  96.  
  97. def treeBuilder(dict):
  98.     """Takes the dictionary with keys and frequencies and outputs a tree based on
  99. the frequencies of the keys"""
  100.     if len(dict)==1: return dict.keys()
  101.     else:
  102.         min1 = leastFrequency(dict)
  103.         del dict[min1[0]]
  104.         min2 = leastFrequency(dict)
  105.         del dict[min2[0]]
  106.         dict[(min1[0],min2[0])]= min1[1] + min2[1]
  107.         return treeBuilder(dict)
  108.  
  109. def makeKey(tree, key):
  110.     """Takes tree and a key. Outputs the binary code of the key"""
  111.     if tree[0]==key :
  112.         return '0'
  113.     elif tree[1] == key:
  114.         return '1'
  115.     elif findChar(tree[0], key):
  116.         return '0' + makeKey(tree[0], key)
  117.     else:
  118.         return '1' + makeKey(tree[1], key)
  119.  
  120. def encode(original, list2, listKey):
  121.     """Takes the original string, list of codes, and list of keys, and outputs an
  122. encoded binary string"""
  123.     binaryString = ""
  124.     while original != "":
  125.         binaryString += listKey[list2.index(original[0])]
  126.         original = original[1:]
  127.     return binaryString
  128.  
  129. def findChar(tree, key):
  130.     """Takes tree and a key, and outputs if the key is in the tree"""
  131.     if key in tree: return True
  132.     elif type(tree[0]) == tuple:
  133.         if findChar(tree[0], key): return True
  134.         return findChar(tree[0], key) or findChar(tree[1], key)
  135.     elif len(tree)!= 1 and type(tree[1]) == tuple:
  136.         if findChar(tree[1], key): return True
  137.         return findChar(tree[0], key) or findChar(tree[1], key)
  138.     else:
  139.         return False
  140.  
  141. def compiler(binaryString):
  142.     """Takes the encoded binary string and outputs a string of characters from
  143. decoding the string based on ASCII"""
  144.     if len(binaryString) <= 8:
  145.         binaryString = binaryString + '0'*(8-len(binaryString))
  146.         return chr(binaryToNum(binaryString))
  147.     else:
  148.         return chr(binaryToNum(binaryString[:8]))+compiler(binaryString[8:])
  149.  
  150. def addStuff(string, listKey, list2):
  151.     """Takes the encoded string and adds listKey and listCode to the top of the
  152. file for decoding purpose"""
  153.     if debug: print "Enter addStuff function"
  154.     return str(listKey) + '\n'+ str(list2) + '\n'+ string
  155. """
  156. The addStuff function adds the list of all unique characters, and a
  157. list with corresponding binary codes of each. These two lists will be at
  158. the top of the compressed file, along with the number of filler bits that is
  159. attached to the end of the file(added in main).
  160. """
  161.  
  162. def binaryToNum(S):
  163.     """convert binary S to base-10"""
  164.     if S=='': return 0
  165.     elif S[-1] =='1': return 2*binaryToNum(S[:-1])+1
  166.     else:
  167.         return 2*binaryToNum(S[:-1])
  168.    
  169. """if __name__ == "__main__" : main()
  170. """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement