Advertisement
Guest User

Untitled

a guest
Sep 28th, 2016
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 9.91 KB | None | 0 0
  1. package patmat
  2.  
  3. import common._
  4.  
  5. /**
  6.  * Assignment 4: Huffman coding
  7.  *
  8.  */
  object Huffman {

    /**
     * A Huffman code is represented by a binary tree.
     *
     * Every `Leaf` node of the tree represents one character of the alphabet that the tree can encode.
     * The weight of a `Leaf` is the frequency of appearance of the character.
     *
     * The branches of the Huffman tree, the `Fork` nodes, represent a set containing all the characters
     * present in the leaves below it. The weight of a `Fork` node is the sum of the weights of these
     * leaves.
     *
     * The hierarchy is sealed so the compiler can check pattern matches on it for exhaustiveness.
     */
    sealed abstract class CodeTree
    final case class Fork(left: CodeTree, right: CodeTree, chars: List[Char], weight: Int) extends CodeTree
    final case class Leaf(char: Char, weight: Int) extends CodeTree


    // Part 1: Basics

    /** Returns the weight stored in the root node of `tree`. */
    def weight(tree: CodeTree): Int = tree match {
      case Fork(_, _, _, w) => w
      case Leaf(_, w)       => w
    }

    /** Returns the characters stored in the root node of `tree` (a one-element list for a `Leaf`). */
    def chars(tree: CodeTree): List[Char] = tree match {
      case Fork(_, _, cs, _) => cs
      case Leaf(c, _)        => List(c)
    }

    /**
     * Joins two trees under a new `Fork` whose character list is the concatenation of both
     * children's characters (left first) and whose weight is the sum of their weights.
     */
    def makeCodeTree(left: CodeTree, right: CodeTree): Fork =
      Fork(left, right, chars(left) ::: chars(right), weight(left) + weight(right))



    // Part 2: Generating Huffman trees

    /**
     * In this assignment, we are working with lists of characters. This function allows
     * you to easily create a character list from a given string.
     */
    def string2Chars(str: String): List[Char] = str.toList

    /**
     * Computes, for each distinct character in `chars`, the number of times it occurs.
     * For example
     *
     *   times(List('a', 'b', 'a'))
     *
     * returns the pairs `('a', 2)` and `('b', 1)`. The order of the resulting list is
     * unspecified.
     */
    def times(chars: List[Char]): List[(Char, Int)] =
      chars
        .groupBy(identity)
        .map { case (char, occurrences) => (char, occurrences.length) }
        .toList

    /**
     * Returns a list of `Leaf` nodes for a given frequency table `freqs`.
     *
     * The returned list is ordered by ascending weights (i.e. the head of the list has the
     * smallest weight), where the weight of a leaf is the frequency of the character.
     */
    def makeOrderedLeafList(freqs: List[(Char, Int)]): List[Leaf] =
      freqs.sortBy(_._2).map { case (char, count) => Leaf(char, count) }

    /**
     * Checks whether the list `trees` contains exactly one code tree.
     * Implemented by pattern matching so that the list is not traversed to its end.
     */
    def singleton(trees: List[CodeTree]): Boolean = trees match {
      case _ :: Nil => true
      case _        => false
    }

    /**
     * The parameter `trees` of this function is a list of code trees ordered
     * by ascending weights.
     *
     * This function takes the first two elements of the list `trees` and combines
     * them into a single `Fork` node. The resulting list is then re-sorted by weight
     * (stable sort), so the ordering by weights is preserved.
     *
     * If `trees` is a list of less than two elements, that list is returned unchanged.
     */
    def combine(trees: List[CodeTree]): List[CodeTree] = trees match {
      case first :: second :: rest => (makeCodeTree(first, second) :: rest).sortBy(weight)
      case _                       => trees
    }

    /**
     * This function is called in the following way:
     *
     *   until(singleton, combine)(trees)
     *
     * where `trees` is of type `List[CodeTree]`, and `singleton` and `combine` refer to
     * the two functions defined above.
     *
     * It applies `combine` repeatedly until `singleton` holds and returns the final list.
     * An empty list is returned unchanged: combining it can never produce a singleton,
     * so looping on it would not terminate.
     *
     * @param singleton termination test for the current list of trees
     * @param combine   reduction step applied while the test fails
     * @param trees     the list of code trees to reduce
     */
    @scala.annotation.tailrec
    def until(singleton: List[CodeTree] => Boolean, combine: List[CodeTree] => List[CodeTree])(trees: List[CodeTree]): List[CodeTree] =
      if (trees.isEmpty || singleton(trees)) trees
      else until(singleton, combine)(combine(trees))

    /**
     * This function creates a code tree which is optimal to encode the text `chars`.
     *
     * The parameter `chars` is an arbitrary non-empty text. This function extracts the
     * character frequencies from that text and creates a code tree based on them.
     *
     * @throws IllegalArgumentException if `chars` is empty (no tree can be built)
     */
    def createCodeTree(chars: List[Char]): CodeTree = {
      require(chars.nonEmpty, "cannot build a Huffman tree for an empty text")
      // Non-empty input yields at least one leaf, so the reduced list has exactly one element.
      until(singleton, combine)(makeOrderedLeafList(times(chars))).head
    }

    // Part 3: Decoding

    /** A single bit of an encoded message: `0` selects the left branch, any other value the right one. */
    type Bit = Int

    /**
     * This function decodes the bit sequence `bits` using the code tree `tree` and returns
     * the resulting list of characters.
     *
     * Decoding is tail-recursive, so long bit sequences do not exhaust the stack.
     *
     * Edge cases:
     *  - an empty `bits` with a `Fork` root decodes to the empty list;
     *  - a tree consisting of a single `Leaf` has only empty code words: with empty `bits`
     *    the leaf's character is returned once, non-empty `bits` cannot be decoded.
     *
     * @throws NoSuchElementException   if `bits` ends in the middle of a code word
     * @throws IllegalArgumentException if `tree` is a single `Leaf` and `bits` is non-empty
     */
    def decode(tree: CodeTree, bits: List[Bit]): List[Char] = {
      // `decoded` accumulates the output in reverse order.
      @scala.annotation.tailrec
      def walk(node: CodeTree, remaining: List[Bit], decoded: List[Char]): List[Char] = node match {
        case Leaf(char, _) =>
          if (remaining.isEmpty) (char :: decoded).reverse
          else walk(tree, remaining, char :: decoded) // restart at the root for the next code word
        case Fork(left, right, _, _) =>
          remaining match {
            case bit :: rest => walk(if (bit == 0) left else right, rest, decoded)
            case Nil =>
              throw new NoSuchElementException("bit sequence ends in the middle of a code word")
          }
      }

      tree match {
        case Leaf(char, _) =>
          if (bits.isEmpty) List(char)
          else throw new IllegalArgumentException(
            "a code tree consisting of a single leaf cannot decode a non-empty bit sequence")
        case _: Fork =>
          if (bits.isEmpty) Nil else walk(tree, bits, Nil)
      }
    }

    /**
     * A Huffman coding tree for the French language.
     * Generated from the data given at
     *   http://fr.wikipedia.org/wiki/Fr%C3%A9quence_d%27apparition_des_lettres_en_fran%C3%A7ais
     */
    val frenchCode: CodeTree = Fork(Fork(Fork(Leaf('s',121895),Fork(Leaf('d',56269),Fork(Fork(Fork(Leaf('x',5928),Leaf('j',8351),List('x','j'),14279),Leaf('f',16351),List('x','j','f'),30630),Fork(Fork(Fork(Fork(Leaf('z',2093),Fork(Leaf('k',745),Leaf('w',1747),List('k','w'),2492),List('z','k','w'),4585),Leaf('y',4725),List('z','k','w','y'),9310),Leaf('h',11298),List('z','k','w','y','h'),20608),Leaf('q',20889),List('z','k','w','y','h','q'),41497),List('x','j','f','z','k','w','y','h','q'),72127),List('d','x','j','f','z','k','w','y','h','q'),128396),List('s','d','x','j','f','z','k','w','y','h','q'),250291),Fork(Fork(Leaf('o',82762),Leaf('l',83668),List('o','l'),166430),Fork(Fork(Leaf('m',45521),Leaf('p',46335),List('m','p'),91856),Leaf('u',96785),List('m','p','u'),188641),List('o','l','m','p','u'),355071),List('s','d','x','j','f','z','k','w','y','h','q','o','l','m','p','u'),605362),Fork(Fork(Fork(Leaf('r',100500),Fork(Leaf('c',50003),Fork(Leaf('v',24975),Fork(Leaf('g',13288),Leaf('b',13822),List('g','b'),27110),List('v','g','b'),52085),List('c','v','g','b'),102088),List('r','c','v','g','b'),202588),Fork(Leaf('n',108812),Leaf('t',111103),List('n','t'),219915),List('r','c','v','g','b','n','t'),422503),Fork(Leaf('e',225947),Fork(Leaf('i',115465),Leaf('a',117110),List('i','a'),232575),List('e','i','a'),458522),List('r','c','v','g','b','n','t','e','i','a'),881025),List('s','d','x','j','f','z','k','w','y','h','q','o','l','m','p','u','r','c','v','g','b','n','t','e','i','a'),1486387)

    /**
     * What does the secret message say? Can you decode it?
     * For the decoding use the `frenchCode` Huffman tree defined above.
     */
    val secret: List[Bit] = List(0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,0,1,1,0,1,0,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1)

    /**
     * Returns the decoded secret, i.e. `secret` decoded with `frenchCode`.
     */
    def decodedSecret: List[Char] = decode(frenchCode, secret)


    // Part 4a: Encoding using Huffman tree

    /**
     * This function encodes `text` using the code tree `tree`
     * into a sequence of bits.
     *
     * For every character the tree is descended from the root, emitting `0` for a step into
     * the left sub-tree and `1` for a step into the right sub-tree.
     *
     * @throws IllegalArgumentException if `text` contains a character that is not part of `tree`
     */
    def encode(tree: CodeTree)(text: List[Char]): List[Bit] = {
      def codeFor(char: Char): List[Bit] = {
        // `path` holds the bits chosen so far in reverse order.
        @scala.annotation.tailrec
        def descend(node: CodeTree, path: List[Bit]): List[Bit] = node match {
          case Leaf(c, _) if c == char =>
            path.reverse
          case Fork(left, _, _, _) if chars(left).contains(char) =>
            descend(left, 0 :: path)
          case Fork(_, right, _, _) if chars(right).contains(char) =>
            descend(right, 1 :: path)
          case _ =>
            // Neither branch knows the character: fail loudly instead of emitting a wrong code.
            throw new IllegalArgumentException(s"character '$char' is not part of the code tree")
        }
        descend(tree, Nil)
      }
      text.flatMap(codeFor)
    }

    // Part 4b: Encoding using code table

    /** A code table maps each character to the bit sequence that encodes it. */
    type CodeTable = List[(Char, List[Bit])]

    /**
     * This function returns the bit sequence that represents the character `char` in
     * the code table `table` (the first matching entry wins).
     *
     * @throws NoSuchElementException if `table` has no entry for `char`
     */
    def codeBits(table: CodeTable)(char: Char): List[Bit] =
      table
        .find(_._1 == char)
        .map(_._2)
        .getOrElse(throw new NoSuchElementException(s"character '$char' has no entry in the code table"))

    /**
     * Given a code tree, create a code table which contains, for every character in the
     * code tree, the sequence of bits representing that character.
     *
     * The table of a `Fork` is built from the tables of its two sub-trees via
     * `mergeCodeTables`; a `Leaf` maps its character to the empty bit sequence.
     */
    def convert(tree: CodeTree): CodeTable = tree match {
      case Leaf(char, _)           => List((char, List.empty[Bit]))
      case Fork(left, right, _, _) => mergeCodeTables(convert(left), convert(right))
    }

    /**
     * This function takes two code tables and merges them into one: `a` is treated as the
     * table of a left sub-tree and `b` as the table of a right sub-tree, so every code in
     * `a` is prefixed with `0` and every code in `b` with `1`. Entries of `a` come first.
     */
    def mergeCodeTables(a: CodeTable, b: CodeTable): CodeTable = {
      def prefixed(bit: Bit)(table: CodeTable): CodeTable =
        table.map { case (char, code) => (char, bit :: code) }
      prefixed(0)(a) ::: prefixed(1)(b)
    }

    /**
     * This function encodes `text` according to the code tree `tree`.
     *
     * To speed up the encoding process, it first converts the code tree to a code table
     * (held in a `Map` for direct lookup) and then uses it to perform the actual encoding.
     *
     * @throws IllegalArgumentException if `text` contains a character that is not part of `tree`
     */
    def quickEncode(tree: CodeTree)(text: List[Char]): List[Bit] = {
      val lookup: Map[Char, List[Bit]] = convert(tree).toMap
      text.flatMap { char =>
        lookup.getOrElse(
          char,
          throw new IllegalArgumentException(s"character '$char' is not part of the code tree"))
      }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement