Advertisement
Guest User

Untitled

a guest
Sep 26th, 2016
379
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 10.06 KB | None | 0 0
  1. package patmat
  2.  
  3. import common._
  4.  
  5. /**
  6.  * Assignment 4: Huffman coding
  7.  *
  8.  */
  9. object Huffman {
  10.  
  11.   /**
  12.    * A huffman code is represented by a binary tree.
  13.    *
  14.    * Every `Leaf` node of the tree represents one character of the alphabet that the tree can encode.
  15.    * The weight of a `Leaf` is the frequency of appearance of the character.
  16.    *
  17.    * The branches of the huffman tree, the `Fork` nodes, represent a set containing all the characters
  18.    * present in the leaves below it. The weight of a `Fork` node is the sum of the weights of these
  19.    * leaves.
  20.    */
  // Sealed: every node variant is declared right here, so the compiler can
  // check pattern matches over `CodeTree` for exhaustiveness.
  sealed abstract class CodeTree

  // Inner node: two subtrees, the characters found in the leaves below it,
  // and the combined weight of those leaves.
  final case class Fork(left: CodeTree, right: CodeTree, chars: List[Char], weight: Int) extends CodeTree

  // Terminal node: a single character together with its frequency.
  final case class Leaf(char: Char, weight: Int) extends CodeTree
  24.  
  25.   // Part 1: Basics..
  /**
   * Total weight of `tree`.
   *
   * A `Leaf` contributes its own weight; a `Fork` contributes the sum of the
   * weights of its two subtrees (the weight stored on the `Fork` itself is
   * not consulted).
   */
  def weight(tree: CodeTree): Int = tree match {
    case Fork(l, r, _, _) => weight(l) + weight(r)
    case Leaf(_, w) => w
  }
  30.  
  /**
   * Characters held by the leaves of `tree`, left subtree before right.
   *
   * The list is collected from the leaves; the character list stored on a
   * `Fork` is not consulted.
   */
  def chars(tree: CodeTree): List[Char] = tree match {
    case Fork(l, r, _, _) => chars(l) ++ chars(r)
    case Leaf(c, _) => List(c)
  }
  35.  
  /**
   * Joins two trees under a new `Fork` whose character list and weight are
   * derived from the two children via `chars` and `weight`.
   */
  def makeCodeTree(left: CodeTree, right: CodeTree) = {
    val allChars = chars(left) ::: chars(right)
    val totalWeight = weight(left) + weight(right)
    Fork(left, right, allChars, totalWeight)
  }
  38.  
  39.   // Part 2: Generating Huffman trees
  40.  
  41.   /**
  42.    * In this assignment, we are working with lists of characters. This function allows
  43.    * you to easily create a character list from a given string.
  44.    */
  // Goes through the string's backing character array to build the list.
  def string2Chars(str: String): List[Char] = str.toCharArray.toList
  46.  
  47.   /**
  48.    * This function computes for each unique character in the list `chars` the number of
  49.    * times it occurs. For example, the invocation
  50.    *
  51.    *   times(List('a', 'b', 'a'))
  52.    *
  53.    * should return the following (the order of the resulting list is not important):
  54.    *
  55.    *   List(('a', 2), ('b', 1))
  56.    *
  57.    * The type `List[(Char, Int)]` denotes a list of pairs, where each pair consists of a
  58.    * character and an integer. Pairs can be constructed easily using parentheses:
  59.    *
  60.    *   val pair: (Char, Int) = ('c', 1)
  61.    *
  62.    * In order to access the two elements of a pair, you can use the accessors `_1` and `_2`:
  63.    *
  64.    *   val theChar = pair._1
  65.    *   val theInt  = pair._2
  66.    *
  67.    * Another way to deconstruct a pair is using pattern matching:
  68.    *
  69.    *   pair match {
  70.    *     case (theChar, theInt) =>
  71.    *       println("character is: "+ theChar)
  72.    *       println("integer is  : "+ theInt)
  73.    *   }
  74.    */
  def times(chars: List[Char]): List[(Char, Int)] =
    // One entry per distinct character, in order of first appearance, paired
    // with the number of times that character occurs in the whole list.
    chars.distinct.map { c =>
      (c, chars.count(_ == c))
    }
  85.  
  86.   /**
  87.    * Returns a list of `Leaf` nodes for a given frequency table `freqs`.
  88.    *
  89.    * The returned list should be ordered by ascending weights (i.e. the
  90.    * head of the list should have the smallest weight), where the weight
  91.    * of a leaf is the frequency of the character.
  92.    */
  def makeOrderedLeafList(freqs: List[(Char, Int)]): List[Leaf] =
    // Stable sort by frequency (entries with equal frequency keep their input
    // order), then wrap each (character, frequency) pair in a Leaf.
    freqs
      .sortBy { case (_, freq) => freq }
      .map { case (c, freq) => Leaf(c, freq) }
  101.  
  102.   /**
  103.    * Checks whether the list `trees` contains only one single code tree.
  104.    */
  def singleton(trees: List[CodeTree]): Boolean = trees match {
    // Exactly one element: a head followed by the empty list.
    case _ :: Nil => true
    // Empty list, or two and more elements.
    case _ => false
  }
  109.  
  110.   /**
  111.    * The parameter `trees` of this function is a list of code trees ordered
  112.    * by ascending weights.
  113.    *
  114.    * This function takes the first two elements of the list `trees` and combines
  115.    * them into a single `Fork` node. This node is then added back into the
  116.    * remaining elements of `trees` at a position such that the ordering by weights
  117.    * is preserved.
  118.    *
  119.    * If `trees` is a list of less than two elements, that list should be returned
  120.    * unchanged.
  121.    */
  def combine(trees: List[CodeTree]): List[CodeTree] = trees match {
    case first :: second :: rest =>
      // Merge the two leading trees, put the result in front of the remainder
      // and re-sort by weight. The sort is stable, so the new fork ends up
      // ahead of any existing tree of equal weight.
      val merged = makeCodeTree(first, second)
      (merged :: rest).sortWith((a, b) => weight(a) < weight(b))
    case _ =>
      // Fewer than two trees: nothing to merge.
      trees
  }
  131.   /**
  132.    * This function will be called in the following way:
  133.    *
  134.    *   until(singleton, combine)(trees)
  135.    *
  136.    * where `trees` is of type `List[CodeTree]`, `singleton` and `combine` refer to
  137.    * the two functions defined above.
  138.    *
  139.    * In such an invocation, `until` should call the two functions until the list of
  140.    * code trees contains only one single tree, and then return that singleton list.
  141.    *
  142.    * Hint: before writing the implementation,
  143.    *  - start by defining the parameter types such that the above example invocation
  144.    *    is valid. The parameter types of `until` should match the argument types of
  145.    *    the example invocation. Also define the return type of the `until` function.
  146.    *  - try to find sensible parameter names for `xxx`, `yyy` and `zzz`.
  147.    */
  def until(aFunction: List[CodeTree] => Boolean, anOperation: List[CodeTree] => List[CodeTree])(aTree: List[CodeTree]): List[CodeTree] =
    // Stop as soon as the predicate accepts the current list; otherwise apply
    // the caller-supplied step and try again. The step must be `anOperation`
    // itself (not a hard-wired `combine`) so that whatever operation is passed
    // in is actually honoured.
    // Note: this only terminates if repeated application of `anOperation`
    // eventually yields a list that `aFunction` accepts.
    if (aFunction(aTree)) aTree
    else until(aFunction, anOperation)(anOperation(aTree))
  151.  
  152.   /**
  153.    * This function creates a code tree which is optimal to encode the text `chars`.
  154.    *
  155.    * The parameter `chars` is an arbitrary text. This function extracts the character
  156.    * frequencies from that text and creates a code tree based on them.
  157.    */
  def createCodeTree(chars: List[Char]): CodeTree = {
    // An empty text produces no leaves. `combine` returns an empty list
    // unchanged and `singleton` never accepts it, so the reduction below would
    // never finish. Reject the input up front with a clear error instead.
    require(chars.nonEmpty, "cannot build a code tree from an empty text")

    val leaves = makeOrderedLeafList(times(chars))
    // `until` only returns once `singleton` holds, so `head` is safe here.
    until(singleton, combine)(leaves).head
  }
  162.  
  163.   // Part 3: Decoding
  164.  
  type Bit = Int

  /**
   * Decodes the bit sequence `bits` using the code tree `tree` and returns
   * the resulting list of characters.
   *
   * Starting at the root, each bit selects a child of the current `Fork`
   * (`1` goes right, any other value goes left). Reaching a `Leaf` emits its
   * character and restarts from the root with the remaining bits.
   *
   * @param tree the code tree to walk
   * @param bits the bits to decode
   * @return the decoded characters in order; `Nil` when `bits` is empty and
   *         `tree` is a `Fork`
   * @throws IllegalArgumentException if `tree` is a single `Leaf` and `bits`
   *                                  is non-empty (no bit could ever be consumed)
   * @throws NoSuchElementException   if `bits` ends part-way through a code word
   */
  def decode(tree: CodeTree, bits: List[Bit]): List[Char] = {
    // `decoded` is built in reverse (cheap prepend) and flipped once at the end.
    @scala.annotation.tailrec
    def walk(node: CodeTree, remaining: List[Bit], decoded: List[Char]): List[Char] =
      node match {
        case Leaf(char, _) =>
          if (remaining.isEmpty) (char :: decoded).reverse
          else walk(tree, remaining, char :: decoded)
        case Fork(left, right, _, _) =>
          remaining match {
            case bit :: rest =>
              walk(if (bit == 1) right else left, rest, decoded)
            case Nil =>
              throw new NoSuchElementException("bit sequence ends in the middle of a code word")
          }
      }

    tree match {
      case Leaf(_, _) if bits.nonEmpty =>
        // A lone leaf has no branches, so no bit can be consumed and the walk
        // would restart at the same leaf forever.
        throw new IllegalArgumentException("a single-leaf tree has no code words to decode")
      case Fork(_, _, _, _) if bits.isEmpty =>
        // Nothing to decode.
        Nil
      case _ =>
        walk(tree, bits, Nil)
    }
  }
  186.  
  187.   /**
  188.    * A Huffman coding tree for the French language.
  189.    * Generated from the data given at
  190.    *   http://fr.wikipedia.org/wiki/Fr%C3%A9quence_d%27apparition_des_lettres_en_fran%C3%A7ais
  191.    */
  // Laid out one node per line group: each Fork lists its left subtree, its
  // right subtree, then its character list and weight.
  val frenchCode: CodeTree =
    Fork(
      Fork(
        Fork(
          Leaf('s', 121895),
          Fork(
            Leaf('d', 56269),
            Fork(
              Fork(
                Fork(Leaf('x', 5928), Leaf('j', 8351), List('x', 'j'), 14279),
                Leaf('f', 16351),
                List('x', 'j', 'f'), 30630),
              Fork(
                Fork(
                  Fork(
                    Fork(
                      Leaf('z', 2093),
                      Fork(Leaf('k', 745), Leaf('w', 1747), List('k', 'w'), 2492),
                      List('z', 'k', 'w'), 4585),
                    Leaf('y', 4725),
                    List('z', 'k', 'w', 'y'), 9310),
                  Leaf('h', 11298),
                  List('z', 'k', 'w', 'y', 'h'), 20608),
                Leaf('q', 20889),
                List('z', 'k', 'w', 'y', 'h', 'q'), 41497),
              List('x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q'), 72127),
            List('d', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q'), 128396),
          List('s', 'd', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q'), 250291),
        Fork(
          Fork(Leaf('o', 82762), Leaf('l', 83668), List('o', 'l'), 166430),
          Fork(
            Fork(Leaf('m', 45521), Leaf('p', 46335), List('m', 'p'), 91856),
            Leaf('u', 96785),
            List('m', 'p', 'u'), 188641),
          List('o', 'l', 'm', 'p', 'u'), 355071),
        List('s', 'd', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q', 'o', 'l', 'm', 'p', 'u'), 605362),
      Fork(
        Fork(
          Fork(
            Leaf('r', 100500),
            Fork(
              Leaf('c', 50003),
              Fork(
                Leaf('v', 24975),
                Fork(Leaf('g', 13288), Leaf('b', 13822), List('g', 'b'), 27110),
                List('v', 'g', 'b'), 52085),
              List('c', 'v', 'g', 'b'), 102088),
            List('r', 'c', 'v', 'g', 'b'), 202588),
          Fork(Leaf('n', 108812), Leaf('t', 111103), List('n', 't'), 219915),
          List('r', 'c', 'v', 'g', 'b', 'n', 't'), 422503),
        Fork(
          Leaf('e', 225947),
          Fork(Leaf('i', 115465), Leaf('a', 117110), List('i', 'a'), 232575),
          List('e', 'i', 'a'), 458522),
        List('r', 'c', 'v', 'g', 'b', 'n', 't', 'e', 'i', 'a'), 881025),
      List('s', 'd', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q', 'o', 'l', 'm', 'p', 'u',
        'r', 'c', 'v', 'g', 'b', 'n', 't', 'e', 'i', 'a'), 1486387)
  193.  
  194.   /**
  195.    * What does the secret message say? Can you decode it?
  196.    * For the decoding use the `frenchCode` Huffman tree defined above.
  197.    */
  // 63 bits, written eight per row purely for readability.
  val secret: List[Bit] = List(
    0, 0, 1, 1, 1, 0, 1, 0,
    1, 1, 1, 0, 0, 1, 1, 0,
    1, 0, 0, 1, 1, 0, 1, 0,
    1, 1, 0, 0, 1, 1, 1, 1,
    1, 0, 1, 0, 1, 1, 0, 0,
    0, 0, 1, 0, 1, 1, 1, 0,
    0, 1, 0, 0, 1, 0, 0, 0,
    1, 0, 0, 0, 1, 0, 1
  )
  199.  
  200.   /**
  201.    * Write a function that returns the decoded secret
  202.    */
  // Runs `secret` through `decode` with the French-language tree.
  def decodedSecret: List[Char] =
    decode(tree = frenchCode, bits = secret)
  204.  
  205.   // Part 4a: Encoding using Huffman tree
  206.  
  207.   /**
  208.    * This function encodes `text` using the code tree `tree`
  209.    * into a sequence of bits.
  210.    */
  def encode(tree: CodeTree)(text: List[Char]): List[Bit] = {
    // True when `c` is the leaf's character, or is listed in the fork's
    // stored character list.
    def holds(node: CodeTree, c: Char): Boolean = node match {
      case Leaf(char, _) => char == c
      case Fork(_, _, chars, _) => chars.contains(c)
    }

    // Bits on the way from `node` down to the leaf for `c`: 0 for a step to
    // the left, 1 for a step to the right. The path is accumulated in reverse
    // (cheap prepend) and flipped once on arrival at a leaf.
    // A character found in neither subtree is reported explicitly instead of
    // surfacing as an opaque MatchError.
    @scala.annotation.tailrec
    def pathTo(node: CodeTree, c: Char, reversedPath: List[Bit]): List[Bit] = node match {
      case Leaf(_, _) => reversedPath.reverse
      case Fork(left, right, _, _) =>
        if (holds(left, c)) pathTo(left, c, 0 :: reversedPath)
        else if (holds(right, c)) pathTo(right, c, 1 :: reversedPath)
        else throw new IllegalArgumentException(s"character '$c' is not covered by the code tree")
    }

    // Each character is encoded independently, always starting from the root.
    // When the root itself is a Leaf, every character maps to an empty path.
    text.flatMap(c => pathTo(tree, c, Nil))
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement