Advertisement
Guest User

Untitled

a guest
Sep 25th, 2016
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 10.68 KB | None | 0 0
  1. package patmat
  2.  
  3. /**
  4.   * Assignment 4: Huffman coding
  5.   *
  6.   */
  object Huffman {

    import scala.annotation.tailrec

    /**
      * A Huffman code is represented by a binary tree.
      *
      * Every `Leaf` node represents one character of the alphabet that the tree can encode;
      * its weight is the frequency of appearance of that character.
      *
      * Every `Fork` node represents the set of all characters found in the leaves below it;
      * its weight is the sum of the weights of those leaves.
      *
      * The hierarchy is sealed so that pattern matches over a `CodeTree` are checked for
      * exhaustiveness by the compiler.
      */
    sealed abstract class CodeTree

    case class Fork(left: CodeTree, right: CodeTree, chars: List[Char], weight: Int) extends CodeTree

    case class Leaf(char: Char, weight: Int) extends CodeTree


    // Part 1: Basics

    /** Returns the weight stored in the root node of `tree`. */
    def weight(tree: CodeTree): Int = tree match {
      case Fork(_, _, _, w) => w
      case Leaf(_, w) => w
    }

    /** Returns the characters recorded in the root node of `tree` (a one-element list for a leaf). */
    def chars(tree: CodeTree): List[Char] = tree match {
      case Fork(_, _, cs, _) => cs
      case Leaf(c, _) => List(c)
    }

    /**
      * Joins two trees under a new `Fork` whose character list is the concatenation of both
      * character lists (left first) and whose weight is the sum of both weights.
      */
    def makeCodeTree(left: CodeTree, right: CodeTree): Fork =
      Fork(left, right, chars(left) ::: chars(right), weight(left) + weight(right))


    // Part 2: Generating Huffman trees

    /** Converts a string into the list of its characters. */
    def string2Chars(str: String): List[Char] = str.toList

    /**
      * Computes, for each distinct character in `chars`, the number of times it occurs.
      *
      * For example `times(List('a', 'b', 'a'))` yields `('a', 2)` and `('b', 1)`.
      * The order of the resulting pairs is unspecified.
      */
    def times(chars: List[Char]): List[(Char, Int)] =
      // Avoids `mapValues`, which is deprecated (and lazy) from Scala 2.13 onwards.
      chars.groupBy(identity).toList.map { case (c, occurrences) => (c, occurrences.size) }

    /**
      * Turns a frequency table into a list of `Leaf` nodes ordered by ascending weight,
      * i.e. the head of the result has the smallest frequency.
      */
    def makeOrderedLeafList(freqs: List[(Char, Int)]): List[Leaf] =
      freqs.sortBy(_._2).map { case (c, frequency) => Leaf(c, frequency) }

    /** Checks whether `trees` contains exactly one code tree. */
    def singleton(trees: List[CodeTree]): Boolean = trees match {
      // Pattern match instead of `size == 1`, which would traverse the whole list.
      case _ :: Nil => true
      case _ => false
    }

    /**
      * Combines the first two elements of `trees` into a single `Fork` and puts that fork back
      * among the remaining elements so that the list is ordered by ascending weight.
      *
      * `trees` is expected to be ordered by ascending weight. A list with fewer than two
      * elements is returned unchanged.
      */
    def combine(trees: List[CodeTree]): List[CodeTree] = trees match {
      case first :: second :: rest =>
        // `sortBy` is stable, so the new fork ends up in front of existing trees of equal weight.
        (makeCodeTree(first, second) :: rest).sortBy(weight)
      case _ => trees
    }

    /**
      * Repeatedly applies `combiner` to `trees` until `isSingleton` holds, then returns the list.
      *
      * Typical use is `until(singleton, combine)(trees)`.
      *
      * Note: this does not terminate if `combiner` never produces a list accepted by
      * `isSingleton` (for example `until(singleton, combine)(Nil)`).
      */
    @tailrec
    def until(isSingleton: List[CodeTree] => Boolean, combiner: List[CodeTree] => List[CodeTree])(trees: List[CodeTree]): List[CodeTree] =
      if (isSingleton(trees)) trees
      else until(isSingleton, combiner)(combiner(trees))

    /**
      * Creates a code tree for the text `chars`, based on the character frequencies of that text.
      *
      * @throws IllegalArgumentException if `chars` is empty, since no tree can be built
      *                                  (previously this case never terminated)
      */
    def createCodeTree(chars: List[Char]): CodeTree = {
      require(chars.nonEmpty, "cannot create a code tree from an empty text")
      until(singleton, combine)(makeOrderedLeafList(times(chars))).head
    }


    // Part 3: Decoding

    type Bit = Int

    /**
      * Decodes the bit sequence `bits` using the code tree `tree`.
      *
      * A bit equal to `0` selects the left branch of a fork; any other value selects the right
      * branch. An empty bit sequence decodes to the empty text.
      *
      * @throws NoSuchElementException   if `bits` ends in the middle of a code word
      * @throws IllegalArgumentException if `tree` is a single leaf and `bits` is non-empty,
      *                                  because a leaf cannot consume any bits
      */
    def decode(tree: CodeTree, bits: List[Bit]): List[Char] = {
      // Characters are accumulated in reverse and flipped once at the end, keeping decoding linear.
      @tailrec
      def walk(node: CodeTree, remaining: List[Bit], decoded: List[Char]): List[Char] =
        node match {
          case Leaf(c, _) =>
            if (remaining.isEmpty) (c :: decoded).reverse
            else walk(tree, remaining, c :: decoded) // restart at the root for the next code word
          case Fork(left, right, _, _) =>
            remaining match {
              case Nil =>
                throw new NoSuchElementException("bit sequence ends in the middle of a code word")
              case bit :: rest =>
                walk(if (bit == 0) left else right, rest, decoded)
            }
        }

      tree match {
        case _ if bits.isEmpty => Nil
        case _: Leaf =>
          throw new IllegalArgumentException("a single-leaf code tree cannot decode a non-empty bit sequence")
        case _: Fork => walk(tree, bits, Nil)
      }
    }

    /**
      * A Huffman coding tree for the French language.
      * Generated from the data given at
      * http://fr.wikipedia.org/wiki/Fr%C3%A9quence_d%27apparition_des_lettres_en_fran%C3%A7ais
      */
    val frenchCode: CodeTree = Fork(Fork(Fork(Leaf('s', 121895), Fork(Leaf('d', 56269), Fork(Fork(Fork(Leaf('x', 5928), Leaf('j', 8351), List('x', 'j'), 14279), Leaf('f', 16351), List('x', 'j', 'f'), 30630), Fork(Fork(Fork(Fork(Leaf('z', 2093), Fork(Leaf('k', 745), Leaf('w', 1747), List('k', 'w'), 2492), List('z', 'k', 'w'), 4585), Leaf('y', 4725), List('z', 'k', 'w', 'y'), 9310), Leaf('h', 11298), List('z', 'k', 'w', 'y', 'h'), 20608), Leaf('q', 20889), List('z', 'k', 'w', 'y', 'h', 'q'), 41497), List('x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q'), 72127), List('d', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q'), 128396), List('s', 'd', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q'), 250291), Fork(Fork(Leaf('o', 82762), Leaf('l', 83668), List('o', 'l'), 166430), Fork(Fork(Leaf('m', 45521), Leaf('p', 46335), List('m', 'p'), 91856), Leaf('u', 96785), List('m', 'p', 'u'), 188641), List('o', 'l', 'm', 'p', 'u'), 355071), List('s', 'd', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q', 'o', 'l', 'm', 'p', 'u'), 605362), Fork(Fork(Fork(Leaf('r', 100500), Fork(Leaf('c', 50003), Fork(Leaf('v', 24975), Fork(Leaf('g', 13288), Leaf('b', 13822), List('g', 'b'), 27110), List('v', 'g', 'b'), 52085), List('c', 'v', 'g', 'b'), 102088), List('r', 'c', 'v', 'g', 'b'), 202588), Fork(Leaf('n', 108812), Leaf('t', 111103), List('n', 't'), 219915), List('r', 'c', 'v', 'g', 'b', 'n', 't'), 422503), Fork(Leaf('e', 225947), Fork(Leaf('i', 115465), Leaf('a', 117110), List('i', 'a'), 232575), List('e', 'i', 'a'), 458522), List('r', 'c', 'v', 'g', 'b', 'n', 't', 'e', 'i', 'a'), 881025), List('s', 'd', 'x', 'j', 'f', 'z', 'k', 'w', 'y', 'h', 'q', 'o', 'l', 'm', 'p', 'u', 'r', 'c', 'v', 'g', 'b', 'n', 't', 'e', 'i', 'a'), 1486387)

    /**
      * The secret message, encoded with the `frenchCode` Huffman tree defined above.
      */
    val secret: List[Bit] = List(0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1)

    /** Returns `secret` decoded with `frenchCode`. */
    def decodedSecret: List[Char] = decode(frenchCode, secret)


    // Part 4a: Encoding using Huffman tree

    /**
      * Encodes `text` into a sequence of bits by walking the code tree `tree` for each character:
      * `0` is emitted when descending into a left branch and `1` for a right branch.
      *
      * If `tree` is a single leaf, its character is encoded as the empty bit sequence.
      *
      * @throws NoSuchElementException if `text` contains a character that is not listed in the
      *                                root of `tree` (previously such characters were silently
      *                                encoded as the right-most leaf)
      */
    def encode(tree: CodeTree)(text: List[Char]): List[Bit] = {
      val alphabet = chars(tree).toSet

      // Path bits are collected in reverse and flipped once the leaf is reached.
      @tailrec
      def path(node: CodeTree, c: Char, reversedBits: List[Bit]): List[Bit] = node match {
        case _: Leaf => reversedBits.reverse
        case Fork(left, right, _, _) =>
          if (chars(left).contains(c)) path(left, c, 0 :: reversedBits)
          else path(right, c, 1 :: reversedBits)
      }

      text.flatMap { c =>
        if (!alphabet.contains(c))
          throw new NoSuchElementException(s"character '$c' is not part of the code tree")
        path(tree, c, Nil)
      }
    }


    // Part 4b: Encoding using code table

    type CodeTable = List[(Char, List[Bit])]

    /**
      * Returns the bit sequence that represents the character `char` in the code table `table`.
      * If `char` occurs more than once, the first entry wins.
      *
      * @throws NoSuchElementException if `table` has no entry for `char`
      */
    def codeBits(table: CodeTable)(char: Char): List[Bit] =
      table.find(_._1 == char) match {
        case Some((_, bits)) => bits
        case None => throw new NoSuchElementException(s"no code for character '$char'")
      }

    /**
      * Creates a code table which contains, for every leaf of `tree`, the sequence of bits
      * leading to that leaf (`0` for left, `1` for right).
      *
      * The table of a fork is built from the tables of its two sub-trees via `mergeCodeTables`.
      * A single leaf maps its character to the empty bit sequence.
      */
    def convert(tree: CodeTree): CodeTable = tree match {
      case Leaf(c, _) => List((c, Nil))
      case Fork(left, right, _, _) => mergeCodeTables(convert(left), convert(right))
    }

    /**
      * Merges the code tables of a left sub-tree (`a`) and a right sub-tree (`b`) into the table
      * of their parent fork: every code in `a` is prefixed with `0`, every code in `b` with `1`,
      * and the entries of `a` come first.
      */
    def mergeCodeTables(a: CodeTable, b: CodeTable): CodeTable = {
      def prefixed(bit: Bit, table: CodeTable): CodeTable =
        table.map { case (c, bits) => (c, bit :: bits) }

      prefixed(0, a) ::: prefixed(1, b)
    }

    /**
      * Encodes `text` according to the code tree `tree`.
      *
      * The tree is converted to a code table once, and each character is then looked up in
      * that table with `codeBits`.
      *
      * @throws NoSuchElementException if `text` contains a character that has no entry in the
      *                                code table of `tree`
      */
    def quickEncode(tree: CodeTree)(text: List[Char]): List[Bit] = {
      val table = convert(tree)
      text.flatMap(c => codeBits(table)(c))
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement