Advertisement
Guest User

Untitled

a guest
Jun 3rd, 2019
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Clojure 10.27 KB | None | 0 0
  1. (ns edaa40.lab5
  2.     (:use [edaa40.core]
  3.           [edaa40.huffutil] )
  4. )
  5.  
  6.  
  7. ;;
  8. ;;  some test data
  9. ;;
  10.  
  11. (def ConstantSequence (vec (for [i (range 10000)] 11)) )
  12.  
  13. (def ConstantSequenceInitialQueue [ {:kind :leaf :value 11 :frequency 10000} ] )
  14.  
  15. (def ConstantSequenceTree {:kind :leaf :value 11 :frequency 10000} )
  16.  
  17. (def ConstantSequenceBits '() )
  18.  
  19. (def ConstantSequenceCodes {11 '()} )
  20.  
  21. (def ConstantSequenceHuffmanCode {:tree ConstantSequenceTree :length (count ConstantSequence) :bits '()} )
  22.  
  23.  
  24.  
  25. (def SimpleSequence [:a :a :a :b :b :c :a :a :b :a :c :b :d :a :a])
  26.  
  27. (def SimpleSequenceInitialQueue
  28.    [{:kind :leaf :value :d :frequency 1}
  29.     {:kind :leaf :value :c :frequency 2}
  30.     {:kind :leaf :value :b :frequency 4}
  31.     {:kind :leaf :value :a :frequency 8}] )
  32.  
  33. (def SimpleSequenceTree
  34.     {:kind :branch,
  35.         :left {:kind :branch,
  36.             :left {:kind :branch,
  37.                 :left {:kind :leaf, :value :d, :frequency 1},
  38.                 :right {:kind :leaf, :value :c, :frequency 2},
  39.                 :frequency 3},
  40.             :right {:kind :leaf, :value :b, :frequency 4},
  41.             :frequency 7},
  42.         :right {:kind :leaf, :value :a, :frequency 8},
  43.         :frequency 15} )
  44.        
  45. (def SimpleSequenceBits '(1 1 1 0 1 0 1 0 0 1 1 1 0 1 1 0 0 1 0 1 0 0 0 1 1))
  46.  
  47. (def SimpleSequenceCodes {:d '(0 0 0) :c '(0 0 1) :b '(0 1) :a '(1)})
  48.  
  49. (def SimpleSequenceHuffmanCode {:tree SimpleSequenceTree :length (count SimpleSequence) :bits SimpleSequenceBits})
  50.  
  51.  
  52.  
  53. (def TextBytes (slurp-bytes "data/txt"))
  54.  
  55. (def TextBytesInitialQueue
  56.     (let [ F (frequencies TextBytes) ]
  57.         (sort-by :frequency (map #(make-leaf % (F %)) (keys F)))
  58.     )
  59. )
  60.  
  61. (def TextBytesLeaf {:kind :leaf, :value 122, :frequency 311} )
  62.  
  63. (def TextBytesQueueWithoutLeaf (filter #(not= % TextBytesLeaf) TextBytesInitialQueue) )
  64.  
  65.  
  66. ;;
  67. ;;  building a Huffman tree
  68. ;;
  69.  
  70. (declare insert-into-queue)
  71.  
  72. (defn insert-into-queue
  73.     "Inserts a tree T into the queue Q such that the entries in the queue are arranged in ascending order of frequency.
  74.     Q is a list of trees ordered by the value of their :frequency field."
  75.     [T Q]
  76.    
  77.     {
  78.         :pre [
  79.             (is-sorted-by? :frequency Q)
  80.         ]
  81.         :post [
  82.             (is-sorted-by? :frequency %)
  83.             (= (count %) (inc (count Q)))
  84.         ]
  85.     }
  86.    
  87.     (sort-by :frequency (conj Q T))
  88. )
  89.  
  90. (test? "insert-into-queue 1" (insert-into-queue (last SimpleSequenceInitialQueue) (drop-last SimpleSequenceInitialQueue)) SimpleSequenceInitialQueue)
  91. (test? "insert-into-queue 2" (insert-into-queue TextBytesLeaf TextBytesQueueWithoutLeaf) TextBytesInitialQueue)
  92.  
  93.  
  94. (declare create-tree)
  95.  
  96. (defn create-tree
  97.     "Recursively create a tree from a queue of trees ordered by frequency.
  98.     Q is a list of trees ordered by the value of their :frequency field.
  99.     The algorithm as as follows:
  100.      (a) If the queue is of length one, it's done. The tree is the one element in the queue.
  101.      (b) Otherwise, take the first two elements in the queue, and make a tree from them consisting of
  102.          a branch node with the first element as its left and the second element as the right child.
  103.          The :frequency field of the new tree is the sum of the frequencies of the two elements.
  104.      (c) Insert the newly created tree into the rest of the queue (without the first two elements) according
  105.          to its :frequency field (use insert-into-queue for this).
  106.      (d) Call create-tree on the queue resulting from (c)."
  107.      
  108.     [Q]
  109.    
  110.     {
  111.         :pre [
  112.             (is-sorted-by? :frequency Q)
  113.         ]
  114.     }
  115.    
  116.     (if (<= (count Q) 1)
  117.         (first Q)
  118.         (create-tree (drop 2 (insert-into-queue (make-branch (first Q) (second Q)) Q)))
  119.  
  120.     ;; hint: you could use the function make-branch from edaa40.huffutil. Also take a look at first, second, and drop.
  121.     )
  122. )
  123.  
  124. (test? "create-tree 1" (create-tree ConstantSequenceInitialQueue) ConstantSequenceTree)
  125. (test? "create-tree 2" (create-tree SimpleSequenceInitialQueue) SimpleSequenceTree)
  126. (test? "create-tree 3" (:frequency (create-tree TextBytesInitialQueue)) 595248)
  127.  
  128.  
  129. (declare huffman-tree)
  130.  
  131. (defn huffman-tree
  132.     "Create a Huffman tree from a sequence of symbols.
  133.     The following steps have to be taken:
  134.      (a) Compute the frequencies of symbols in S.
  135.      (b) Create a list of leaf nodes for each symbol. Each leaf node includes the symbol's frequency.
  136.      (c) Sort that list in order of ascending frequency. This is the initial queue.
  137.      (d) Call create-tree on this sorted list of leaf nodes."
  138.  
  139.     [S]
  140.    
  141.     (create-tree (sort-by :frequency (map #(make-leaf % ((frequencies S) %)) (keys (frequencies S)) )))
  142.  
  143.     ;; hint: my solution uses frequencies, map, keys and from edaa40.huffutil the function make-leaf.
  144.     ;; hint two: take a look at the definition of TextBytesInitialQueue.
  145.  
  146. )
  147.  
  148. (test? "huffman-tree 1" (huffman-tree ConstantSequence) ConstantSequenceTree)
  149. (test? "huffman-tree 2" (huffman-tree SimpleSequence) SimpleSequenceTree)
  150.  
  151.  
  152.  
  153.  
  154. ;;
  155. ;; creating a Huffman code map from a tree
  156. ;;
  157.  
  158. ;; you might need to define other functions used by "huffman-codes' in this place
  159.  
  160. (declare huffman-codes)
  161.  
  162. (defn huffman-codes
  163.     "Given a Huffman tree, compute the Huffman codes for each symbol in it.
  164.    Returns a map mapping each symbol to a sequence of bits (0 or 1)."
  165.     [T]
  166.  
  167.  
  168.      (defn symbols [T]
  169.         (if (isleaf? T)
  170.             (list (T :value))
  171.             (reduce concat (map symbols (list (T :left) (T :right))))
  172.             )
  173.         )
  174.  
  175.     (defn find-strings [T v p]
  176.         (if (isleaf? T)
  177.             (if (= (T :value) v) p nil)
  178.             (first (filter some? (list (find-strings (T :left) v (conj p 0)) (find-strings (T :right) v (conj p 1)))))
  179.         )
  180.     )
  181.  
  182.            
  183.            
  184.         (into (sorted-map) (map #(vector % (find-strings T % [])) (symbols T)))
  185.      
  186.      ;; hint: for building the map, take a look at the function into --- my solutions both look like this:
  187.      ;;       (into {} ...)
  188.      ;; they also both involve defining other functions, for computing all symbols in the tree, for
  189.      ;; finding the bit string for a symbol in the tree, or other things...
  190. )
  191.  
  192.  
  193. (test? "huffman-codes 1" (huffman-codes ConstantSequenceTree) ConstantSequenceCodes)
  194. (test? "huffman-codes 2" (huffman-codes SimpleSequenceTree) SimpleSequenceCodes)
  195.  
  196.  
  197. ;;
  198. ;;  Huffman encoding a byte sequence
  199. ;;
  200.  
  201. (declare huffman-encode)
  202.  
  203. (defn huffman-encode
  204.     "Produces the complete Huffman code for a sequence of bytes (0 to 255).
  205.    A Huffman code is represented as a map containing a Huffman tree, the length of the original sequence, and the sequence of bits encoding it."
  206.     [S]
  207.    
  208.     (def tree (huffman-tree S))
  209.     (def codes (huffman-codes tree))
  210.  
  211.     {:tree tree :length (count S) :bits (mapcat #(get codes %) S)}
  212.  
  213.     ;; hint: take a look at the function mapcat; I also used huffman-tree and huffman-codes
  214. )
  215.  
  216. (test? "huffman-encode 1" (huffman-encode ConstantSequence) ConstantSequenceHuffmanCode)
  217. (test? "huffman-encode 2" (huffman-encode SimpleSequence) SimpleSequenceHuffmanCode)
  218. (test? "huffman-encode 3" (count (:bits (huffman-encode TextBytes))) 2661055)
  219.  
  220. ;;
  221. ;;  Huffman decoding a bit sequence
  222. ;;
  223.  
  224. (declare decode-symbol)
  225.  
  226. (defn decode-symbol
  227.     "Uses the beginning of the provided bit sequence to decode the next symbol based on the tree T.
  228.    Returns a map with the decoded symbol in the :value field and the remaining bit sequence as :remaining-bits."
  229.  
  230.     [T bits]
  231.    
  232.     (if (isleaf? T)
  233.         {:value (T :value) :remaining-bits bits}
  234.         (if (= (first bits) 0)
  235.             (decode-symbol (T :left) (drop 1 bits))
  236.             (decode-symbol (T :right) (drop 1 bits))
  237.         )
  238.     )
  239.  
  240.     ;; hint: this is pretty straightforward recursive descent --- you might want to use isleaf? at some point
  241.  
  242. )
  243.  
  244. (test? "decode-symbol 1" (decode-symbol SimpleSequenceTree SimpleSequenceBits) {:value :a :remaining-bits (drop 1 SimpleSequenceBits)})
  245. (test? "decode-symbol 2" (decode-symbol SimpleSequenceTree SimpleSequenceBits) {:value :a :remaining-bits (drop 1 SimpleSequenceBits)})
  246. (test? "decode-symbol 3" (decode-symbol SimpleSequenceTree (drop 3 SimpleSequenceBits)) {:value :b :remaining-bits (drop 5 SimpleSequenceBits)})
  247. (test? "decode-symbol 4" (decode-symbol SimpleSequenceTree (drop 7 SimpleSequenceBits)) {:value :c :remaining-bits (drop 10 SimpleSequenceBits)})
  248.  
  249.  
  250. (defn huffman-decode
  251.     "Decode a Huffman code (comprising a Huffman tree, a length, and bits representing a Huffman encoding) into a sequence of bytes of the specified length."
  252.  
  253.     [H]
  254.    
  255.     (loop
  256.         [
  257.             N       (:length H)
  258.             bits    (:bits H)
  259.             S       []
  260.         ]
  261.  
  262.         (if (= N 0)
  263.             S
  264.             (let
  265.                 [ {v :value rbits :remaining-bits} (decode-symbol (:tree H) bits) ]
  266.                
  267.                 (recur (dec N) rbits (conj S v))
  268.             )
  269.         )
  270.     )
  271. )
  272.  
  273. ;; when you are done with the previous tests...
  274.  
  275. (test? "huffman-decode 1" (huffman-decode (huffman-encode ConstantSequence)) ConstantSequence)
  276. (test? "huffman-decode 2" (huffman-decode (huffman-encode SimpleSequence)) SimpleSequence)
  277. (test? "huffman-decode 3" (huffman-decode (huffman-encode TextBytes)) TextBytes)
  278.  
  279.  
  280.  
  281. ;;
  282. ;; Huffman file compression and decompression
  283. ;;
  284. ;; You can use these to try out the coding and decoding on files. It operates on bytes as symbols.
  285. ;;
  286.  
  287. (defn huffman-compress
  288.     "Compresses a file using a Huffman code. Stores the complete code (incl. tree and original length) along with the bits."
  289.     [infile outfile]
  290.    
  291.     (let
  292.         [
  293.             in-data     (slurp-bytes infile)
  294.             h           (huffman-encode in-data)
  295.             out-data    (create-huffman-bytes h)
  296.         ]
  297.        
  298.         (spit-bytes outfile out-data)
  299.     )
  300. )
  301.            
  302. (defn huffman-decompress
  303.     "Decompresses a file containing a complete Huffman code into the original."
  304.     [infile outfile]
  305.    
  306.     (let
  307.         [
  308.             in-data     (slurp-bytes infile)
  309.             h           (parse-huffman-bytes in-data)
  310.             out-data    (huffman-decode (:tree h) (:length h) (:bits h))
  311.         ]
  312.        
  313.         (spit-bytes outfile out-data)
  314.     )
  315. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement