Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #lang racket
- (define (make-leaf symbol weight)
- (list 'leaf symbol weight))
- (define (leaf? object)
- (eq? (first object) 'leaf))
- (define (symbol-leaf x) (second x))
- (define (weight-leaf x) (third x))
- (define (make-code-tree left right)
- (list left
- right
- (append (symbols left) (symbols right))
- (+ (weight left) (weight right))))
- (define (left-branch tree) (first tree))
- (define (right-branch tree) (second tree))
- (define (symbols tree)
- (if (leaf? tree)
- (list (symbol-leaf tree))
- (third tree)))
- (define (weight tree)
- (if (leaf? tree)
- (weight-leaf tree)
- (fourth tree)))
- (define (decode bits tree)
- (define (decode-1 bits current-branch)
- (cond [(empty? bits) empty]
- [else
- (define next-branch (choose-branch (first bits) current-branch))
- (cond [(leaf? next-branch)
- (cons (symbol-leaf next-branch)
- (decode-1 (rest bits) tree))]
- [else (decode-1 (rest bits) next-branch)])]))
- (decode-1 bits tree))
- (define (choose-branch bit branch)
- (cond [(zero? bit) (left-branch branch)]
- [(= bit 1) (right-branch branch)]
- [else (error "bad bit -- CHOOSE-BRANCH" bit)]))
- ;; represent a set of leaves and trees as an ordered list, ordered by weight
- (define (adjoin-set x st)
- (cond [(empty? st) (list x)]
- [(< (weight x) (weight (first st))) (cons x st)]
- [else (cons (first st)
- (adjoin-set x (rest st)))]))
- (define (make-leaf-set pairs)
- (cond [(empty? pairs) empty]
- [else
- (define pair (first pairs))
- (adjoin-set (make-leaf (first pair) (second pair))
- (make-leaf-set (rest pairs)))]))
- ;;;;;;;;;;
- ;; 2.67 ;;
- ;;;;;;;;;;
- (define sample-tree
- (make-code-tree (make-leaf 'A 4)
- (make-code-tree (make-leaf 'B 2)
- (make-code-tree (make-leaf 'D 1)
- (make-leaf 'C 1)))))
- (define sample-message '(0 1 1 0 0 1 0 1 0 1 1 1 0))
- (decode sample-message sample-tree)
- ;; '(A D A B B C A)
- ;;;;;;;;;;
- ;; 2.68 ;;
- ;;;;;;;;;;
- (define (encode message tree)
- (if (empty? message)
- empty
- (append (encode-symbol (first message) tree)
- (encode (rest message) tree))))
- (define (encode-symbol symbol tree)
- (define (loop code tree)
- (cond [(leaf? tree) (reverse code)]
- [(member symbol (symbols (left-branch tree)))
- (loop (cons '0 code) (left-branch tree))]
- [(member symbol (symbols (right-branch tree)))
- (loop (cons '1 code) (right-branch tree))]
- [else (error "symbol not in tree -- ENCODE-SYMBOL" symbol)]))
- (loop empty tree))
- (encode '(A D A B B C A) sample-tree)
- ;; '(0 1 1 0 0 1 0 1 0 1 1 1 0)
- (equal? sample-message
- (encode (decode sample-message
- sample-tree)
- sample-tree))
- ;; #t
- ;;;;;;;;;;
- ;; 2.69 ;;
- ;;;;;;;;;;
- (define (generate-huffman-tree pairs)
- (successive-merge (make-leaf-set pairs)))
- (define (successive-merge leaf-set) ; can assume leaf-set sorted
- (if (= (length leaf-set) 2)
- (make-code-tree (first leaf-set) (second leaf-set))
- (successive-merge
- (adjoin-set (make-code-tree (first leaf-set) (second leaf-set))
- (rest (rest leaf-set))))))
- ;; Note that the pairs passed to generate-huffman-tree do not have to be sorted.
- ;; The pairs get sorted by make-leaf-set which uses adjoin-set.
- (define sample-pairs '((A 4)
- (B 2)
- (C 1)
- (D 1)))
- (generate-huffman-tree sample-pairs)
- ;; '((leaf A 4)
- ;; ((leaf B 2) ((leaf D 1) (leaf C 1) (D C) 2) (B D C) 4)
- ;; (A B D C)
- ;; 8)
- ;;;;;;;;;;
- ;; 2.70 ;;
- ;;;;;;;;;;
- (define rock-pairs '((get 2)
- (a 2)
- (job 2)
- (sha 3)
- (na 16)
- (wah 1)
- (yip 9)
- (boom 1)))
- (define rock-tree (generate-huffman-tree rock-pairs))
- (define rock-song '(get a job sha na na na na na na na na
- get a job sha na na na na na na na na
- wah yip yip yip yip yip yip yip yip yip
- sha boom))
- (length rock-song)
- ;; 36
- (encode '(get) rock-tree)
- ;; '(1 1 0 0)
- (encode '(a) rock-tree)
- ;; '(1 1 1 1 1)
- (encode '(job) rock-tree)
- ;; '(1 1 1 1 0)
- (encode '(sha) rock-tree)
- ;; '(1 1 1 0)
- (encode '(na) rock-tree)
- ;; '(0)
- (encode '(wah) rock-tree)
- ;; '(1 1 0 1 1)
- (encode '(yip) rock-tree)
- ;; '(1 0)
- (encode '(boom) rock-tree)
- ;; '(1 1 0 1 0)
- (define rock-code (encode rock-song rock-tree))
- (length rock-code)
- ;; 84
- ;; rock-code
- ;; ;; '(1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 0
- ;; ;; 0 0 0 0 0 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1
- ;; ;; 1 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 0 1 0 1 0
- ;; ;; 1 0 1 0 1 0 1 0 1 0 1 0 1 1 1 0 1 1 0 1 0)
- (decode rock-code rock-tree)
- ;; '(get a job
- ;; sha na na na na na na na na
- ;; get a job
- ;; sha na na na na na na na na
- ;; wah yip yip yip yip yip yip yip yip yip
- ;; sha boom)
- ;; There are eight symbols in the rock alphabet. So a fixed-length code would
- ;; need three bits per word to encode all eight symbols. So the rock song would
- ;; require a code of 36 * 3 = 108 bits. The huffman code is only 84 bits which
- ;; is a savings of 24 bits or almost 29%.
- ;;;;;;;;;;
- ;; 2.71 ;;
- ;;;;;;;;;;
- (define five-powers-of-two-pairs '((a 16)
- (b 8)
- (c 4)
- (d 2)
- (e 1)))
- (define five-powers-of-two-tree (generate-huffman-tree five-powers-of-two-pairs))
- (encode '(a) five-powers-of-two-tree)
- ;; '(1)
- (encode '(b) five-powers-of-two-tree)
- ;; '(0 1)
- (encode '(c) five-powers-of-two-tree)
- ;; '(0 0 1)
- (encode '(d) five-powers-of-two-tree)
- ;; '(0 0 0 1)
- (encode '(e) five-powers-of-two-tree)
- ;; '(0 0 0 0)
- (define ten-powers-of-two-pairs '((a 512)
- (b 256)
- (c 128)
- (d 64)
- (e 32)
- (f 16)
- (g 8)
- (h 4)
- (i 2)
- (j 1)))
- (define ten-powers-of-two-tree (generate-huffman-tree ten-powers-of-two-pairs))
- (encode '(a) ten-powers-of-two-tree)
- ;; '(1)
- (encode '(b) ten-powers-of-two-tree)
- ;; '(0 1)
- (encode '(c) ten-powers-of-two-tree)
- ;; '(0 0 1)
- (encode '(d) ten-powers-of-two-tree)
- ;; '(0 0 0 1)
- (encode '(e) ten-powers-of-two-tree)
- ;; '(0 0 0 0 1)
- (encode '(f) ten-powers-of-two-tree)
- ;; '(0 0 0 0 0 1)
- (encode '(g) ten-powers-of-two-tree)
- ;; '(0 0 0 0 0 0 1)
- (encode '(h) ten-powers-of-two-tree)
- ;; '(0 0 0 0 0 0 0 1)
- (encode '(i) ten-powers-of-two-tree)
- ;; '(0 0 0 0 0 0 0 0 1)
- (encode '(j) ten-powers-of-two-tree)
- ;; '(0 0 0 0 0 0 0 0 0)
- ;; With frequencies that follow the powers of two, the huffman tree will be
- ;; maximally unbalanced. The most common letter will be encoded with one bit, and
- ;; the least common letter will be encoded with n - 1 bits.
- ;; Note also that using the powers of two for the frequencies is very inefficient.
- ;; The weights of the elements grow too fast. If you used some non-exponential
- ;; weighting system, where the weight of an element was less than the sum of the
- ;; weights of the less frequent elements, then you would get a better huffman tree
- ;; with more efficient encoding.
- ;; The difference is whether the leaf for a given symbol is a right or left branch
- ;; of its parent node. If the weight is greater than the sum of the weights of
- ;; all the less frequent elements, it is a right branch. If it is less, it is a
- ;; left branch.
- ;; When encoding an element, if its leaf is a right branch, you have to first
- ;; search through all the elements in the left branch before finding the leaf.
- ;; But that search is eliminated if the leaf is a left branch.
- ;; For example, in the ten-powers-of-two-tree, if the weight of 'a were 510
- ;; instead of 512, then its code would be '(0) instead of '(1), and encoding '(a)
- ;; would be O(1) instead of O(n).
- (encode '(a) (generate-huffman-tree (cons '(a 510) (rest ten-powers-of-two-pairs))))
- ;; '(0)
- ;;;;;;;;;;
- ;; 2.72 ;;
- ;;;;;;;;;;
- ;; Encoding a symbol requires traveling down the tree until we reach a leaf. At
- ;; each node, we have to check whether our symbol is in the list of symbols for
- ;; the left branch or not. That is O(# symbols in left branch). If it is, we go
- ;; left. If it is not, we then have to check whether our symbol is in the list of
- ;; symbols for the right branch or not. That is O(# symbols in right branch). If
- ;; it is, then we go right. If it is not, we signal an error.
- ;; So, in the worst-case scenario, we would have to search through all the symbols
- ;; at each level, and the encoding would be O(n * # levels) where n is the number
- ;; of letters in the alphabet.
- ;; For the powers of two example from 2.71, to encode the least frequent element
- ;; is O(n ^ 2) because that tree has O(n) levels, and encoding the most frequent
- ;; element is O(n) because we still have to search through all the elements in the
- ;; left branch of the root before finding our element in the right branch.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement