Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env ruby
- require 'rubygems'
- require 'tree'
- require 'set'
- class Tree::TreeNode
- # Yield once for each leaf node.
- def each_leaf
- self.each { |node| yield(node) if not node.hasChildren? }
- end
- # Both assume exactly 2 children
- def left_child; children.first; end
- def right_child; children.last; end
- end
- # FreqNodes are TreeNodes that:
- # 1) Have generated, numeric names to ensure they're all unique.
- # 2) Compare based primarily on their contents.
- # 3) Have as contents an array containing a frequency, and possibly a letter
- # (for the leaves).
- # 4) Have a @side attribute that should be set to either :left or :right for
- # non-root nodes.
- # 5) Have a few other nicities, like letter().
- class FreqNode < Tree::TreeNode
- attr_accessor :side
- def initialize(content = nil)
- super(FreqNode.next_name, content)
- end
- def <=>(node)
- content_diff = @content <=> node.content
- return content_diff if not content_diff.zero?
- super(node)
- end
- # Assumes this is a leaf.
- def letter; @content[1]; end
- def to_s; super() + " Side: #{@side}"; end
- # Generate unique names because we can't add two nodes with the same name as
- # children of the same parent node.
- @next_name_num = -1
- def FreqNode.next_name
- (@next_name_num += 1).to_s
- end
- end
- class Set
- def shift
- elem = self.min
- self.delete elem
- elem
- end
- end
- # Build a SortedSet containing pairs of [freq, letter] for the given string.
- def build_freqs str
- # Build hash of byte => count
- counts = Hash.new 0
- str.each_byte { |byte| counts[byte.chr] += 1 }
- # Build SortedSet of [freq, byte] pairs (lower freqs first).
- freqs = SortedSet[]
- counts.each { |bc| freqs << bc.reverse }
- freqs
- end
- # Build the tree for the given input string, and return the root node.
- def build_tree str
- nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }
- while nodes.size > 1
- child1, child2 = nodes.shift, nodes.shift
- parent = FreqNode.new([child1.content.first + child2.content.first])
- child1.side, child2.side = :left, :right
- parent << child1
- parent << child2
- nodes << parent
- end
- nodes.min
- end
- # Encode the given letter using the given tree of FreqNodes.
- def encode_letter letter, tree
- enc = ''
- # Find leaf with the right byte value
- node = nil
- tree.each_leaf do |leaf|
- (node = leaf; break) if leaf.letter == letter
- end
- while not node.isRoot?
- node.side == :left ? enc << '0' : enc << '1'
- node = node.parent
- end
- enc.reverse
- end
- # Build a tree for the given string, and encode it using that tree.
- def encode str, tree = build_tree(str)
- #tree = build_tree(str)
- encoded = ''
- str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
- encoded
- end
- # Decode the given string (which should consist of '0's and '1's) using the
- # given tree.
- def decode str, tree
- dec = ''
- i = 0
- node = tree
- str.each_byte do |byte|
- node = (byte.chr == '0' ? node.left_child : node.right_child)
- if not node.hasChildren?
- dec << node.byte_value.chr
- node = tree
- end
- end
- dec
- end
- class String
- # Split up the string by inserting sep between len-length chunks.
- def split_up! len, sep
- (((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
- insert i, sep
- end
- self
- end
- end
- # Output a binary string, splitting it up for easier reading.
- def puts_binary str
- str = str.dup
- str.split_up! 40, "\n"
- str.each { |line| puts line.split_up!(8, ' ') }
- end
- if __FILE__ == $0
- input = ARGV.join ' '
- input_bytes = input.length
- enc = encode(input)
- enc_bytes = (enc.length / 8.0).ceil
- compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round
- puts 'Encoded:'
- puts_binary(enc)
- puts "Encoded bytes: #{enc_bytes}\n\n"
- puts "Original:\n#{input}"
- puts "Original bytes: #{input_bytes}\n\n"
- puts "Compressed: #{compressed}%"
- end
Add Comment
Please, Sign In to add comment