daily pastebin goal
50%
SHARE
TWEET

Untitled

a guest May 17th, 2018 108 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env ruby
  2.  
  3. require 'rubygems'
  4. require 'tree'
  5. require 'set'
  6.  
  7. class Tree::TreeNode
  8.   # Yield once for each leaf node.
  9.   def each_leaf
  10.     self.each { |node| yield(node) if not node.hasChildren? }
  11.   end
  12.  
  13.   # Both assume exactly 2 children
  14.   def left_child;  children.first; end
  15.   def right_child; children.last; end
  16. end
  17.  
  18. # FreqNodes are TreeNodes that:
  19. #   1) Have generated, numeric names to ensure they're all unique.
  20. #   2) Compare based primarily on their contents.
  21. #   3) Have as contents an array containing a frequency, and possibly a letter
  22. #      (for the leaves).
  23. #   4) Have a @side attribute that should be set to either :left or :right for
  24. #      non-root nodes.
  25. #   5) Have a few other nicities, like letter().
  26. class FreqNode < Tree::TreeNode
  27.   attr_accessor :side
  28.  
  29.   def initialize(content = nil)
  30.     super(FreqNode.next_name, content)
  31.   end
  32.  
  33.   def <=>(node)
  34.     content_diff = @content <=> node.content
  35.     return content_diff if not content_diff.zero?
  36.     super(node)
  37.   end
  38.  
  39.   # Assumes this is a leaf.
  40.   def letter; @content[1]; end
  41.  
  42.   def to_s; super() + " Side: #{@side}"; end
  43.  
  44.   # Generate unique names because we can't add two nodes with the same name as
  45.   # children of the same parent node.
  46.   @next_name_num = -1
  47.   def FreqNode.next_name
  48.     (@next_name_num += 1).to_s
  49.   end
  50. end
  51.  
  52. class Set
  53.   def shift
  54.     elem = self.min
  55.     self.delete elem
  56.     elem
  57.   end
  58. end
  59.  
  60. # Build a SortedSet containing pairs of [freq, letter] for the given string.
  61. def build_freqs str
  62.   # Build hash of byte => count
  63.   counts = Hash.new 0
  64.   str.each_byte { |byte| counts[byte.chr] += 1 }
  65.  
  66.   # Build SortedSet of [freq, byte] pairs (lower freqs first).
  67.   freqs = SortedSet[]
  68.   counts.each { |bc| freqs << bc.reverse }
  69.   freqs
  70. end
  71.  
  72. # Build the tree for the given input string, and return the root node.
  73. def build_tree str
  74.   nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }
  75.  
  76.   while nodes.size > 1
  77.     child1, child2 = nodes.shift, nodes.shift
  78.     parent = FreqNode.new([child1.content.first + child2.content.first])
  79.     child1.side, child2.side = :left, :right
  80.     parent << child1
  81.     parent << child2
  82.     nodes << parent
  83.   end
  84.  
  85.   nodes.min
  86. end
  87.  
  88. # Encode the given letter using the given tree of FreqNodes.
  89. def encode_letter letter, tree
  90.   enc = ''
  91.  
  92.   # Find leaf with the right byte value
  93.   node = nil
  94.   tree.each_leaf do |leaf|
  95.     (node = leaf; break) if leaf.letter == letter
  96.   end
  97.  
  98.   while not node.isRoot?
  99.     node.side == :left ? enc << '0' : enc << '1'
  100.     node = node.parent
  101.   end
  102.  
  103.   enc.reverse
  104. end
  105.  
  106. # Build a tree for the given string, and encode it using that tree.
  107. def encode str, tree = build_tree(str)
  108.   #tree = build_tree(str)
  109.   encoded = ''
  110.   str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
  111.   encoded
  112. end
  113.  
  114. # Decode the given string (which should consist of '0's and '1's) using the
  115. # given tree.
  116. def decode str, tree
  117.   dec = ''
  118.   i = 0
  119.   node = tree
  120.  
  121.   str.each_byte do |byte|
  122.     node = (byte.chr == '0' ? node.left_child : node.right_child)
  123.     if not node.hasChildren?
  124.       dec << node.byte_value.chr
  125.       node = tree
  126.     end
  127.   end
  128.  
  129.   dec
  130. end
  131.  
  132. class String
  133.   # Split up the string by inserting sep between len-length chunks.
  134.   def split_up! len, sep
  135.     (((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
  136.       insert i, sep
  137.     end
  138.     self
  139.   end
  140. end
  141.  
  142. # Output a binary string, splitting it up for easier reading.
  143. def puts_binary str
  144.   str = str.dup
  145.   str.split_up! 40, "\n"
  146.   str.each { |line| puts line.split_up!(8, ' ') }
  147. end
  148.  
  149. if __FILE__ == $0
  150.   input = ARGV.join ' '
  151.   input_bytes = input.length
  152.   enc = encode(input)
  153.   enc_bytes = (enc.length / 8.0).ceil
  154.   compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round
  155.  
  156.   puts 'Encoded:'
  157.   puts_binary(enc)
  158.   puts "Encoded bytes: #{enc_bytes}\n\n"
  159.  
  160.   puts "Original:\n#{input}"
  161.   puts "Original bytes: #{input_bytes}\n\n"
  162.  
  163.   puts "Compressed: #{compressed}%"
  164. end
RAW Paste Data
Top