daily pastebin goal
24%
SHARE
TWEET

Untitled

a guest May 17th, 2018 110 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env ruby
  2.  
  3. require 'rubygems'
  4. require 'tree'
  5. require 'set'
  6.  
  7. class Tree::TreeNode
  8.   # Yield once for each leaf node.
  9.   def each_leaf
  10.     self.each { |node| yield(node) if not node.hasChildren? }
  11.   end
  12.  
  13.   # Both assume exactly 2 children
  14.   def left_child;  children.first; end
  15.   def right_child; children.last; end
  16. end
  17.  
  18. # FreqNodes are TreeNodes that:
  19. #   1) Have generated, numeric names to ensure they're all unique.
  20. #   2) Compare based primarily on their contents.
  21. #   3) Have as contents an array containing a frequency, and possibly a letter
  22. #      (for the leaves).
  23. #   4) Have a @side attribute that should be set to either :left or :right for
  24. #      non-root nodes.
  25. #   5) Have a few other nicities, like letter().
  26. class FreqNode < Tree::TreeNode
  27.   attr_accessor :side
  28.  
  29.   def initialize(content = nil)
  30.     super(FreqNode.next_name, content)
  31.   end
  32.  
  33.   def <=>(node)
  34.     content_diff = @content <=> node.content
  35.     return content_diff if not content_diff.zero?
  36.     super(node)
  37.   end
  38.  
  39.   # Assumes this is a leaf.
  40.   def letter; @content[1]; end
  41.  
  42.   def to_s; super() + " Side: #{@side}"; end
  43.  
  44.   # Generate unique names because we can't add two nodes with the same name as
  45.   # children of the same parent node.
  46.   @next_name_num = -1
  47.   def FreqNode.next_name
  48.     (@next_name_num += 1).to_s
  49.   end
  50. end
  51.  
  52. class Set
  53.   def shift
  54.     elem = self.min
  55.     self.delete elem
  56.     elem
  57.   end
  58. end
  59.  
  60. # Build a SortedSet containing pairs of [freq, letter] for the given string.
  61. def build_freqs str
  62.   # Build hash of byte => count
  63.   counts = Hash.new 0
  64.   str.each_byte { |byte| counts[byte.chr] += 1 }
  65.  
  66.   # Build SortedSet of [freq, byte] pairs (lower freqs first).
  67.   freqs = SortedSet[]
  68.   counts.each { |bc| freqs << bc.reverse }
  69.   freqs
  70. end
  71.  
  72. # Build the tree for the given input string, and return the root node.
  73. def build_tree str
  74.   nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }
  75.  
  76.   while nodes.size > 1
  77.     child1, child2 = nodes.shift, nodes.shift
  78.     parent = FreqNode.new([child1.content.first + child2.content.first])
  79.     child1.side, child2.side = :left, :right
  80.     parent << child1
  81.     parent << child2
  82.     nodes << parent
  83.   end
  84.  
  85.   nodes.min
  86. end
  87.  
  88. # Encode the given letter using the given tree of FreqNodes.
  89. def encode_letter letter, tree
  90.   enc = ''
  91.  
  92.   # Find leaf with the right byte value
  93.   node = nil
  94.   tree.each_leaf do |leaf|
  95.     (node = leaf; break) if leaf.letter == letter
  96.   end
  97.  
  98.   while not node.isRoot?
  99.     node.side == :left ? enc << '0' : enc << '1'
  100.     node = node.parent
  101.   end
  102.  
  103.   enc.reverse
  104. end
  105.  
  106. # Build a tree for the given string, and encode it using that tree.
  107. def encode str, tree = build_tree(str)
  108.   #tree = build_tree(str)
  109.   encoded = ''
  110.   str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
  111.   encoded
  112. end
  113.  
  114. # Decode the given string (which should consist of '0's and '1's) using the
  115. # given tree.
  116. def decode str, tree
  117.   dec = ''
  118.   i = 0
  119.   node = tree
  120.  
  121.   str.each_byte do |byte|
  122.     node = (byte.chr == '0' ? node.left_child : node.right_child)
  123.     if not node.hasChildren?
  124.       dec << node.byte_value.chr
  125.       node = tree
  126.     end
  127.   end
  128.  
  129.   dec
  130. end
  131.  
  132. class String
  133.   # Split up the string by inserting sep between len-length chunks.
  134.   def split_up! len, sep
  135.     (((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
  136.       insert i, sep
  137.     end
  138.     self
  139.   end
  140. end
  141.  
  142. # Output a binary string, splitting it up for easier reading.
  143. def puts_binary str
  144.   str = str.dup
  145.   str.split_up! 40, "\n"
  146.   str.each { |line| puts line.split_up!(8, ' ') }
  147. end
  148.  
  149. if __FILE__ == $0
  150.   input = ARGV.join ' '
  151.   input_bytes = input.length
  152.   enc = encode(input)
  153.   enc_bytes = (enc.length / 8.0).ceil
  154.   compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round
  155.  
  156.   puts 'Encoded:'
  157.   puts_binary(enc)
  158.   puts "Encoded bytes: #{enc_bytes}\n\n"
  159.  
  160.   puts "Original:\n#{input}"
  161.   puts "Original bytes: #{input_bytes}\n\n"
  162.  
  163.   puts "Compressed: #{compressed}%"
  164. end
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top