Guest User

Untitled

a guest
May 17th, 2018
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.95 KB | None | 0 0
  1. #!/usr/bin/env ruby
  2.  
  3. require 'rubygems'
  4. require 'tree'
  5. require 'set'
  6.  
  7. class Tree::TreeNode
  8. # Yield once for each leaf node.
  9. def each_leaf
  10. self.each { |node| yield(node) if not node.hasChildren? }
  11. end
  12.  
  13. # Both assume exactly 2 children
  14. def left_child; children.first; end
  15. def right_child; children.last; end
  16. end
  17.  
  18. # FreqNodes are TreeNodes that:
  19. # 1) Have generated, numeric names to ensure they're all unique.
  20. # 2) Compare based primarily on their contents.
  21. # 3) Have as contents an array containing a frequency, and possibly a letter
  22. # (for the leaves).
  23. # 4) Have a @side attribute that should be set to either :left or :right for
  24. # non-root nodes.
  25. # 5) Have a few other nicities, like letter().
  26. class FreqNode < Tree::TreeNode
  27. attr_accessor :side
  28.  
  29. def initialize(content = nil)
  30. super(FreqNode.next_name, content)
  31. end
  32.  
  33. def <=>(node)
  34. content_diff = @content <=> node.content
  35. return content_diff if not content_diff.zero?
  36. super(node)
  37. end
  38.  
  39. # Assumes this is a leaf.
  40. def letter; @content[1]; end
  41.  
  42. def to_s; super() + " Side: #{@side}"; end
  43.  
  44. # Generate unique names because we can't add two nodes with the same name as
  45. # children of the same parent node.
  46. @next_name_num = -1
  47. def FreqNode.next_name
  48. (@next_name_num += 1).to_s
  49. end
  50. end
  51.  
  52. class Set
  53. def shift
  54. elem = self.min
  55. self.delete elem
  56. elem
  57. end
  58. end
  59.  
  60. # Build a SortedSet containing pairs of [freq, letter] for the given string.
  61. def build_freqs str
  62. # Build hash of byte => count
  63. counts = Hash.new 0
  64. str.each_byte { |byte| counts[byte.chr] += 1 }
  65.  
  66. # Build SortedSet of [freq, byte] pairs (lower freqs first).
  67. freqs = SortedSet[]
  68. counts.each { |bc| freqs << bc.reverse }
  69. freqs
  70. end
  71.  
  72. # Build the tree for the given input string, and return the root node.
  73. def build_tree str
  74. nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }
  75.  
  76. while nodes.size > 1
  77. child1, child2 = nodes.shift, nodes.shift
  78. parent = FreqNode.new([child1.content.first + child2.content.first])
  79. child1.side, child2.side = :left, :right
  80. parent << child1
  81. parent << child2
  82. nodes << parent
  83. end
  84.  
  85. nodes.min
  86. end
  87.  
  88. # Encode the given letter using the given tree of FreqNodes.
  89. def encode_letter letter, tree
  90. enc = ''
  91.  
  92. # Find leaf with the right byte value
  93. node = nil
  94. tree.each_leaf do |leaf|
  95. (node = leaf; break) if leaf.letter == letter
  96. end
  97.  
  98. while not node.isRoot?
  99. node.side == :left ? enc << '0' : enc << '1'
  100. node = node.parent
  101. end
  102.  
  103. enc.reverse
  104. end
  105.  
  106. # Build a tree for the given string, and encode it using that tree.
  107. def encode str, tree = build_tree(str)
  108. #tree = build_tree(str)
  109. encoded = ''
  110. str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
  111. encoded
  112. end
  113.  
  114. # Decode the given string (which should consist of '0's and '1's) using the
  115. # given tree.
  116. def decode str, tree
  117. dec = ''
  118. i = 0
  119. node = tree
  120.  
  121. str.each_byte do |byte|
  122. node = (byte.chr == '0' ? node.left_child : node.right_child)
  123. if not node.hasChildren?
  124. dec << node.byte_value.chr
  125. node = tree
  126. end
  127. end
  128.  
  129. dec
  130. end
  131.  
  132. class String
  133. # Split up the string by inserting sep between len-length chunks.
  134. def split_up! len, sep
  135. (((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
  136. insert i, sep
  137. end
  138. self
  139. end
  140. end
  141.  
  142. # Output a binary string, splitting it up for easier reading.
  143. def puts_binary str
  144. str = str.dup
  145. str.split_up! 40, "\n"
  146. str.each { |line| puts line.split_up!(8, ' ') }
  147. end
  148.  
  149. if __FILE__ == $0
  150. input = ARGV.join ' '
  151. input_bytes = input.length
  152. enc = encode(input)
  153. enc_bytes = (enc.length / 8.0).ceil
  154. compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round
  155.  
  156. puts 'Encoded:'
  157. puts_binary(enc)
  158. puts "Encoded bytes: #{enc_bytes}\n\n"
  159.  
  160. puts "Original:\n#{input}"
  161. puts "Original bytes: #{input_bytes}\n\n"
  162.  
  163. puts "Compressed: #{compressed}%"
  164. end
Add Comment
Please, Sign In to add comment