This week only. Pastebin PRO Accounts Christmas Special! Don't miss out! Want more features on Pastebin? Sign Up, it's FREE!
Guest

benchmark-compression.rb

By: a guest on Sep 3rd, 2010  |  syntax: Ruby  |  size: 8.06 KB  |  views: 209  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #!/usr/bin/env ruby
  2.  
  3. require 'benchmark'
  4.  
  # Source directory to benchmark, taken from the first command-line argument.
  PATH = ARGV[0]
  # Reject a missing argument and anything that is not an existing directory
  # (a nonexistent path used to slip through and only fail later in tar).
  if PATH.nil? || !File.directory?(PATH)
    puts "Need a source directory to start the benchmark"
    exit(1)
  end

  # Directory in which the temporary archives are created.
  # NOTE(review): only the directory's existence is checked, not that it is
  # really a tmpfs mount.
  DEVSHM = "/dev/shm"
  unless File.directory?(DEVSHM)
    puts "No tmpfs at #{DEVSHM} available"
    exit(2)
  end

  # Basename of the source directory with its first '.' removed; used as the
  # prefix of every file this script writes.
  TITLE = File.basename(PATH).sub('.', '')
  # Number of memory samples kept per benchmarked command (see Array#n_values).
  N_MEMVALS = 10
  19.  
  # formats: { format_label => {:extension, :compress, :decompress}, ... }
  #
  # :extension  - suffix the tool appends to the archive name
  # :compress   - command line prefix used to compress the tar archive
  # :decompress - command line prefix used to decompress it again
  #
  # NOTE(review): the extra flags (lzop -U, 7zr -mmt=off) are presumably there
  # to make the tools behave alike (remove the input, single thread) -- confirm
  # against the tools' manuals.
  formats = {
    'Lzop'  => { :extension => 'lzo', :compress => 'lzop -U', :decompress => 'lzop -Ud' },
    'Gzip'  => { :extension => 'gz',  :compress => 'gzip',    :decompress => 'gunzip' },
    'Bzip2' => { :extension => 'bz2', :compress => 'bzip2',   :decompress => 'bunzip2' },
    '7-Zip' => {
      :extension  => '7z',
      :compress   => '/usr/lib/p7zip/7zr a -r -mmt=off',
      :decompress => '/usr/lib/p7zip/7zr x -y -mmt=off'
    },
    'Xz'    => { :extension => 'xz',  :compress => 'xz',      :decompress => 'xz -d' }
  }
  30.  
  # tee(str):
  #
  # Prints %str to the standard output and to the file #{TITLE}-summary.txt.
  # The summary file is opened (and truncated) on the first call and kept open
  # for the rest of the run.
  def tee(str = '')
    @tee_file ||= File.open("#{TITLE}-summary.txt", 'w').tap do |file|
      # Write through immediately: nothing is lost if the script dies, and no
      # buffered text is left for a forked child process to flush again.
      file.sync = true
    end
    puts str
    @tee_file.puts str
  end
  41.  
  # Array#n_values(n):
  #
  # Returns a NEW array with exactly %n values; the receiver is left untouched.
  #
  # * More than %n elements: the first n-2 elements are kept, slot n-2 holds
  #   the average of the elements from index n-1 to the end (computed with the
  #   elements' own division, i.e. integer division for Integers) and slot n-1
  #   holds the last element.
  # * Fewer than %n elements: the last element is repeated until the array
  #   has %n entries (an empty array is padded with nil).
  class Array
    def n_values(n)
      values = dup # work on a copy so the caller's array is never modified
      if values.count > n
        tail = values[(n - 1)..-1]
        values[n - 2] = tail.reduce(:+) / tail.count
        values[n - 1] = tail.last
      end
      values = values[0, n]
      values.concat([values.last] * (n - values.count)) if values.count < n
      values
    end
  end
  67.  
  # bm_process(command):
  #
  # Runs %command in a forked child process and returns [time, mem]:
  #
  # * time - elapsed wall-clock seconds as measured by Benchmark.realtime
  #          (the summary line labels it "CPU time").
  # * mem  - array of VmRSS values in kB, read from /proc/<pid>/status about
  #          every 0.5 s while the child is alive.
  #
  # A short summary is also written through tee.
  def bm_process(command)
    mem = []

    time = Benchmark.realtime do
      pid = fork { exec(command) }

      # Sampler thread: started once the pid is known, so it never spins
      # waiting for it.
      sampler = Thread.new do
        loop do
          begin
            # File.foreach closes the handle after each pass.
            File.foreach("/proc/#{pid}/status") do |line|
              mem << line.sub(/.*:\s+/, '').to_i if line =~ /^VmRSS:.*/
            end
          rescue Errno::ENOENT, Errno::ESRCH
            break # the child is gone; nothing left to sample
          end
          sleep 0.5
        end
      end

      Process.waitpid(pid)
      sampler.kill
      sampler.join
    end

    tee "\tCPU time (s): #{time}"
    if mem.count > 1
      # The first sample is left out of the average.
      # NOTE(review): presumably because it can still show the forked Ruby
      # process before exec has replaced it -- confirm.
      later_samples = mem.drop(1)
      tee "\tMax/Average RSS memory (kB): #{mem.max}/#{later_samples.reduce(:+) / later_samples.count}"
    else
      tee "\tRSS memory (kB): #{mem[0]}"
    end

    [time, mem]
  end
  102.  
  # Start the benchmarking
  #
  # Builds a tar archive of PATH under DEVSHM, then compresses and
  # decompresses it once with every tool in formats, collecting:
  #   memory    - one row per command: [label, N_MEMVALS RSS samples...]
  #   cpu_time  - header row, then [label, compression s, decompression s]
  #   file_size - header row, then [label, size in bytes]
  require 'shellwords'

  memory = []
  cpu_time = [['Format Type', 'Compression Time (s)', 'Decompression Time (s)']]
  # The sizes stored below come straight from File.size?, i.e. they are bytes.
  file_size = [['Format Type', 'File Size (B)']]

  tee "Benchmarking #{formats.count} compression tools on '#{TITLE}'..."

  tmppath = DEVSHM + "/" + TITLE + ".tmp"
  tarball = "#{tmppath}.tar"
  # Shell-quoted variants, so directory names with spaces or shell
  # metacharacters cannot break (or hijack) the command lines.
  tarball_arg = Shellwords.escape(tarball)

  begin
    # Create tar archive
    `tar cf #{tarball_arg} #{Shellwords.escape(PATH)}`

    # Display archive file size (File.size? is nil for a missing/empty file)
    filesize = File.size?(tarball)
    raise "tar did not produce #{tarball}" unless filesize

    # Display number of files
    tee "Files and directories: " + `tar tf #{tarball_arg} | wc -l`

    file_size << ["Pristine", filesize]
    tee "File size (KiB): " + (filesize / 1024).to_s
    tee

    formats.each do |label, tool|
      archive = "#{tarball}.#{tool[:extension]}"
      archive_arg = Shellwords.escape(archive)

      if label == '7-Zip'
        # 7zr takes the archive name explicitly and an output directory.
        compress_command = "#{tool[:compress]} #{archive_arg} #{tarball_arg}"
        decompress_command = "#{tool[:decompress]} -o#{Shellwords.escape(File.dirname(tmppath))} #{archive_arg}"
      else
        compress_command = "#{tool[:compress]} #{tarball_arg}"
        decompress_command = "#{tool[:decompress]} #{archive_arg}"
      end

      # Time consumed for compressing
      tee "#{label}"
      compression = bm_process(compress_command)

      # Display archive file size
      filesize = File.size?(archive)
      raise "#{label} did not produce #{archive}" unless filesize
      tee "\tFile size (KiB): " + (filesize / 1024).to_s

      # Time consumed for decompressing
      tee "#{label} (dec)"
      decompression = bm_process(decompress_command)

      tee

      memory << [label, compression[1].n_values(N_MEMVALS)].flatten
      memory << ["#{label} (dec)", decompression[1].n_values(N_MEMVALS)].flatten
      cpu_time << [label, compression[0], decompression[0]]
      file_size << [label, filesize]
    end
  ensure
    # Remove the tar archive and every compressed variant of it.
    `rm -rf #{tarball_arg}*`
  end
  155.  
  # Write CSV files
  require 'csv'

  # memory holds one row per command ([label, samples...]); transposing turns
  # every command into a column: first row = labels, following rows = samples.
  # The transposed form is what the chart code further down reads.
  memory = memory.transpose

  # One "#{TITLE}-<suffix>.csv" file per collected table.
  [
    ['memory',    memory],
    ['cpu-time',  cpu_time],
    ['file-size', file_size]
  ].each do |suffix, rows|
    CSV.open("#{TITLE}-#{suffix}.csv", 'w') do |writer|
      rows.each { |row| writer << row }
    end
  end
  177.  
  178. # Retrieve charts through Google Chart API
  179. require 'gchart'
  180.  
  # Returns %v plus 5 % of %v. The arithmetic is done in %v's own numeric type,
  # so for an Integer the 5 % part is truncated by integer division.
  def max_value(v)
    v + v * 5 / 100
  end
  184.  
  # Rounds %v up to a convenient upper bound: values below 1 are rounded up to
  # the next tenth, everything else up to the next whole number.
  def max_value_f(v)
    return (v * 10.0).ceil / 10.0 if v < 1

    v.ceil
  end
  193.  
  # Returns six integer axis values: 0 %, 20 %, ... 100 % of %max, each
  # truncated with to_i.
  def y_axis_values(max)
    Array.new(6) { |tick| ((tick * 0.2) * max).to_i }
  end
  197.  
  # Returns six Float axis values: 0 %, 20 %, ... 100 % of %max, each
  # truncated (not rounded) to %dec decimal places.
  def y_axis_values_f(max, dec = 4.0)
    scale = 10**dec.to_f
    Array.new(6) { |tick| ((tick * 0.2) * max * scale).to_i / scale }
  end
  202.  
  # Builds one marker string per label, of the form
  # "A<label>,666666,<index>,<(index + 1) * step>,12", and returns them as an
  # array.
  # NOTE(review): the field layout presumably follows the Google Chart API
  # marker (chm) syntax -- confirm against its documentation.
  def markers(labels, step)
    labels.each_with_index.map do |label, index|
      "A#{label},666666,#{index},#{(index + 1) * step},12"
    end
  end
  208.  
  # Generate Memory Chart
  #
  # Line chart with one series per benchmarked command. memory is expected in
  # its transposed form: row 0 = command labels, remaining rows = RSS samples.
  puts "Writing '#{TITLE}-memory.png'..."
  bar_colors = ['76A4FB', 'FFCC33', '9FF670', '990066', 'BBCCED', '224499', '70AE4F']
  chart_data = memory.drop(1).transpose # back to one array of samples per command
  chart_legend = memory[0]
  # Leave 15 % headroom above the highest sample.
  peak = chart_data.flatten.max
  axis_max = peak + peak * 15 / 100
  marker_spec = markers(chart_legend, (N_MEMVALS - 2) / (chart_legend.count).to_f).join('|')
  # Samples are taken every 1/2 second, so N_MEMVALS samples span
  # N_MEMVALS / 2.0 seconds. When a command ran longer than that, the last two
  # values are condensed (see Array#n_values) and the range is shaded.
  if cpu_time.drop(1).transpose.drop(1).flatten.max > N_MEMVALS / 2.0
    marker_spec += "|R,CCCCCC,0,#{(N_MEMVALS-2)/N_MEMVALS.to_f-0.02},#{(N_MEMVALS-1)/N_MEMVALS.to_f+0.005}"
  end
  Gchart.line(
    :title => 'RSS Memory (MiB)',
    :size => '700x325',
    :encoding => 'text',
    :data => chart_data,
    :axis_with_labels => 'x,y',
    # Samples are in kB; the axis labels are converted to MiB.
    :axis_labels => [['0', "\u221E"], y_axis_values_f(axis_max / 1024.0, 1.0)],
    :min_value => 0,
    :max_value => axis_max,
    :axis_range => [[0, axis_max]],
    :legend => chart_legend,
    :grid_lines => '0,10,0,0',
    :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
    # At most one colour per series (the old inclusive range took one extra).
    :bar_colors => bar_colors.first(chart_data.count),
    :thickness => ([3] * chart_data.count).join('|'),
    :new_markers => marker_spec,
    :format => 'file',
    :filename => "#{TITLE}-memory.png")
  237.  
  # Generate CPU Time Chart
  #
  # Stacked bar chart (:grouped => false): compression and decompression time
  # of each format on top of each other.
  puts "Writing '#{TITLE}-cpu-time.png'..."
  # Drop the header row; columns become [labels, compression, decompression].
  labels, compression_times, decompression_times = cpu_time.drop(1).transpose
  # The bars are stacked, so the axis has to fit the sum of both maxima.
  axis_max = max_value_f(compression_times.max) + max_value_f(decompression_times.max)
  Gchart.bar(
    :grouped => false,
    :title => 'CPU Time (s)',
    :size => '350x245',
    :encoding => 'text',
    :data => [compression_times, decompression_times],
    :axis_with_labels => 'x,y',
    :axis_labels => [labels, y_axis_values_f(axis_max)],
    :min_value => 0,
    :max_value => axis_max,
    :axis_range => [[0, axis_max]],
    :legend => ['Compression', 'Decompression'],
    :grid_lines => '0,10,0,0',
    :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
    :bar_colors => ['3072F3', 'FF9900'],
    # NOTE(review): the trailing "&chdlp=t" looks like an extra query
    # parameter smuggled in through the marker string -- confirm.
    :new_markers => 'N**,000000,0,-1,12,,c|N**,000000,1,-1,12&chdlp=t',
    :format => 'file',
    :filename => "#{TITLE}-cpu-time.png")
  260.  
  # Generate File Size Chart
  #
  # Bar chart of the archive size per format, including the uncompressed tar.
  puts "Writing '#{TITLE}-file-size.png'..."
  # Drop the header row; columns become [labels, sizes in bytes].
  labels, sizes = file_size.drop(1).transpose
  # Bytes -> MiB in one floating-point step (dividing by 1024 twice would
  # first truncate to whole KiB).
  sizes_mib = sizes.map { |bytes| bytes / (1024.0 * 1024.0) }
  axis_max = max_value_f(sizes_mib.max)
  Gchart.bar(
    :title => 'File Size (MiB)',
    :size => '350x245',
    :encoding => 'text',
    :data => sizes_mib,
    :axis_with_labels => 'x,y',
    :axis_labels => [labels, y_axis_values_f(axis_max, 2.0)],
    :min_value => 0,
    :max_value => axis_max,
    :axis_range => [[0, axis_max]],
    :grid_lines => '0,10,0,0',
    :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0 },
    :bar_colors => '3072F3',
    :new_markers => 'N**,000000,0,-1,12',
    :format => 'file',
    :filename => "#{TITLE}-file-size.png")
  282.  
  # Mashup the charts into one image
  #
  # Step 1 puts the file-size and CPU-time charts side by side (+append),
  # step 2 stacks the memory chart on top of that result (-append).
  puts "Writing '#{TITLE}-3charts.png'..."
  img1 = "#{TITLE}-file-size.png"
  img2 = "#{TITLE}-cpu-time.png"
  img3 = "#{TITLE}-memory.png"
  img_output = "#{TITLE}-3charts.png"
  [
    ['convert', img1, img2, '+append', img_output],
    ['convert', img3, img_output, '-append', img_output]
  ].each do |argv|
    # The argument-list form of system bypasses the shell, so file names need
    # no quoting. It returns nil when the program cannot be started and false
    # when it exits with a non-zero status.
    case system(*argv)
    when nil
      puts "The command line tool 'convert' from ImageMagick is not installed"
      exit(3)
    when false
      puts "'#{argv.join(' ')}' failed (#{$?})"
      exit(3)
    end
  end
clone this paste RAW Paste Data