Guest User

benchmark-compression.rb

a guest
Sep 3rd, 2010
380
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env ruby
  2.  
  3. require 'benchmark'
  4.  
  5. PATH = ARGV[0]
  6. if PATH == nil || File.file?(PATH)
  7.     puts "Need a source directory to start the benchmark"
  8.     exit(1)
  9. end
  10.  
  11. DEVSHM = "/dev/shm"
  12. if File.file?(DEVSHM)
  13.     puts "No tmpfs at #{DEVSHM} available"
  14.     exit(2)
  15. end
  16.  
  17. TITLE = File.basename(PATH).sub('.', '')
  18. N_MEMVALS = 10
  19.  
  20. # formats: { format_label => {:file_extension, :compression_command, :decompression_command}, ... }
  21. formats = {
  22.     'Lzop'  => {:extension => 'lzo', :compress => 'lzop -U', :decompress => 'lzop -Ud'},
  23.     'Gzip'  => {:extension => 'gz',  :compress => 'gzip',    :decompress => 'gunzip'},
  24.     'Bzip2' => {:extension => 'bz2', :compress => 'bzip2',   :decompress => 'bunzip2'},
  25.     '7-Zip' => {:extension => '7z',
  26.         :compress => '/usr/lib/p7zip/7zr a -r -mmt=off',
  27.         :decompress => '/usr/lib/p7zip/7zr x -y -mmt=off'},
  28.     'Xz'    => {:extension => 'xz',  :compress => 'xz',      :decompress => 'xz -d'},
  29. }
  30.  
  31. # tee(str):
  32. #
  33. # Prints %str to the standard output and to the file #{TITLE}-summary.txt.
  34. def tee(str = '')
  35.     if @tee_file == nil
  36.         @tee_file = File.open("#{TITLE}-summary.txt", 'w')
  37.     end
  38.     puts str
  39.     @tee_file.puts str
  40. end
  41.  
  42. # Array#n_values(n):
  43. #
  44. # Return an array with %n values. Two last elements are respectively the
  45. # average value and the last value if the array has more than %n values,
  46. # otherwise the last value is repeated till %n.
  47. class Array
  48.     def n_values(n)
  49.         arr = self
  50.         if arr.count > n
  51.             tmp = arr[(n-1)..arr.count]
  52.             last_val = tmp.reduce(:+) / tmp.count
  53.             arr[n-2] = last_val
  54.             arr[n-1] = tmp.last
  55.         end
  56.         arr = arr[0, n]
  57.         if arr.count < n
  58.             count = i = arr.count
  59.             while i < n
  60.                 arr[i] = arr[count - 1]
  61.                 i = i + 1
  62.             end
  63.         end
  64.         arr
  65.     end
  66. end
  67.  
  68. # bm_process(command):
  69. #
  70. # Returns the time and memory (as array) needed for %command to perform.
  71. def bm_process(command)
  72.     p = 0
  73.     mem = Array.new
  74.  
  75.     t = Thread.new do
  76.         while true do
  77.             next if p == 0
  78.             File.open("/proc/#{p}/status").each do |line|
  79.                 mem << line.sub(/.*:\s+/, '').to_i if line =~ /^VmRSS:.*/
  80.             end
  81.             sleep 0.5
  82.         end
  83.     end
  84.  
  85.     time = Benchmark.realtime do
  86.         p = fork do
  87.             exec(command)
  88.         end
  89.         Process.waitpid(p)
  90.         t.exit
  91.     end
  92.  
  93.     tee "\tCPU time (s): #{time}"
  94.     if mem.count > 1
  95.         tee "\tMax/Average RSS memory (kB): #{mem.max}/#{mem.drop(1).reduce(:+) / mem.drop(1).count}"
  96.     else
  97.         tee "\tRSS memory (kB): #{mem[0]}"
  98.     end
  99.  
  100.     return time, mem
  101. end
  102.  
  103. # Start the benchmarking
  104. memory = []
  105. cpu_time = [['Format Type', 'Compression Time (s)', 'Decompression Time (s)']]
  106. file_size = [['Format Type', 'File Size (KiB)']]
  107.  
  108. tee "Benchmarking #{formats.count} compression tools on '#{TITLE}'..."
  109.  
  110. begin
  111.     tmppath = DEVSHM + "/" + TITLE + ".tmp"
  112.  
  113.     # Create tar archive
  114.     `tar cf #{tmppath}.tar #{PATH}`
  115.  
  116.     # Display number of files
  117.     tee "Files and directories: " + `tar tf #{tmppath}.tar | wc -l`
  118.  
  119.     # Display archive file size
  120.     filesize = File.size?("#{tmppath}.tar")
  121.     file_size << ["Pristine", filesize]
  122.     tee "File size (KiB): " + (filesize / 1024).to_s
  123.     tee
  124.  
  125.     formats.each do |k, v|
  126.         compress_command = "#{v[:compress]} #{tmppath}.tar"
  127.         decompress_command = "#{v[:decompress]} #{tmppath}.tar.#{v[:extension]}"
  128.         if k == '7-Zip'
  129.             compress_command = "#{v[:compress]} #{tmppath}.tar.7z #{tmppath}.tar"
  130.             decompress_command = "#{v[:decompress]} -o#{File.dirname(tmppath)} #{tmppath}.tar.7z"
  131.         end
  132.  
  133.         # Time consumed by CPU for compressing
  134.         tee "#{k}"
  135.         compression = bm_process(compress_command)
  136.  
  137.         # Display archive file size
  138.         filesize = File.size?("#{tmppath}.tar.#{v[:extension]}")
  139.         tee "\tFile size (KiB): " + (filesize / 1024).to_s
  140.  
  141.         # Time consumed by CPU for decompressing
  142.         tee "#{k} (dec)"
  143.         decompression = bm_process(decompress_command)
  144.  
  145.         tee
  146.  
  147.         memory << [k, compression[1].n_values(N_MEMVALS)].flatten
  148.         memory << ["#{k} (dec)", decompression[1].n_values(N_MEMVALS)].flatten
  149.         cpu_time << [k, compression[0], decompression[0]]
  150.         file_size << [k, filesize]
  151.     end
  152. ensure
  153.     `rm -rf #{tmppath}.tar*`
  154. end
  155.  
  156. # Write CSV files
  157. require 'csv'
  158.  
  159. memory = memory.transpose
  160. CSV.open("#{TITLE}-memory.csv", 'w') do |writer|
  161.     memory.each do |row|
  162.         writer << row
  163.     end
  164. end
  165.  
  166. CSV.open("#{TITLE}-cpu-time.csv", 'w') do |writer|
  167.     cpu_time.each do |row|
  168.         writer << row
  169.     end
  170. end
  171.  
  172. CSV.open("#{TITLE}-file-size.csv", 'w') do |writer|
  173.     file_size.each do |row|
  174.         writer << row
  175.     end
  176. end
  177.  
  178. # Retrieve charts through Google Chart API
  179. require 'gchart'
  180.  
  181. def max_value(v)
  182.     v + v * 5 / 100
  183. end
  184.  
  185. def max_value_f(v)
  186.     if v < 1
  187.         v = (v * 10.0).ceil / 10.0
  188.     else
  189.         v = v.ceil
  190.     end
  191.     v
  192. end
  193.  
  194. def y_axis_values(max)
  195.     Array.new(6) {|i| ((i * 0.2) * max).to_i}
  196. end
  197.  
  198. def y_axis_values_f(max, dec = 4.0)
  199.     dec = dec.to_f
  200.     Array.new(6) {|i| ((i * 0.2) * max * 10 ** dec).to_i / 10 ** dec}
  201. end
  202.  
  203. def markers(labels, step)
  204.     markers = Array.new(labels.count) do |i|
  205.         "A#{labels[i]},666666,#{i},#{(i+1)*step},12"
  206.     end
  207. end
  208.  
  209. # Generate Memory Chart
  210. puts "Writing '#{TITLE}-memory.png'..."
  211. bar_colors = ['76A4FB', 'FFCC33', '9FF670', '990066', 'BBCCED', '224499', '70AE4F']
  212. chart_data = memory.drop(1).transpose
  213. chart_legend = memory[0]
  214. max_value = chart_data.flatten.max
  215. max_value = max_value + max_value * 15 / 100
  216. markers = markers(chart_legend, (N_MEMVALS - 2) / (chart_legend.count).to_f).join('|')
  217. markers = markers + "|R,CCCCCC,0,#{(N_MEMVALS-2)/N_MEMVALS.to_f-0.02},#{(N_MEMVALS-1)/N_MEMVALS.to_f+0.005}" \
  218.     if cpu_time.drop(1).transpose.drop(1).flatten.max > N_MEMVALS / 2 # snapshot timeout in bm_process is 1/2 second
  219. Gchart.line(
  220.     :title => 'RSS Memory (MiB)',
  221.     :size => '700x325',
  222.     :encoding => 'text',
  223.     :data => chart_data,
  224.     :axis_with_labels => 'x,y',
  225.     :axis_labels => [['0', "\u221E"], y_axis_values_f(max_value / 1024.0, 1.0)],
  226.     :min_value => 0,
  227.     :max_value => max_value,
  228.     :axis_range => [[0, max_value]],
  229.     :legend => chart_legend,
  230.     :grid_lines => '0,10,0,0',
  231.     :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
  232.     :bar_colors => bar_colors[0..(chart_data.count)],
  233.     :thickness => ([3] * chart_data.count).join('|'),
  234.     :new_markers => markers,
  235.     :format => 'file',
  236.     :filename => "#{TITLE}-memory.png")
  237.  
  238. # Generate CPU Time Chart
  239. puts "Writing '#{TITLE}-cpu-time.png'..."
  240. chart_array = cpu_time.drop(1).transpose
  241. max_value = max_value_f(chart_array[1].max) + max_value_f(chart_array[2].max)
  242. Gchart.bar(
  243.     :grouped => false,
  244.     :title => 'CPU Time (s)',
  245.     :size => '350x245',
  246.     :encoding => 'text',
  247.     :data => [chart_array[1], chart_array[2]],
  248.     :axis_with_labels => 'x,y',
  249.     :axis_labels => [chart_array[0], y_axis_values_f(max_value)],
  250.     :min_value => 0,
  251.     :max_value => max_value,
  252.     :axis_range => [[0, max_value]],
  253.     :legend => ['Compression', 'Decompression'],
  254.     :grid_lines => '0,10,0,0',
  255.     :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
  256.     :bar_colors => ['3072F3', 'FF9900'],
  257.     :new_markers => 'N**,000000,0,-1,12,,c|N**,000000,1,-1,12&chdlp=t',
  258.     :format => 'file',
  259.     :filename => "#{TITLE}-cpu-time.png")
  260.  
  261. # Generate File Size Chart
  262. puts "Writing '#{TITLE}-file-size.png'..."
  263. chart_array = file_size.drop(1).transpose
  264. chart_array[1] = chart_array[1].collect {|i| i/1024/1024.0}
  265. max_value = max_value_f(chart_array[1].max)
  266. Gchart.bar(
  267.     :title => 'File Size (MiB)',
  268.     :size => '350x245',
  269.     :encoding => 'text',
  270.     :data => chart_array[1],
  271.     :axis_with_labels => 'x,y',
  272.     :axis_labels => [chart_array[0], y_axis_values_f(max_value, 2.0)],
  273.     :min_value => 0,
  274.     :max_value => max_value,
  275.     :axis_range => [[0, max_value]],
  276.     :grid_lines => '0,10,0,0',
  277.     :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0 },
  278.     :bar_colors => '3072F3',
  279.     :new_markers => 'N**,000000,0,-1,12',
  280.     :format => 'file',
  281.     :filename => "#{TITLE}-file-size.png")
  282.  
  283. # Mashup the charts into one image
  284. puts "Writing '#{TITLE}-3charts.png'..."
  285. img1 = "#{TITLE}-file-size.png"
  286. img2 = "#{TITLE}-cpu-time.png"
  287. img3 = "#{TITLE}-memory.png"
  288. img_output = "#{TITLE}-3charts.png"
  289. begin
  290.     `convert #{img1} #{img2} +append #{img_output}`
  291.     `convert #{img3} #{img_output} -append #{img_output}`
  292. rescue
  293.     puts "The command line tool 'convert' from ImageMagick is not installed"
  294.     exit(3)
  295. end
RAW Paste Data