#!/usr/bin/env ruby

require 'benchmark'

# Directory to benchmark, taken from the first command-line argument.
PATH = ARGV[0]
# The source must be an existing directory: a missing argument, a
# nonexistent path and a regular file are all rejected.
unless PATH && File.directory?(PATH)
  puts "Need a source directory to start the benchmark"
  exit(1)
end

# Location where the temporary archives are written.
DEVSHM = "/dev/shm"
unless File.directory?(DEVSHM)
  puts "No tmpfs at #{DEVSHM} available"
  exit(2)
end

# Prefix used for every generated file name. Only the first '.' of the
# directory name is removed.
TITLE = File.basename(PATH).sub('.', '')

# Number of memory values kept per benchmarked command (see Array#n_values).
N_MEMVALS = 10

# formats: { format_label => {:extension, :compress, :decompress}, ... }
#
# :extension is the suffix of the compressed file, :compress and
# :decompress are the command lines used to benchmark the format.
formats = {
  'Lzop'  => {:extension => 'lzo', :compress => 'lzop -U', :decompress => 'lzop -Ud'},
  'Gzip'  => {:extension => 'gz',  :compress => 'gzip',    :decompress => 'gunzip'},
  'Bzip2' => {:extension => 'bz2', :compress => 'bzip2',   :decompress => 'bunzip2'},
  '7-Zip' => {:extension => '7z',  :compress => '/usr/lib/p7zip/7zr a -r -mmt=off', :decompress => '/usr/lib/p7zip/7zr x -y -mmt=off'},
  'Xz'    => {:extension => 'xz',  :compress => 'xz',      :decompress => 'xz -d'},
}

# tee(str):
#
# Prints %str to the standard output and to the file #{TITLE}-summary.txt.
# The summary file is opened (and truncated) on the first call and reused
# by every later call.
def tee(str = '')
  @tee_file ||= File.open("#{TITLE}-summary.txt", 'w')
  puts str
  @tee_file.puts str
end

# Array#n_values(n):
#
# Returns a new array with exactly %n values; the receiver is left untouched.
#
# - More than %n values: the first n-2 values are kept, the value at index
#   n-2 becomes the average of the values from index n-1 onwards and the
#   value at index n-1 becomes the last value of the receiver.
# - Fewer than %n values: the last value is repeated until %n values are
#   reached (an empty receiver is padded with nil).
class Array
  def n_values(n)
    values = first(n)
    if count > n
      tail = drop(n - 1)
      values[n - 2] = tail.reduce(:+) / tail.count
      values[n - 1] = tail.last
    end
    # Pads with the last value; adds nothing when %n values are present.
    values + [values.last] * (n - values.count)
  end
end

# bm_process(command):
#
# Returns the time and memory (as array) needed for %command to perform.
def bm_process(command)
  # Runs %command in a forked child process and samples the child's resident
  # memory every SAMPLE_INTERVAL seconds until it terminates.
  #
  # command - a command line (String), or an Array holding the program and
  #           its arguments, which is executed without a shell.
  #
  # Returns [elapsed_seconds, rss_samples_kb]. The elapsed value is measured
  # with Benchmark.realtime (wall-clock) and is reported under the
  # "CPU time" label.
  samples = []
  sampler = nil
  elapsed = Benchmark.realtime do
    pid = fork { exec(*Array(command)) }
    sampler = Thread.new do
      loop do
        rss = read_rss_kb(pid)
        samples << rss if rss
        sleep SAMPLE_INTERVAL
      end
    end
    Process.waitpid(pid)
  end
  sampler.kill
  sampler.join

  tee "\tCPU time (s): #{elapsed}"
  if samples.count > 1
    # The first sample is left out of the average.
    # NOTE(review): presumably because it can be taken before exec has
    # replaced the forked Ruby image -- confirm.
    later = samples.drop(1)
    tee "\tMax/Average RSS memory (kB): #{samples.max}/#{later.reduce(:+) / later.count}"
  else
    tee "\tRSS memory (kB): #{samples[0]}"
  end
  [elapsed, samples]
end

# Seconds between two memory samples taken by bm_process.
SAMPLE_INTERVAL = 0.5

# read_rss_kb(pid):
#
# Returns the VmRSS value (kB) found in /proc/%pid/status, or nil when the
# file has no VmRSS line or cannot be read (e.g. the process is gone).
def read_rss_kb(pid)
  File.foreach("/proc/#{pid}/status") do |line|
    rss = line[/^VmRSS:\s+(\d+)/, 1]
    return rss.to_i if rss
  end
  nil
rescue SystemCallError
  nil
end

# Start the benchmarking
memory = []
cpu_time = [['Format Type', 'Compression Time (s)', 'Decompression Time (s)']]
file_size = [['Format Type', 'File Size (KiB)']]
tee "Benchmarking #{formats.count} compression tools on '#{TITLE}'..."
tmppath = DEVSHM + "/" + TITLE + ".tmp"
tarball = "#{tmppath}.tar"
begin
  # Create tar archive. Arguments are passed as a list so that paths with
  # spaces or shell metacharacters are handled correctly.
  system('tar', 'cf', tarball, PATH)
  # Display number of files
  entries = IO.popen(['tar', 'tf', tarball]) { |io| io.readlines.count }
  tee "Files and directories: #{entries}"
  # Display archive file size. Sizes are stored in KiB, matching the column
  # header of the file-size table.
  filesize = File.size(tarball) / 1024
  file_size << ["Pristine", filesize]
  tee "File size (KiB): " + filesize.to_s
  tee

  formats.each do |label, tool|
    packed = "#{tarball}.#{tool[:extension]}"
    if label == '7-Zip'
      # 7zr needs the archive name and the output directory spelled out.
      compress_command = tool[:compress].split + [packed, tarball]
      decompress_command = tool[:decompress].split + ["-o#{File.dirname(tmppath)}", packed]
    else
      compress_command = tool[:compress].split + [tarball]
      decompress_command = tool[:decompress].split + [packed]
    end

    # Time consumed for compressing
    tee "#{label}"
    compression = bm_process(compress_command)
    # Display archive file size
    filesize = File.size(packed) / 1024
    tee "\tFile size (KiB): " + filesize.to_s
    # Time consumed for decompressing
    tee "#{label} (dec)"
    decompression = bm_process(decompress_command)
    tee

    memory << [label, compression[1].n_values(N_MEMVALS)].flatten
    memory << ["#{label} (dec)", decompression[1].n_values(N_MEMVALS)].flatten
    cpu_time << [label, compression[0], decompression[0]]
    file_size << [label, filesize]
  end
ensure
  # Remove the tarball and everything derived from it.
  prefix = File.basename(tarball)
  leftovers = Dir.entries(DEVSHM).select { |entry| entry.start_with?(prefix) }
  system('rm', '-rf', *leftovers.map { |entry| File.join(DEVSHM, entry) }) unless leftovers.empty?
end

# Write CSV files
require 'csv'
# One column per benchmarked command: first row holds the labels, the
# following rows hold the memory values.
memory = memory.transpose
[['memory', memory], ['cpu-time', cpu_time], ['file-size', file_size]].each do |suffix, rows|
  CSV.open("#{TITLE}-#{suffix}.csv", 'w') do |writer|
    rows.each { |row| writer << row }
  end
end

# Retrieve charts through Google Chart API
require 'gchart'

# Returns %v increased by 5%.
def max_value(v)
  v + v * 5 / 100
end

# Rounds %v up: to one decimal when below 1, to the next integer otherwise.
def max_value_f(v)
  v < 1 ? (v * 10.0).ceil / 10.0 : v.ceil
end

# Returns six integer axis labels from 0 to %max in steps of 20%.
def y_axis_values(max)
  Array.new(6) {|i| ((i * 0.2) * max).to_i}
end

# Same as y_axis_values, with labels truncated to %dec decimals.
def y_axis_values_f(max, dec = 4.0)
  dec = dec.to_f
  Array.new(6) {|i| ((i * 0.2) * max * 10 ** dec).to_i / 10 ** dec}
end

# Returns one text-marker definition per label; the marker of the label at
# index i is attached to series i at position (i+1)*%step.
def markers(labels, step)
  Array.new(labels.count) do |i|
    "A#{labels[i]},666666,#{i},#{(i+1)*step},12"
  end
end

# Generate Memory Chart
puts "Writing '#{TITLE}-memory.png'..."
bar_colors = ['76A4FB', 'FFCC33', '9FF670', '990066', 'BBCCED', '224499', '70AE4F']
chart_data = memory.drop(1).transpose
chart_legend = memory[0]
memory_peak = chart_data.flatten.max
memory_ceiling = memory_peak + memory_peak * 15 / 100
memory_markers = markers(chart_legend, (N_MEMVALS - 2) / (chart_legend.count).to_f).join('|')
# Add the range marker over the last two values when at least one run
# lasted longer than N_MEMVALS samples.
longest_run = cpu_time.drop(1).transpose.drop(1).flatten.max
if longest_run > N_MEMVALS * SAMPLE_INTERVAL
  memory_markers += "|R,CCCCCC,0,#{(N_MEMVALS-2)/N_MEMVALS.to_f-0.02},#{(N_MEMVALS-1)/N_MEMVALS.to_f+0.005}"
end
Gchart.line(
  :title => 'RSS Memory (MiB)',
  :size => '700x325',
  :encoding => 'text',
  :data => chart_data,
  :axis_with_labels => 'x,y',
  :axis_labels => [['0', "\u221E"], y_axis_values_f(memory_ceiling / 1024.0, 1.0)],
  :min_value => 0,
  :max_value => memory_ceiling,
  :axis_range => [[0, memory_ceiling]],
  :legend => chart_legend,
  :grid_lines => '0,10,0,0',
  :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
  # At most one colour per data series.
  :bar_colors => bar_colors.first(chart_data.count),
  :thickness => ([3] * chart_data.count).join('|'),
  :new_markers => memory_markers,
  :format => 'file',
  :filename => "#{TITLE}-memory.png")

# Generate CPU Time Chart
puts "Writing '#{TITLE}-cpu-time.png'..."
chart_array = cpu_time.drop(1).transpose
# The bars are not grouped, so the axis must fit both maxima added together.
time_ceiling = max_value_f(chart_array[1].max) + max_value_f(chart_array[2].max)
Gchart.bar(
  :grouped => false,
  :title => 'CPU Time (s)',
  :size => '350x245',
  :encoding => 'text',
  :data => [chart_array[1], chart_array[2]],
  :axis_with_labels => 'x,y',
  :axis_labels => [chart_array[0], y_axis_values_f(time_ceiling)],
  :min_value => 0,
  :max_value => time_ceiling,
  :axis_range => [[0, time_ceiling]],
  :legend => ['Compression', 'Decompression'],
  :grid_lines => '0,10,0,0',
  :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
  :bar_colors => ['3072F3', 'FF9900'],
  :new_markers => 'N**,000000,0,-1,12,,c|N**,000000,1,-1,12&chdlp=t',
  :format => 'file',
  :filename => "#{TITLE}-cpu-time.png")

# Generate File Size Chart
puts "Writing '#{TITLE}-file-size.png'..."
chart_array = file_size.drop(1).transpose
# Stored sizes are in KiB, the chart shows MiB.
chart_array[1] = chart_array[1].map { |kib| kib / 1024.0 }
size_ceiling = max_value_f(chart_array[1].max)
Gchart.bar(
  :title => 'File Size (MiB)',
  :size => '350x245',
  :encoding => 'text',
  :data => chart_array[1],
  :axis_with_labels => 'x,y',
  :axis_labels => [chart_array[0], y_axis_values_f(size_ceiling, 2.0)],
  :min_value => 0,
  :max_value => size_ceiling,
  :axis_range => [[0, size_ceiling]],
  :grid_lines => '0,10,0,0',
  :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0 },
  :bar_colors => '3072F3',
  :new_markers => 'N**,000000,0,-1,12',
  :format => 'file',
  :filename => "#{TITLE}-file-size.png")

# Mashup the charts into one image
puts "Writing '#{TITLE}-3charts.png'..."
img1 = "#{TITLE}-file-size.png"
img2 = "#{TITLE}-cpu-time.png"
img3 = "#{TITLE}-memory.png"
img_output = "#{TITLE}-3charts.png"
[
  [img1, img2, '+append', img_output],       # file size and CPU time side by side
  [img3, img_output, '-append', img_output], # memory chart stacked on top
].each do |convert_args|
  # Kernel#system returns nil when the program could not be executed.
  if system('convert', *convert_args).nil?
    puts "The command line tool 'convert' from ImageMagick is not installed"
    exit(3)
  end
end