#!/usr/bin/env ruby
require 'benchmark'
# Command-line setup.
#
# PATH is the directory to benchmark, taken from the first command-line
# argument. The script exits with status 1 when the argument is missing or
# does not name an existing directory.
PATH = ARGV[0]
if PATH.nil? || !File.directory?(PATH)
  puts "Need a source directory to start the benchmark"
  exit(1)
end
# DEVSHM is the directory in which the temporary archives are created. The
# script exits with status 2 when it does not exist.
DEVSHM = "/dev/shm"
unless File.directory?(DEVSHM)
  puts "No tmpfs at #{DEVSHM} available"
  exit(2)
end
# TITLE prefixes every generated file name. The path is expanded first so
# that '.' or '..' resolve to a real directory name instead of an empty
# title; the first dot of the base name is then removed.
TITLE = File.basename(File.expand_path(PATH)).sub('.', '')
# Number of memory samples kept per run (see Array#n_values).
N_MEMVALS = 10
# formats: { format_label => settings, ... }
#
# Each settings hash has three keys:
#   :extension  - file extension of the compressed archive
#   :compress   - command line used to compress the tar archive
#   :decompress - command line used to decompress it again
formats = {
  'Lzop' => {
    extension: 'lzo',
    compress: 'lzop -U',
    decompress: 'lzop -Ud'
  },
  'Gzip' => {
    extension: 'gz',
    compress: 'gzip',
    decompress: 'gunzip'
  },
  'Bzip2' => {
    extension: 'bz2',
    compress: 'bzip2',
    decompress: 'bunzip2'
  },
  '7-Zip' => {
    extension: '7z',
    compress: '/usr/lib/p7zip/7zr a -r -mmt=off',
    decompress: '/usr/lib/p7zip/7zr x -y -mmt=off'
  },
  'Xz' => {
    extension: 'xz',
    compress: 'xz',
    decompress: 'xz -d'
  }
}
# tee(str):
#
# Writes %str as a line to the standard output and to the file
# #{TITLE}-summary.txt. The summary file is opened for writing on the first
# call and the same handle is reused by every later call.
def tee(str = '')
  @tee_file ||= File.open("#{TITLE}-summary.txt", 'w')
  $stdout.puts str
  @tee_file.puts str
end
# Array#n_values(n):
#
# Returns a new array with exactly %n values. The receiver is not modified.
#
# * More than %n elements: the first %n-2 elements are kept; the last two
#   slots hold the average (computed with `/`, so it is truncated for
#   integers) of the elements from index %n-1 onwards, followed by the last
#   element.
# * Fewer than %n elements: the last element is repeated until the array
#   holds %n values. An empty receiver therefore yields %n nils.
class Array
  def n_values(n)
    # Work on a copy: the element assignments below must not leak into the
    # caller's array.
    values = dup
    if values.size > n
      tail = values[(n - 1)..-1]
      values[n - 2] = tail.reduce(:+) / tail.size
      values[n - 1] = tail.last
    end
    values = values[0, n]
    # Pad with the last value; the range is empty when nothing is missing.
    values.fill(values.last, values.size...n)
  end
end
# read_vm_rss_kb(pid):
#
# Returns the integer found on the VmRSS line of /proc/%pid/status, or nil
# when the file has no such line or can no longer be read (for instance
# because the process has already gone away).
def read_vm_rss_kb(pid)
  File.foreach("/proc/#{pid}/status") do |line|
    return line.sub(/.*:\s+/, '').to_i if line =~ /^VmRSS:.*/
  end
  nil
rescue Errno::ENOENT, Errno::ESRCH
  nil
end

# bm_process(command):
#
# Runs %command in a child process and returns [time, mem]:
#
# * time - elapsed wall-clock seconds as measured by Benchmark.realtime
#          (the summary line labels it "CPU time").
# * mem  - array of VmRSS samples of the child, taken roughly every half
#          second while it runs; it may be empty for very short commands.
#
# A two-line summary is written through tee. The average printed there skips
# the first sample.
def bm_process(command)
  mem = []
  sampler = nil
  time = Benchmark.realtime do
    pid = fork { exec(command) }
    # The sampler is started once the pid is known, so it never has to spin
    # waiting for it.
    sampler = Thread.new do
      loop do
        rss = read_vm_rss_kb(pid)
        mem << rss if rss
        sleep 0.5
      end
    end
    Process.waitpid(pid)
  end
  # Make sure the sampler has really stopped before mem is read.
  sampler.kill
  sampler.join
  tee "\tCPU time (s): #{time}"
  if mem.count > 1
    tee "\tMax/Average RSS memory (kB): #{mem.max}/#{mem.drop(1).reduce(:+) / mem.drop(1).count}"
  else
    tee "\tRSS memory (kB): #{mem[0]}"
  end
  [time, mem]
end
# Start the benchmarking
#
# Packs PATH into a tar archive under DEVSHM, then compresses and decompresses
# that archive once with every tool in formats. Results are collected in:
#   memory    - one row per run: label followed by N_MEMVALS memory samples
#   cpu_time  - header row, then [label, compression time, decompression time]
#   file_size - header row, then [label, size in bytes]
# All temporary archives are removed at the end, even after an error.
require 'shellwords'
memory = []
cpu_time = [['Format Type', 'Compression Time (s)', 'Decompression Time (s)']]
file_size = [['Format Type', 'File Size (KiB)']]
tee "Benchmarking #{formats.count} compression tools on '#{TITLE}'..."
# Assigned before the begin block so the cleanup in ensure always sees it.
tmppath = DEVSHM + "/" + TITLE + ".tmp"
# Shell-quoted form of the tar path: every command below goes through a
# shell, so spaces or metacharacters in the title must not split arguments.
tar_file = Shellwords.escape("#{tmppath}.tar")
begin
  # Create tar archive
  `tar cf #{tar_file} #{Shellwords.escape(PATH)}`
  # Display number of files
  tee "Files and directories: " + `tar tf #{tar_file} | wc -l`
  # Display archive file size
  filesize = File.size?("#{tmppath}.tar")
  raise "Could not create the archive #{tmppath}.tar" unless filesize
  file_size << ["Pristine", filesize]
  tee "File size (KiB): " + (filesize / 1024).to_s
  tee
  formats.each do |k, v|
    archive = "#{tmppath}.tar.#{v[:extension]}"
    quoted_archive = Shellwords.escape(archive)
    if k == '7-Zip'
      # 7zr takes the archive name explicitly and needs an output directory
      # for extraction.
      compress_command = "#{v[:compress]} #{quoted_archive} #{tar_file}"
      decompress_command = "#{v[:decompress]} -o#{Shellwords.escape(File.dirname(tmppath))} #{quoted_archive}"
    else
      compress_command = "#{v[:compress]} #{tar_file}"
      decompress_command = "#{v[:decompress]} #{quoted_archive}"
    end
    # Elapsed time and memory samples for compressing
    tee "#{k}"
    compression = bm_process(compress_command)
    # Display archive file size
    filesize = File.size?(archive)
    raise "#{k} did not produce the archive #{archive}" unless filesize
    tee "\tFile size (KiB): " + (filesize / 1024).to_s
    # Elapsed time and memory samples for decompressing
    tee "#{k} (dec)"
    decompression = bm_process(decompress_command)
    tee
    memory << [k, compression[1].n_values(N_MEMVALS)].flatten
    memory << ["#{k} (dec)", decompression[1].n_values(N_MEMVALS)].flatten
    cpu_time << [k, compression[0], decompression[0]]
    file_size << [k, filesize]
  end
ensure
  # Only the path prefix is quoted; the trailing glob must stay active.
  `rm -rf #{Shellwords.escape(tmppath)}.tar*`
end
# Write CSV files
require 'csv'
# memory holds one row per run; flip it so that every run becomes a column
# with its label in the first row.
memory = memory.transpose
csv_outputs = [
  ["#{TITLE}-memory.csv", memory],
  ["#{TITLE}-cpu-time.csv", cpu_time],
  ["#{TITLE}-file-size.csv", file_size]
]
csv_outputs.each do |csv_path, rows|
  CSV.open(csv_path, 'w') do |csv|
    rows.each { |row| csv << row }
  end
end
# Retrieve charts through Google Chart API
require 'gchart'
# max_value(v):
#
# Returns %v plus a 5% margin. The margin is computed as v * 5 / 100, so it
# is truncated when %v is an integer.
def max_value(v)
  margin = v * 5 / 100
  v + margin
end
# max_value_f(v):
#
# Rounds %v up: to the next integer when %v is at least 1, otherwise to the
# next tenth.
def max_value_f(v)
  return v.ceil unless v < 1

  (v * 10.0).ceil / 10.0
end
# y_axis_values(max):
#
# Returns six integers at 0%, 20%, ..., 100% of %max, each truncated with
# to_i.
def y_axis_values(max)
  (0..5).map { |tick| (tick * 0.2 * max).to_i }
end
# y_axis_values_f(max, dec):
#
# Returns six floats at 0%, 20%, ..., 100% of %max, each truncated to %dec
# decimal places (4 by default).
def y_axis_values_f(max, dec = 4.0)
  scale = 10 ** dec.to_f
  (0..5).map { |tick| (tick * 0.2 * max * scale).to_i / scale }
end
# markers(labels, step):
#
# Returns one marker string per label, in the form
# "A<label>,666666,<index>,<(index + 1) * step>,12".
def markers(labels, step)
  labels.each_with_index.map do |label, index|
    "A#{label},666666,#{index},#{(index + 1) * step},12"
  end
end
# Generate Memory Chart
puts "Writing '#{TITLE}-memory.png'..."
bar_colors = ['76A4FB', 'FFCC33', '9FF670', '990066', 'BBCCED', '224499', '70AE4F']
# memory was transposed for the CSV export: its first row holds the run
# labels and each following row holds one sample position across all runs.
# Transposing the sample rows back gives one data series per run.
chart_data = memory.drop(1).transpose
chart_legend = memory[0]
# Leave 15% of headroom above the highest sample.
max_value = chart_data.flatten.max
max_value = max_value + max_value * 15 / 100
markers = markers(chart_legend, (N_MEMVALS - 2) / (chart_legend.count).to_f).join('|')
# NOTE(review): the extra "R" range marker is only added when some run took
# longer than the sampling window; it presumably highlights the columns that
# hold the averaged and last values - confirm against the Chart API.
markers = markers + "|R,CCCCCC,0,#{(N_MEMVALS-2)/N_MEMVALS.to_f-0.02},#{(N_MEMVALS-1)/N_MEMVALS.to_f+0.005}" \
  if cpu_time.drop(1).transpose.drop(1).flatten.max > N_MEMVALS / 2 # snapshot timeout in bm_process is 1/2 second
Gchart.line(
  :title => 'RSS Memory (MiB)',
  :size => '700x325',
  :encoding => 'text',
  :data => chart_data,
  :axis_with_labels => 'x,y',
  # Samples are in kB; the y axis is labelled in MiB.
  :axis_labels => [['0', "\u221E"], y_axis_values_f(max_value / 1024.0, 1.0)],
  :min_value => 0,
  :max_value => max_value,
  :axis_range => [[0, max_value]],
  :legend => chart_legend,
  :grid_lines => '0,10,0,0',
  :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
  # At most one colour per series (the inclusive range used before picked
  # one colour too many).
  :bar_colors => bar_colors.first(chart_data.count),
  :thickness => ([3] * chart_data.count).join('|'),
  :new_markers => markers,
  :format => 'file',
  :filename => "#{TITLE}-memory.png")
# Generate CPU Time Chart
puts "Writing '#{TITLE}-cpu-time.png'..."
# Skip the header row and split the table into its three columns.
time_labels, compress_times, decompress_times = cpu_time.drop(1).transpose
# The bars are stacked, so the axis has to fit both rounded maxima together.
time_axis_max = max_value_f(compress_times.max) + max_value_f(decompress_times.max)
Gchart.bar(
  grouped: false,
  title: 'CPU Time (s)',
  size: '350x245',
  encoding: 'text',
  data: [compress_times, decompress_times],
  axis_with_labels: 'x,y',
  axis_labels: [time_labels, y_axis_values_f(time_axis_max)],
  min_value: 0,
  max_value: time_axis_max,
  axis_range: [[0, time_axis_max]],
  legend: ['Compression', 'Decompression'],
  grid_lines: '0,10,0,0',
  bar_width_and_spacing: { width: 'a', spacing: 20, group_spacing: 0 },
  bar_colors: ['3072F3', 'FF9900'],
  new_markers: 'N**,000000,0,-1,12,,c|N**,000000,1,-1,12&chdlp=t',
  format: 'file',
  filename: "#{TITLE}-cpu-time.png"
)
# Generate File Size Chart
puts "Writing '#{TITLE}-file-size.png'..."
# Skip the header row and split the table into labels and sizes in bytes.
size_labels, byte_sizes = file_size.drop(1).transpose
# Bytes to MiB; the first division truncates to whole KiB when the size is
# an integer.
mib_sizes = byte_sizes.map { |bytes| bytes / 1024 / 1024.0 }
size_axis_max = max_value_f(mib_sizes.max)
Gchart.bar(
  title: 'File Size (MiB)',
  size: '350x245',
  encoding: 'text',
  data: mib_sizes,
  axis_with_labels: 'x,y',
  axis_labels: [size_labels, y_axis_values_f(size_axis_max, 2.0)],
  min_value: 0,
  max_value: size_axis_max,
  axis_range: [[0, size_axis_max]],
  grid_lines: '0,10,0,0',
  bar_width_and_spacing: { width: 'a', spacing: 20, group_spacing: 0 },
  bar_colors: '3072F3',
  new_markers: 'N**,000000,0,-1,12',
  format: 'file',
  filename: "#{TITLE}-file-size.png"
)
# Mashup the charts into one image
#
# The file size and CPU time charts are appended with '+append', then that
# result is appended to the memory chart with '-append'. The script exits
# with status 3 when 'convert' cannot be executed.
puts "Writing '#{TITLE}-3charts.png'..."
img1 = "#{TITLE}-file-size.png"
img2 = "#{TITLE}-cpu-time.png"
img3 = "#{TITLE}-memory.png"
img_output = "#{TITLE}-3charts.png"
# Each step is passed to system as an argument list, so no shell is involved
# and file names containing spaces or metacharacters stay intact.
convert_steps = [
  ['convert', img1, img2, '+append', img_output],
  ['convert', img3, img_output, '-append', img_output]
]
convert_steps.each do |argv|
  case system(*argv)
  when nil
    # system returns nil when the command could not be executed at all.
    puts "The command line tool 'convert' from ImageMagick is not installed"
    exit(3)
  when false
    # Non-zero exit status: report it and skip the remaining step, which
    # depends on this one.
    warn "'#{argv.join(' ')}' failed (#{$?})"
    break
  end
end