Guest User

benchmark-compression.rb

a guest
Sep 3rd, 2010
421
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env ruby
  2.  
  3. require 'benchmark'
  4.  
  # Source tree to benchmark, taken from the first command line argument.
  PATH = ARGV[0]
  # The benchmark archives a whole tree, so anything that is not an existing
  # directory (no argument, a regular file, a missing path) is rejected.
  if PATH.nil? || !File.directory?(PATH)
    puts "Need a source directory to start the benchmark"
    exit(1)
  end

  # Temporary archives are written below DEVSHM, so it has to exist as a
  # directory before anything else is attempted.
  DEVSHM = "/dev/shm"
  unless File.directory?(DEVSHM)
    puts "No tmpfs at #{DEVSHM} available"
    exit(2)
  end

  # Label used in every output file name: the directory name minus its first dot.
  TITLE = File.basename(PATH).sub('.', '')
  # Number of memory samples kept per run (passed to Array#n_values).
  N_MEMVALS = 10

  # formats: { format_label => {:extension, :compress, :decompress}, ... }
  #
  # :extension is the suffix the tool appends to the archive name, :compress and
  # :decompress are the command prefixes the file name is appended to.
  formats = {
    'Lzop'  => {:extension => 'lzo', :compress => 'lzop -U', :decompress => 'lzop -Ud'},
    'Gzip'  => {:extension => 'gz',  :compress => 'gzip',    :decompress => 'gunzip'},
    'Bzip2' => {:extension => 'bz2', :compress => 'bzip2',   :decompress => 'bunzip2'},
    '7-Zip' => {:extension => '7z',
      :compress => '/usr/lib/p7zip/7zr a -r -mmt=off',
      :decompress => '/usr/lib/p7zip/7zr x -y -mmt=off'},
    'Xz'    => {:extension => 'xz',  :compress => 'xz',      :decompress => 'xz -d'},
  }
  30.  
  # tee(str):
  #
  # Prints %str to the standard output and appends it to the file
  # #{TITLE}-summary.txt, which is created (truncated) on the first call.
  #
  # The summary file is put in sync mode so every line reaches the file
  # immediately: nothing stays in a userspace buffer that a forked child
  # (see bm_process) would inherit, and the summary is complete even if the
  # script aborts half way through.
  def tee(str = '')
    @tee_file ||= File.open("#{TITLE}-summary.txt", 'w').tap { |file| file.sync = true }
    puts str
    @tee_file.puts str
  end
  41.  
  # Array#n_values(n):
  #
  # Returns a new array with exactly %n values; the receiver is left untouched.
  #
  # * More than %n elements: the elements from index n-1 to the end are
  #   collapsed. Slot n-2 receives their average (computed with `/`, so integer
  #   samples give an integer average) and slot n-1 receives the very last
  #   element.
  # * Fewer than %n elements: the last element is repeated up to %n (an empty
  #   array is padded with nil).
  class Array
    def n_values(n)
      # Work on a copy so the caller's array is never overwritten.
      values = dup
      if values.count > n
        tail = values[(n - 1)..-1]
        values[n - 2] = tail.reduce(:+) / tail.count
        values[n - 1] = tail.last
      end
      values = values[0, n]
      values + [values.last] * (n - values.count)
    end
  end
  67.  
  # bm_process(command):
  #
  # Runs %command in a forked child and returns [time, mem]:
  # * time - elapsed wall-clock seconds (Benchmark.realtime) until the child
  #          has been reaped. NOTE: the summary line is labelled "CPU time",
  #          but what is measured is elapsed time.
  # * mem  - the VmRSS values (kB) read from /proc/<pid>/status every half
  #          second while the child was alive. May be empty when the file
  #          cannot be read.
  #
  # The timing and memory figures are also written through tee.
  def bm_process(command)
    mem = []

    time = Benchmark.realtime do
      pid = fork { exec(command) }

      # The sampler is started once the pid is known, so it never has to spin
      # waiting for it.
      sampler = Thread.new do
        loop do
          begin
            # File.foreach closes the file after each snapshot.
            File.foreach("/proc/#{pid}/status") do |line|
              mem << line.sub(/.*:\s+/, '').to_i if line =~ /^VmRSS:/
            end
          rescue SystemCallError
            # The status file is gone or unreadable: nothing left to sample.
            break
          end
          sleep 0.5
        end
      end

      Process.waitpid(pid)
      sampler.kill
      sampler.join
    end

    tee "\tCPU time (s): #{time}"
    if mem.count > 1
      # The first snapshot is taken right after the fork, so it is left out of
      # the average.
      later = mem.drop(1)
      tee "\tMax/Average RSS memory (kB): #{mem.max}/#{later.reduce(:+) / later.count}"
    else
      tee "\tRSS memory (kB): #{mem[0]}"
    end

    [time, mem]
  end
  102.  
  # Start the benchmarking
  #
  # Tars PATH into DEVSHM, then runs the compression and decompression command
  # of every entry in `formats` through bm_process, collecting memory samples,
  # timings and archive sizes in `memory`, `cpu_time` and `file_size`.
  require 'shellwords'

  memory = []
  cpu_time = [['Format Type', 'Compression Time (s)', 'Decompression Time (s)']]
  # Sizes are stored in bytes (as returned by File.size?), so the column is
  # labelled accordingly.
  file_size = [['Format Type', 'File Size (B)']]

  tee "Benchmarking #{formats.count} compression tools on '#{TITLE}'..."

  begin
    tmppath = DEVSHM + "/" + TITLE + ".tmp"
    tar_file = "#{tmppath}.tar"
    # Shell-quoted form, so names with spaces or metacharacters survive the
    # command lines built below.
    tar_arg = Shellwords.escape(tar_file)

    # Create tar archive
    `tar cf #{tar_arg} #{Shellwords.escape(PATH)}`

    # Display number of files
    tee "Files and directories: " + `tar tf #{tar_arg} | wc -l`

    # Display archive file size
    filesize = File.size?(tar_file)
    raise "tar did not produce #{tar_file}" unless filesize
    file_size << ["Pristine", filesize]
    tee "File size (KiB): " + (filesize / 1024).to_s
    tee

    formats.each do |k, v|
      archive = "#{tar_file}.#{v[:extension]}"
      archive_arg = Shellwords.escape(archive)
      if k == '7-Zip'
        # 7zr takes the archive name explicitly and needs an output directory
        # for extraction.
        compress_command = "#{v[:compress]} #{archive_arg} #{tar_arg}"
        decompress_command = "#{v[:decompress]} -o#{Shellwords.escape(File.dirname(tmppath))} #{archive_arg}"
      else
        compress_command = "#{v[:compress]} #{tar_arg}"
        decompress_command = "#{v[:decompress]} #{archive_arg}"
      end

      # Time and memory needed for compressing
      tee "#{k}"
      compression = bm_process(compress_command)

      # Display archive file size; a missing or empty archive means the
      # compressor failed, so stop with a clear message.
      filesize = File.size?(archive)
      raise "#{k}: no archive produced at #{archive}" unless filesize
      tee "\tFile size (KiB): " + (filesize / 1024).to_s

      # Time and memory needed for decompressing
      tee "#{k} (dec)"
      decompression = bm_process(decompress_command)

      tee

      memory << [k, compression[1].n_values(N_MEMVALS)].flatten
      memory << ["#{k} (dec)", decompression[1].n_values(N_MEMVALS)].flatten
      cpu_time << [k, compression[0], decompression[0]]
      file_size << [k, filesize]
    end
  ensure
    # Remove the tar file and every archive derived from it, even on failure.
    `rm -rf #{Shellwords.escape(tmppath)}.tar*`
  end
  155.  
  # Write CSV files
  require 'csv'

  # One column per benchmark run instead of one row per run; the transposed
  # table stays in `memory` for the chart code.
  memory = memory.transpose

  # Output file name => rows to dump into it.
  {
    "#{TITLE}-memory.csv" => memory,
    "#{TITLE}-cpu-time.csv" => cpu_time,
    "#{TITLE}-file-size.csv" => file_size
  }.each do |csv_name, rows|
    CSV.open(csv_name, 'w') do |csv|
      rows.each { |row| csv << row }
    end
  end
  177.  
  178. # Retrieve charts through Google Chart API
  179. require 'gchart'
  180.  
  # Returns %v plus a 5 % margin. The margin is computed as v * 5 / 100, so it
  # is truncated when %v is an Integer.
  def max_value(v)
    margin = v * 5 / 100
    v + margin
  end
  184.  
  # Rounds %v up: to the next integer when it is 1 or more, otherwise to the
  # next tenth.
  def max_value_f(v)
    return v.ceil unless v < 1

    (v * 10.0).ceil / 10.0
  end
  193.  
  # Returns six integer axis values running from 0 to %max in steps of 20 %,
  # each one truncated with to_i.
  def y_axis_values(max)
    (0..5).map { |tick| (tick * 0.2 * max).to_i }
  end
  197.  
  # Returns six float axis values running from 0 to %max in steps of 20 %,
  # each one truncated to %dec decimal places.
  def y_axis_values_f(max, dec = 4.0)
    scale = 10**dec.to_f
    (0..5).map { |tick| (tick * 0.2 * max * scale).to_i / scale }
  end
  202.  
  # Builds one chart marker string per label: the text "A<label>", colour
  # 666666, the label's index, the position (index + 1) * %step and size 12.
  def markers(labels, step)
    labels.each_with_index.map do |label, index|
      "A#{label},666666,#{index},#{(index + 1) * step},12"
    end
  end
  208.  
  # Generate Memory Chart
  #
  # `memory` is the transposed table written to the CSV: its first row holds
  # the run labels, the remaining rows hold the samples (kB).
  puts "Writing '#{TITLE}-memory.png'..."
  bar_colors = ['76A4FB', 'FFCC33', '9FF670', '990066', 'BBCCED', '224499', '70AE4F']
  chart_data = memory.drop(1).transpose
  chart_legend = memory[0]
  # Leave 15 % of headroom above the highest sample.
  max_value = chart_data.flatten.max
  max_value = max_value + max_value * 15 / 100
  markers = markers(chart_legend, (N_MEMVALS - 2) / (chart_legend.count).to_f).join('|')
  # Samples are taken every half second in bm_process, so N_MEMVALS samples
  # cover N_MEMVALS / 2.0 seconds. When any run lasted longer, the band holding
  # the averaged tail is shaded.
  if cpu_time.drop(1).transpose.drop(1).flatten.max > N_MEMVALS / 2.0
    markers = markers + "|R,CCCCCC,0,#{(N_MEMVALS-2)/N_MEMVALS.to_f-0.02},#{(N_MEMVALS-1)/N_MEMVALS.to_f+0.005}"
  end
  Gchart.line(
    :title => 'RSS Memory (MiB)',
    :size => '700x325',
    :encoding => 'text',
    :data => chart_data,
    :axis_with_labels => 'x,y',
    :axis_labels => [['0', "\u221E"], y_axis_values_f(max_value / 1024.0, 1.0)],
    :min_value => 0,
    :max_value => max_value,
    :axis_range => [[0, max_value]],
    :legend => chart_legend,
    :grid_lines => '0,10,0,0',
    :bar_width_and_spacing => {:width => 'a', :spacing => 20, :group_spacing => 0},
    # At most one colour per data series.
    :bar_colors => bar_colors.first(chart_data.count),
    :thickness => ([3] * chart_data.count).join('|'),
    :new_markers => markers,
    :format => 'file',
    :filename => "#{TITLE}-memory.png")
  237.  
  # Generate CPU Time Chart
  #
  # Stacked bars: one bar per format, compression time below decompression time.
  puts "Writing '#{TITLE}-cpu-time.png'..."
  chart_array = cpu_time.drop(1).transpose
  format_labels, compress_times, decompress_times = chart_array
  # The bars are stacked, so the axis has to hold both rounded maxima together.
  max_value = [compress_times, decompress_times].map { |times| max_value_f(times.max) }.reduce(:+)
  Gchart.bar(
    grouped: false,
    title: 'CPU Time (s)',
    size: '350x245',
    encoding: 'text',
    data: [compress_times, decompress_times],
    axis_with_labels: 'x,y',
    axis_labels: [format_labels, y_axis_values_f(max_value)],
    min_value: 0,
    max_value: max_value,
    axis_range: [[0, max_value]],
    legend: ['Compression', 'Decompression'],
    grid_lines: '0,10,0,0',
    bar_width_and_spacing: { width: 'a', spacing: 20, group_spacing: 0 },
    bar_colors: ['3072F3', 'FF9900'],
    new_markers: 'N**,000000,0,-1,12,,c|N**,000000,1,-1,12&chdlp=t',
    format: 'file',
    filename: "#{TITLE}-cpu-time.png"
  )
  260.  
  # Generate File Size Chart
  #
  # One bar per archive, sizes converted from bytes to MiB.
  puts "Writing '#{TITLE}-file-size.png'..."
  chart_array = file_size.drop(1).transpose
  # Whole KiB first (integer division), then a float number of MiB.
  chart_array[1] = chart_array[1].map { |bytes| bytes / 1024 / 1024.0 }
  archive_labels, sizes_mib = chart_array
  max_value = max_value_f(sizes_mib.max)
  Gchart.bar(
    title: 'File Size (MiB)',
    size: '350x245',
    encoding: 'text',
    data: sizes_mib,
    axis_with_labels: 'x,y',
    axis_labels: [archive_labels, y_axis_values_f(max_value, 2.0)],
    min_value: 0,
    max_value: max_value,
    axis_range: [[0, max_value]],
    grid_lines: '0,10,0,0',
    bar_width_and_spacing: { width: 'a', spacing: 20, group_spacing: 0 },
    bar_colors: '3072F3',
    new_markers: 'N**,000000,0,-1,12',
    format: 'file',
    filename: "#{TITLE}-file-size.png"
  )
  282.  
  # Mashup the charts into one image
  puts "Writing '#{TITLE}-3charts.png'..."
  img1 = "#{TITLE}-file-size.png"
  img2 = "#{TITLE}-cpu-time.png"
  img3 = "#{TITLE}-memory.png"
  img_output = "#{TITLE}-3charts.png"
  # Two ImageMagick `convert` runs: the two small charts are joined with
  # +append, then the memory chart and that result are joined with -append.
  [
    [img1, img2, '+append', img_output],
    [img3, img_output, '-append', img_output]
  ].each do |convert_args|
    # An argument list bypasses the shell, so unusual characters in TITLE are
    # harmless. system returns nil when the program cannot be started and
    # false when it exits with an error.
    status = system('convert', *convert_args)
    if status.nil?
      puts "The command line tool 'convert' from ImageMagick is not installed"
      exit(3)
    end
    next if status

    # A failed step leaves nothing sensible for the next one to work on.
    warn "convert #{convert_args.join(' ')} failed"
    break
  end
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×