Advertisement
Guest User

Extract URLs

a guest
Oct 20th, 2014
246
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 1.02 KB | None | 0 0
  #!/usr/bin/env ruby

  # = extractURL.rb
  #
  # Author: Alejandro Perez
  #
  # == Test Script
  #
  # Extracts URIs from a tcpdump capture in offline mode.
  # - Input: pcap file
  # - Output: URIs extracted (plain text), one per line on stdout, followed
  #   by "#"-prefixed summary lines
  #
  # Only packets accepted by the capture filter below are inspected, and a
  # URL is only reported when the request line and the Host header are found
  # in the same packet payload.
  #
  # Exit status: non-zero when the script is invoked with the wrong number
  # of arguments.

  require 'pcaplet'

  # handle input arguments; a usage error goes to stderr with a failing
  # status so that callers and pipelines can detect the misuse
  if ARGV.length != 1
    warn "Usage: ruby #{$PROGRAM_NAME} INPUT_FILE"
    exit 1
  end
  file = ARGV[0]

  # filter expression applied to the capture before packets are read
  capture_filter = 'tcp and dst port 8080'

  # regular expression to extract the URLs; captures the method, the request
  # target (everything between the method and "HTTP") and the Host value
  request_pattern = /(GET|POST|HEAD)([^\r\n]*?)HTTP.*?Host:([^\r\n]*)/xm

  packet_count = 0
  url_count = 0

  # open the input tcpdump raw file
  capture = Pcap::Capture.open_offline(file)

  begin
    capture.setfilter(Pcap::Filter.new(capture_filter, capture))

    # main loop
    capture.loop do |pkt|
      packet_count += 1

      # NOTE(review): tcp_data is presumably nil for segments without a
      # payload -- confirm against ruby-pcap. Skipping nil keeps such packets
      # counted but unmatched, as the original `=~` guard did.
      payload = pkt.tcp_data
      next unless payload

      # scan yields nothing when there is no match, so no separate pre-check
      # of the payload is needed
      payload.scan(request_pattern) do |_method, path, host|
        puts "http://#{host.strip}#{path.strip}"
        url_count += 1
      end
    end

    # summary output; the percentage uses integer division
    puts "# #{packet_count} packets processed"
    if packet_count > 0
      puts "# #{url_count} URLs extracted (#{url_count * 100 / packet_count}%)"
    end
  ensure
    # close the capture even if filtering or packet processing raised
    capture.close
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement