Guest User

Untitled

a guest
Mar 12th, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. require 'rubygems'
  2. require 'hpricot'
  3. require 'open-uri'
  4. require 'mofo'
  5.  
  6. def me_urls_in(doc, base_url)
  7. me_urls = doc.search('a[@rel="me"]').map do |element|
  8. # Absolute URL
  9. if element['href'] =~ /^http:/
  10. element['href']
  11. # Absolute path
  12. elsif element['href'] =~ /^\//
  13. base_url.sub(/^(http:\/\/[^\/]+).*/, '\1') + element['href']
  14. # Relative path
  15. else
  16. base_url + element['href']
  17. end
  18. end
  19. end
  20.  
  21. def search(url)
  22. # Collection of processed and unprocessed URLs
  23. unprocessed_urls = [ url ]
  24. processed_urls = []
  25.  
  26. # Gathered personal information so far
  27. full_names = []
  28.  
  29. loop do
  30. # Take an unprocesed URL
  31. url = unprocessed_urls.shift
  32.  
  33. # Stop if there's nothing more to process
  34. break if url.nil?
  35.  
  36. # Skip if it's actually already processed
  37. next if processed_urls.include?(url)
  38.  
  39. # Mark it as processed
  40. processed_urls << url
  41.  
  42. # Debug
  43. puts '=== Processing ' + url.inspect
  44.  
  45. # Parse document
  46. content = open(url).read
  47. doc = Hpricot(content)
  48.  
  49. # Find new names
  50. addresses = doc.search('address')
  51. hcards = addresses.map { |address| hCard.find(:all => { :text => address.inner_html }) }.flatten
  52. names = hcards.map { |hcard| hcard.fn }
  53. full_names = (full_names + names).uniq
  54. puts 'Found new names:'
  55. names.each { |name| puts ' - ' + name }
  56.  
  57. # Find unprocessed URLs
  58. puts 'Found new URLs:'
  59. me_urls_in(doc, url).sort.each do |new_url|
  60. # Debug
  61. puts ' - ' + new_url
  62.  
  63. # Add it to the queue
  64. unprocessed_urls << new_url
  65. end
  66. end
  67.  
  68. # Print all URLs
  69. puts '=== Done!'
  70. puts 'Names:'
  71. full_names.each { |name| puts ' - ' + name }
  72. puts 'URLs:'
  73. processed_urls.each { |new_url| puts ' - ' + new_url }
  74. end
  75.  
  76. if ARGV[0].nil?
  77. puts 'usage: search_me [url]'
  78. else
  79. search(ARGV[0])
  80. end
Add Comment
Please, Sign In to add comment