Advertisement
Guest User

Untitled

a guest
Apr 2nd, 2014
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 1.26 KB | None | 0 0
  #!/usr/bin/ruby -w
  #
  # Weekly Beats survivor counter.
  #
  # Fetches the weeklybeats.com front page, reads the current week number from
  # it, then follows the front-page link for every week from 1 up to the
  # current week and collects the artist names (the text of each <cite>
  # element) found on that week's page. For every week after the first it
  # prints the zero-padded week number followed by the number of "survivors":
  # artists that appeared on every week's page processed so far.

  require 'rubygems'
  require 'mechanize'

  agent = Mechanize.new

  # The front page holds both the current week number and the per-week links.
  page = agent.get("http://weeklybeats.com")

  # The current week is read from the first child of the "listen_week" span.
  # Fail with a readable message instead of a NoMethodError on nil if the
  # span (or its content) is not present.
  week_span = page.search('span[id="listen_week"]').first
  week_node = week_span && week_span.children.first
  raise 'could not find the current week (span#listen_week) on the main page' if week_node.nil?
  current_week = week_node.inner_text.to_i

  # Artists present in every week processed so far; seeded from week 1.
  survivors = []

  # Visit each week by clicking its link on the main page.
  (1..current_week).each do |week|
    week_num = format('%02d', week)

    # "\302\240" is the UTF-8 byte pair of a non-breaking space (U+00A0), so
    # this matches any link whose text contains NBSP followed by the week
    # number. The original author noted that the exact-string form
    # (:text => "\302\240#{week_num}") worked in 2012 but later raised an
    # error, which is why a regexp is used here.
    week_link = page.link_with(:text => /\302\240#{week_num}/)
    # link_with returns nil when nothing matches; stop with a clear message
    # rather than letting agent.click fail on nil.
    raise "no link found for week #{week_num} on the main page" if week_link.nil?
    results_page = agent.click(week_link)

    # Artist names are the text of the <cite> elements on the week's page.
    # NOTE: the original author observed that the per-week artist count
    # "maxes at 200" -- presumably the page only lists the first 200 entries;
    # verify before trusting the totals.
    artists = results_page.search('cite').map { |cite| cite.inner_text }

    if week > 1
      # Array#& is set intersection: keep only artists also present this week.
      survivors &= artists
      puts "#{week_num} #{survivors.length}"
    else
      # Week 1 just seeds the survivor list; nothing is printed for it.
      survivors = artists
    end
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement