Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/ruby -w
#
# Scrapes weeklybeats.com and, for each week from week 2 up to the current
# week, prints the zero-padded week number followed by the number of
# "survivors": artists whose name appears in the results of every week so far.
#
# Known limitation (noted by the original author): the number of artists
# collected per week appears to max out at 200, so the counts may undercount
# once a week has more entries than that. TODO: confirm whether the results
# are paginated and follow the extra pages if so.

require 'rubygems'
require 'mechanize'

agent = Mechanize.new
page = agent.get("http://weeklybeats.com")

# The current week number is read from the first child node of the
# <span id="listen_week"> element on the main page.
week_node = page.search('span[id="listen_week"]').first
week_text = week_node && week_node.children.first
raise 'Could not find the current week (span#listen_week) on the main page' unless week_text

current_week = week_text.inner_text.to_i

# Running intersection of the artist lists of all weeks visited so far.
survivors = nil

# Visit each week's results by following its link on the main page.
(1..current_week).each do |week|
  week_num = sprintf("%02d", week)

  # The link text is matched as "\302\240" (the UTF-8 bytes of a non-breaking
  # space) followed by the two-digit week number. Per the original author, an
  # exact string match worked in 2012 but later gave an error, hence the regexp.
  week_link = page.link_with(:text => /\302\240#{week_num}/)
  raise "No link found on the main page for week #{week_num}" unless week_link

  results_page = agent.click(week_link)

  # Artist names are taken from the text of the <cite> tags on the results page.
  artists = results_page.search('cite').map(&:inner_text)

  if week == 1
    # First week: everyone who submitted is a survivor so far.
    survivors = artists
  else
    # Array intersection keeps only artists present in every week so far.
    survivors &= artists
    puts "#{week_num} #{survivors.length}"
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement