Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Recursively crawls the site starting at +lin+, following links depth-first.
#
# A link is visited only when all of the following hold:
# * +level+ is at most 4 (the recursion depth limit),
# * the URL contains neither "javascript" nor "https",
# * the URL contains "http://www.mysite.it",
# * +filter_by_accent+ (defined elsewhere) accepts it.
#
# When the fetched page has a "headerTempoConsegna" element whose text
# contains "Tempo di consegna medio", the page is handed to
# <tt>@crawler.getDataFrom</tt>; otherwise every link on the page is crawled
# at <tt>level + 1</tt>.
#
# lin   - URL to visit; converted with +to_s+, so nil is simply skipped.
# level - current recursion depth of this call.
#
# HTTP error responses are reported on stdout and swallowed so the caller's
# loop can move on to its next link.
def crawl(lin, level)
  url = lin.to_s # normalize once; the filters below all work on a String

  return unless level <= 4
  return if url.include?("javascript") || url.include?("https")
  # This check also guarantees the URL contains "http".
  return unless url.include?("http://www.mysite.it")
  return unless filter_by_accent(url)

  puts "visiting link: " + url
  page = @agent.get(url)

  # Non-HTML responses (PDFs, images, ...) come back as objects without a
  # DOM parser; there is nothing to inspect or follow on them.
  return unless page.respond_to?(:parser)

  header = page.parser.xpath("//*[@id=\"headerTempoConsegna\"]/text()").to_s
  if header.include?("Tempo di consegna medio")
    @crawler.getDataFrom(url, page)
  else
    page.links.each { |a| crawl(a.href.to_s, level + 1) }
  end
rescue Mechanize::ResponseCodeError
  # NOTE(review): assumes @agent is a Mechanize agent (its API shape matches).
  # Mechanize signals 404 and other non-success statuses with
  # ResponseCodeError; Net::HTTPNotFound is a response class that is never
  # raised, so rescuing it caught nothing.
  puts "Exception raised... continue on another link"
end
Advertisement
Add Comment
Please, Sign In to add comment