Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'httparty'
require 'nokogiri'

# Fetches a web page and prints, for every <div>, <button>, <span> and <link>
# element found on it, a nested hash describing how to locate that element
# by its CSS path.
#
# Example of the printed hashes (one per element):
#
#   {"div"=>{"twitter_it's_what's_happening_modal-body_div"=>{
#     :type=>"div",
#     :selector=>"css",
#     :identifier=>"html > body > div:nth-of-type(12) > div > div > div:nth-of-type(2)"}}}
#   {"button"=>{"twitter_it's_what's_happening_Cancel_button"=>{
#     :type=>"button",
#     :selector=>"css",
#     :identifier=>"html > body > div:nth-of-type(9) > div > div > div:nth-of-type(4) > button:nth-of-type(1)"}}}
#   {"span"=>{"twitter_it's_what's_happening_Remove_span"=>{
#     :type=>"span",
#     :selector=>"css",
#     :identifier=>"html > body > div:nth-of-type(7) > div > div > div:nth-of-type(2) > div > form > div > div:nth-of-type(2) > div:nth-of-type(1) > ul > li > span"}}}
#   {"link"=>{"twitter_it's_what's_happening_stylesheet_link"=>{
#     :type=>"link",
#     :selector=>"css",
#     :identifier=>"html > head > link:nth-of-type(1)"}}}
class Scraper
  # Page scraped when #perform is called without an argument.
  DEFAULT_URL = 'https://twitter.com/'

  # Tag names whose elements are reported.
  TAGS = %w[div button span link].freeze

  # Downloads +url+ and prints one locator hash per matching element.
  #
  # @param url [String] address of the page to scrape
  # @return [Nokogiri::XML::NodeSet] the matched elements (the receiver of
  #   the final #each), not the printed hashes
  def perform(url = DEFAULT_URL)
    # Give Nokogiri the raw response body (a String) rather than the
    # HTTParty response wrapper.
    doc = Nokogiri::HTML(HTTParty.get(url).body)
    prefix = title_prefix(doc)

    # Each tag name is passed to #css as a separate rule.
    doc.css(*TAGS).each { |element| puts locator_for(element, prefix) }
  end

  private

  # Builds the key prefix from the page title: lower-cased, every run of
  # whitespace replaced by an underscore, dots removed.
  #
  # @param doc [Nokogiri::HTML::Document] parsed page
  # @return [String] normalised title; empty when the page has no <title>
  def title_prefix(doc)
    # Document#title is nil when there is no <title>; #to_s avoids a
    # NoMethodError on #downcase in that case.
    doc.title.to_s.downcase.gsub(/\s+/, '_').delete('.')
  end

  # Builds the nested locator hash for a single element.
  #
  # @param element [Nokogiri::XML::Element] element to describe
  # @param prefix [String] normalised page title used as the key prefix
  # @return [Hash] { tag => { "<prefix>_<label>_<tag>" => { type:, selector:, identifier: } } }
  def locator_for(element, prefix)
    tag = element.name
    {
      tag => {
        "#{prefix}_#{get_element(element)}_#{tag}" => {
          type: tag,
          selector: 'css',
          identifier: element.css_path
        }
      }
    }
  end

  # Picks the label that identifies an element inside its key:
  # the +class+ attribute for <div>, the +rel+ attribute for <link>,
  # and the text content with all whitespace removed for anything else.
  #
  # @param node [Nokogiri::XML::Element] element to label
  # @return [String, nil] the label; nil when the attribute is absent
  #   (it then interpolates as an empty string in the key)
  def get_element(node)
    case node.name
    when 'div'  then node['class']
    when 'link' then node['rel']
    else node.text.gsub(/\s+/, '')
    end
  end
end

# Run the scraper. #perform prints every locator hash itself, so its return
# value (the node set) is deliberately not printed as well.
Scraper.new.perform
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement