Advertisement
Guest User

Untitled

a guest
Jun 12th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 1.84 KB | None | 0 0
  1. require 'httparty'
  2. require 'nokogiri'
  3.  
  # Fetches a web page and prints, for every matched element, a nested hash
  # describing how to locate that element again through its CSS path.
  #
  # Each printed hash has the shape:
  #
  #   { tag_name => { "<page_slug>_<label>_<tag_name>" => {
  #       type: tag_name, selector: 'css', identifier: css_path } } }
  #
  # Sample output captured for https://twitter.com/:
  #
  #   {"div"=>{"twitter_it's_what's_happening_modal-body_div"=>{
  #     :type=>"div",
  #     :selector=>"css",
  #     :identifier=>"html > body > div:nth-of-type(12) > div > div > div:nth-of-type(2)"}}
  #   }
  #   {"button"=>{"twitter_it's_what's_happening_Cancel_button"=>{
  #     :type=>"button",
  #     :selector=>"css",
  #     :identifier=>"html > body > div:nth-of-type(9) > div > div > div:nth-of-type(4) > button:nth-of-type(1)"}}
  #   }
  #   {"span"=>{"twitter_it's_what's_happening_Remove_span"=>{
  #     :type=>"span",
  #     :selector=>"css",
  #     :identifier=>"html > body > div:nth-of-type(7) > div > div > div:nth-of-type(2) > div > form > div > div:nth-of-type(2) > div:nth-of-type(1) > ul > li > span"}}
  #   }
  #   {"link"=>{"twitter_it's_what's_happening_stylesheet_link"=>{
  #     :type=>"link",
  #     :selector=>"css",
  #     :identifier=>"html > head > link:nth-of-type(1)"}}
  #   }
  class Scraper
    # Page scraped when no URL is given.
    DEFAULT_URL = 'https://twitter.com/'

    # CSS rules for the tags that get a locator entry.
    SELECTORS = %w[div button span link].freeze

    # Downloads +url+, then prints one locator hash per matched element.
    #
    # @param url [String] address of the page to scrape
    # @return [Object] whatever +each+ returns for the matched node collection
    def perform(url = DEFAULT_URL)
      # Hand Nokogiri the response body text rather than the response object.
      doc = Nokogiri::HTML(HTTParty.get(url).body)
      title_name = page_slug(doc)

      doc.css(*SELECTORS).each do |element|
        puts locator_for(element, title_name)
      end
    end

    private

    # Turns the page title into a lowercase, underscore-separated slug with
    # dots removed, e.g. "twitter_it's_what's_happening".
    # A page without a <title> yields an empty slug instead of raising.
    def page_slug(doc)
      doc.title.to_s.downcase.gsub(/\s+/, '_').delete('.')
    end

    # Builds the nested locator hash for a single element.
    def locator_for(element, title_name)
      {
        element.name => {
          "#{title_name}_#{get_element(element)}_#{element.name}" => {
            type: element.name,
            selector: 'css',
            identifier: element.css_path
          }
        }
      }
    end

    # Picks the label used in the locator key: the class attribute for a div,
    # the rel attribute for a link, and the whitespace-stripped text for
    # anything else. A missing attribute gives nil, which interpolates as an
    # empty segment in the key.
    def get_element(teg)
      case teg.name
      when 'div' then teg['class']
      when 'link' then teg['rel']
      else teg.text.gsub(/\s+/, '')
      end
    end
  end

  # Script entry point. It lives outside the class body so it no longer runs
  # while the class is still being defined. perform already prints each hash,
  # so its return value is not printed again.
  Scraper.new.perform
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement