Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'open-uri'

# Number of search-result pages fetched when `scrape` is called without arguments.
HOW_MANY_PAGES = 10

# Format string for one search-result page; `%d` receives the page number.
URL_PATTERN = "https://rostov.hh.ru/search/vacancy?L_is_autosearch=false&area=113&clusters=true&enable_snippets=true&order_by=publication_time&schedule=remote&page=%d".freeze

# Builds the attribute hash for a single `div.vacancy-serp-item` node.
#
# @param element [#css, #at_css] one vacancy node from the parsed page
# @return [Hash{Symbol => String, nil}] the keys :position, :salary, :company,
#   :work_desc and :link; :link is nil when the node contains no `<a>` element
def vacancy_attributes(element)
  {
    position: element.css('span.g-user-content').text,
    salary: element.css('div.vacancy-serp-item__sidebar').text,
    company: element.css('div.vacancy-serp-item__meta-info').text,
    work_desc: element.css('div.g-user-content').text,
    # `at_css` yields nil when nothing matches; safe navigation reports a nil
    # link instead of raising NoMethodError on `nil['href']`.
    link: element.at_css('a')&.[]('href')
  }
end

# Downloads the search-result pages numbered 1 through `pages` and extracts
# the vacancies listed on each of them. Prints one progress line per page.
#
# NOTE(review): numbering starts at 1, as in the original code; if the site's
# `page` query parameter is zero-based the first page is never requested —
# confirm against the site.
#
# @param pages [Integer] how many pages to fetch (defaults to HOW_MANY_PAGES)
# @return [Array<Array<Hash>>] one inner array per page, in page order, each
#   holding one hash per vacancy (see #vacancy_attributes)
def scrape(pages: HOW_MANY_PAGES)
  (1..pages).map do |page_number|
    url = URL_PATTERN % page_number
    puts "Parsing page #{page_number}: #{url}"

    # URI.open, not Kernel#open: since Ruby 3.0 Kernel#open no longer fetches
    # URLs and would treat the address as a local file path.
    document = Nokogiri::HTML(URI.open(url, &:read))
    document.css('div.vacancy-serp-item').map { |element| vacancy_attributes(element) }
  end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement