Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'curb'
- require 'nokogiri'
- require 'csv'
- require 'yaml'
- require 'curl'
# Thread handles registered while the file loads; whatever ends up in this
# array is joined at the bottom of the file before `main` is called.
threads = []
# Interactive entry point: asks for a category URL and an output file name,
# collects product names, prices and image links for that category and
# writes them to a CSV file.
#
# Example category URL: https://www.petsonic.com/farmacia-para-gatos/
#
# NOTE: extract_name, extract_price and extract_img_links each receive the
# full list of product links, so the list is walked three times.
#
# @return whatever write_to_csv returns
def main
  puts "Введите ссылку: "
  # $stdin.gets rather than Kernel#gets: the latter reads from files named in
  # ARGV when the script is started with arguments. `to_s` guards against the
  # nil returned on EOF; `strip` drops the newline and stray whitespace.
  url = $stdin.gets.to_s.strip
  puts "Введите название файла:"
  file_name = $stdin.gets.to_s.strip

  page_qty = get_qty_of_pages(url)
  all_links = get_pages_url(page_qty, url)
  names_weights = extract_name(all_links)
  price_array = extract_price(all_links)
  img_link_arr = extract_img_links(all_links)
  write_to_csv(file_name, names_weights, price_array, img_link_arr)
end
# Downloads +url+ with Curl and parses the response body with Nokogiri.
#
# Despite the name, the result is the object returned by Nokogiri::HTML,
# not a String.
#
# @param url the address passed straight to Curl.get
# @return the value of Nokogiri::HTML for the downloaded body
def url_to_string(url)
  Nokogiri::HTML(Curl.get(url).body_str)
end
# Plain value holder for one scraped product.
#
# Note the constructor argument order: name, price, weight, img_link.
class Product
  attr_accessor :name, :price, :weight, :img_link

  # @param name product name
  # @param price product price
  # @param weight product weight
  # @param img_link link to the product image
  def initialize(name, price, weight, img_link)
    @name = name
    @weight = weight
    @price = price
    @img_link = img_link
  end
end
# Works out how many listing pages a category spans.
#
# Downloads the category page via url_to_string, reads the product counter
# located by the 'number_of_products' XPath from params.yml and divides it
# by the page size, rounding up.
#
# @param url category listing URL
# @param per_page [Integer] products per listing page (previously hard-coded as 25)
# @return [Integer] number of listing pages; 0 when the counter reads 0
def get_qty_of_pages(url, per_page = 25)
  doc = url_to_string(url)
  params = YAML.load_file('params.yml')
  products_qty = doc.xpath(params['number_of_products']).text.to_i
  # Always round up. The earlier version only assigned the result when the
  # count was NOT a multiple of the page size and returned nil otherwise.
  products_qty.fdiv(per_page).ceil
end
# Collects the product links from every listing page of a category.
#
# Page 1 is the category URL itself; page i (i > 1) is the category URL
# with "?p=i" appended. On each page the nodes matched by the
# 'all_products_route' XPath from params.yml are collected in page order.
#
# @param page_number [Integer] how many listing pages to visit
# @param page [String] category listing URL
# @return [Array] the matched XPath results of all pages, unconverted
def get_pages_url(page_number, page)
  route = YAML.load_file('params.yml')['all_products_route']

  (1..page_number).flat_map do |i|
    listing_url = i == 1 ? page : "#{page}?p=#{i}"
    listing = url_to_string(listing_url)
    puts "Number of page parsing - #{i}"
    # to_a so flat_map splices the matches in, whatever collection xpath returns.
    listing.xpath(route).to_a
  end
end
# Builds one "name - weight" label per product variant.
#
# For every product link the page is downloaded via url_to_string; the name
# comes from the 'product_name_route' XPath and one label is produced per
# node matched by 'product_weight_price_for_loop' (both from params.yml).
#
# Defined as a plain top-level method: wrapping the `def` in Thread.new only
# deferred the definition and added no concurrency.
#
# NOTE(review): the weight text is read through 'product_price_route', the
# same key extract_price uses for prices — looks like a copy-paste; confirm
# against params.yml.
#
# @param all_products_links [Array] product page links
# @return [Array<String>] labels in link order, then variant order
def extract_name(all_products_links)
  params = YAML.load_file('params.yml')

  all_products_links.flat_map do |url|
    # `+` kept instead of interpolation: get_pages_url hands over raw xpath
    # results, so url is not necessarily a String.
    puts "Чтение страницы - " + url
    prod_page = url_to_string(url)
    name = prod_page.xpath(params['product_name_route']).text
    variants = prod_page.xpath(params['product_weight_price_for_loop'])

    variants.each_with_index.map do |variant, index|
      # The n-th variant takes the n-th match of the route, as before.
      weight = variant.xpath(params['product_price_route'])[index].text
      "#{name} - #{weight}"
    end
  end
end
# Collects the image-link XPath result of every product page.
#
# For every product link the page is downloaded via url_to_string and the
# result of the 'product_image_link_route' XPath (from params.yml) is kept
# unconverted.
#
# Defined as a plain top-level method: wrapping the `def` in Thread.new only
# deferred the definition and added no concurrency.
#
# NOTE(review): this returns one entry per product, while extract_name and
# extract_price return one entry per variant; write_to_csv zips the three by
# index, so rows drift once a product has more than one variant — confirm
# and align.
#
# @param all_products_links [Array] product page links
# @return [Array] one XPath result per link, in link order
def extract_img_links(all_products_links)
  params = YAML.load_file('params.yml')

  all_products_links.map do |url|
    # `+` kept instead of interpolation: get_pages_url hands over raw xpath
    # results, so url is not necessarily a String.
    puts "Чтение страницы - " + url
    img_link = url_to_string(url).xpath(params['product_image_link_route'])
    puts "Идет запись данных в файл"
    img_link
  end
end
# Collects one price text per product variant.
#
# For every product link the page is downloaded via url_to_string; one price
# is produced per node matched by 'product_weight_price_for_loop', read
# through the 'product_price_route' XPath (both from params.yml).
#
# Defined as a plain top-level method: wrapping the `def` in Thread.new only
# deferred the definition and added no concurrency.
#
# @param all_products_links [Array] product page links
# @return [Array<String>] prices in link order, then variant order
def extract_price(all_products_links)
  params = YAML.load_file('params.yml')

  all_products_links.flat_map do |url|
    variants = url_to_string(url).xpath(params['product_weight_price_for_loop'])

    variants.each_with_index.map do |variant, index|
      puts "Идет запись данных в файл"
      # The n-th variant takes the n-th match of the route, as before.
      variant.xpath(params['product_price_route'])[index].text
    end
  end
end
# Writes the scraped columns to a CSV file, one row per index.
#
# Row n is [names_weights[n], prices[n], links[n]]. The row count follows
# names_weights; shorter prices/links arrays leave empty cells (Array#zip
# pads with nil).
#
# Defined as a plain top-level method: wrapping the `def` in Thread.new only
# deferred the definition and added no concurrency.
#
# @param file_name [String] path of the CSV file to create or overwrite
# @param names_weights [Array] first column
# @param prices [Array] second column
# @param links [Array] third column
def write_to_csv(file_name, names_weights, prices, links)
  CSV.open(file_name, 'w') do |csv|
    names_weights.zip(prices, links).each { |row| csv << row }
  end
end
# Wait for every thread registered in `threads`, then start the scraper.
threads.each(&:join)
main
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement