Advertisement
mrigiyos

Untitled

Oct 31st, 2021
1,710
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 3.67 KB | None | 0 0
  1. require 'curb'
  2. require 'nokogiri'
  3. require 'csv'
  4. require 'yaml'
  5. require 'curl'
  6.  
  7. threads = []
  8.  
  # Interactive entry point of the scraper.
  #
  # Prompts on stdout for a category URL and an output file name (both read
  # from stdin), collects every product link of the category, extracts the
  # name/price/image data for those links and writes the result to a CSV file.
  def main
    puts "Введите ссылку: "
    catalog_url = gets.chomp
    puts "Введите название файла:"
    csv_path = gets.chomp
    # Example category URL: https://www.petsonic.com/farmacia-para-gatos/
    product_links = get_pages_url(get_qty_of_pages(catalog_url), catalog_url)
    labels = extract_name(product_links)
    prices = extract_price(product_links)
    image_links = extract_img_links(product_links)
    write_to_csv(csv_path, labels, prices, image_links)
  end
  22.  
  # Downloads +url+ with Curl and parses the response body with Nokogiri.
  #
  # Despite the name, the return value is the parsed document object produced
  # by Nokogiri::HTML, not a String.
  def url_to_string(url)
    Nokogiri::HTML(Curl.get(url).body_str)
  end
  28.  
  # Plain value holder for one scraped product entry.
  #
  # All four attributes are readable and writable; the constructor takes them
  # positionally in the order name, price, weight, img_link.
  class Product
    attr_accessor :name, :price, :weight, :img_link

    def initialize(name, price, weight, img_link)
      @name, @price, @weight, @img_link = name, price, weight, img_link
    end
  end
  38.  
  # Returns how many listing pages the category at +url+ spans.
  #
  # The product count is read from the page with the XPath stored under the
  # 'number_of_products' key of params.yml and divided by a page size of 25
  # products, rounding up.
  #
  # Always returns an Integer. Previously the result was nil whenever the
  # product count was an exact multiple of 25 (including 0), which turned the
  # caller's `(1..page_qty)` into an endless range.
  def get_qty_of_pages(url)
    doc = url_to_string(url)
    params = YAML.load_file('params.yml')
    products_per_page = 25
    # String#to_i yields 0 when the node is missing or not numeric.
    products_qty = doc.xpath(params['number_of_products']).text.to_i
    (products_qty.to_f / products_per_page).ceil
  end
  46.  
  47.  
  # Collects the product link nodes from every listing page of a category.
  #
  # page_number - number of listing pages to visit (pages are 1-based).
  # page        - URL of the first listing page; later pages are requested as
  #               "<page>?p=<n>".
  #
  # Each page is fetched with Curl, parsed with Nokogiri and queried with the
  # XPath stored under 'all_products_route' in params.yml. Returns a flat Array
  # of whatever that XPath matched, in page order.
  def get_pages_url(page_number, page)
    params = YAML.load_file('params.yml')
    (1..page_number).each_with_object([]) do |page_index, collected|
      # The first page has no query string; the rest are addressed via ?p=N.
      listing_url = page_index == 1 ? page : "#{page}?p=#{page_index}"
      response = Curl.get(listing_url)
      puts "Number of page parsing - #{page_index}"
      listing = Nokogiri::HTML(response.body_str)
      collected.concat(listing.xpath(params['all_products_route']).to_a)
    end
  end
  66.  
  # Builds one "<product name> - <value>" label per weight/price block of every
  # product page.
  #
  # all_products_links - enumerable of product page URLs.
  #
  # The XPath expressions come from params.yml ('product_name_route',
  # 'product_weight_price_for_loop', 'product_price_route'). Returns an Array
  # of Strings in page order.
  #
  # Defined as a plain top-level method: wrapping the `def` in Thread.new only
  # ran the definition on another thread and provided no concurrency.
  #
  # NOTE(review): the value appended after the name is read with
  # 'product_price_route', the same key extract_price uses — confirm whether a
  # dedicated weight route was intended.
  def extract_name(all_products_links)
    params = YAML.load_file('params.yml')
    names_weights = []
    all_products_links.each do |url|
      puts "Чтение страницы - " + url
      prod_page = url_to_string(url)
      name = prod_page.xpath(params['product_name_route']).text
      weight_block = prod_page.xpath(params['product_weight_price_for_loop'])
      # The N-th block reads the N-th match of the route evaluated from that block.
      weight_block.each_with_index do |block, position|
        weight = block.xpath(params['product_price_route'])[position].text
        names_weights << "#{name} - #{weight}"
      end
    end
    names_weights
  end
  86.  
  # Collects the image-link XPath result of every product page.
  #
  # all_products_links - enumerable of product page URLs.
  #
  # Returns an Array with one entry per URL: whatever the XPath stored under
  # 'product_image_link_route' in params.yml matched on that page.
  #
  # Defined as a plain top-level method: wrapping the `def` in Thread.new only
  # ran the definition on another thread and provided no concurrency.
  #
  # NOTE(review): this yields one entry per product, while extract_name and
  # extract_price yield one entry per weight/price block — rows may misalign in
  # the CSV when a product has several blocks; confirm.
  def extract_img_links(all_products_links)
    params = YAML.load_file('params.yml')
    all_products_links.map do |url|
      puts "Чтение страницы - " + url
      img_link = url_to_string(url).xpath(params['product_image_link_route'])
      puts "Идет запись данных в файл"
      img_link
    end
  end
  101.  
  # Collects the price text of every weight/price block of every product page.
  #
  # all_products_links - enumerable of product page URLs.
  #
  # The XPath expressions come from params.yml
  # ('product_weight_price_for_loop' and 'product_price_route'). Returns an
  # Array of Strings in page order, one per block.
  #
  # Defined as a plain top-level method: wrapping the `def` in Thread.new only
  # ran the definition on another thread and provided no concurrency.
  def extract_price(all_products_links)
    params = YAML.load_file('params.yml')
    prices = []
    all_products_links.each do |url|
      price_block = url_to_string(url).xpath(params['product_weight_price_for_loop'])
      # The N-th block reads the N-th match of the route evaluated from that block.
      price_block.each_with_index do |block, position|
        puts "Идет запись данных в файл"
        prices << block.xpath(params['product_price_route'])[position].text
      end
    end
    prices
  end
  120.  
  # Writes the scraped columns to the CSV file +file_name+, replacing any
  # existing content.
  #
  # Row i is [names_weights[i], prices[i], links[i]]. The row count follows
  # names_weights: shorter arrays produce empty cells, longer ones are cut off
  # (Array#zip semantics).
  #
  # Defined as a plain top-level method: wrapping the `def` in Thread.new only
  # ran the definition on another thread and provided no concurrency.
  def write_to_csv(file_name, names_weights, prices, links)
    CSV.open(file_name, "w+") do |csv|
      names_weights.zip(prices, links).each { |row| csv << row }
    end
  end
  # Wait for every registered thread to finish before starting the scraper.
  threads.each(&:join)

  # Run the interactive scraper.
  main
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement