Advertisement
Guest User

Untitled

a guest
Jan 13th, 2017
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Ruby 6.64 KB | None | 0 0
  1.  
  2. require 'active_record'
  3. require 'nokogiri'
  4. require 'open-uri'
  5.  
  # Connect to the PostgreSQL database that stores the scraped catalogue.
  # Host, database name and credentials can be overridden through environment
  # variables; when a variable is unset the original hard-coded value is used,
  # so existing local setups keep working unchanged.
  ActiveRecord::Base.establish_connection(
    adapter:  'postgresql',
    host:     ENV.fetch('IC_SCRAPPER_DB_HOST', 'localhost'),
    database: ENV.fetch('IC_SCRAPPER_DB_NAME', 'ic_scrapper'),
    username: ENV.fetch('IC_SCRAPPER_DB_USER', 'postgres'),
    password: ENV.fetch('IC_SCRAPPER_DB_PASSWORD', 'postgres')
  )
  13.  
  14.  
  # Creates the four tables used by the scraper. Each table is only created when
  # it is missing, so running the script again leaves existing tables untouched.
  ActiveRecord::Schema.define do
    unless table_exists?(:types)
      create_table(:types) do |t|
        t.string :name
        t.timestamps null: false
      end
    end

    # A make row references a type row.
    unless table_exists?(:makes)
      create_table(:makes) do |t|
        t.references :type, index: true, foreign_key: true
        t.string :name
        t.timestamps null: false
      end
    end

    # A model row references its make; `from`/`to` are stored as integers here.
    unless table_exists?(:models)
      create_table(:models) do |t|
        t.references :make, index: true, foreign_key: true
        t.string :name
        %i[from to].each { |column| t.integer column }
        t.timestamps null: false
      end
    end

    # A vehicle row references both its model and its make; `from`/`to` are
    # stored as strings here (unlike the models table).
    unless table_exists?(:vehicles)
      create_table(:vehicles) do |t|
        %i[model make].each do |owner|
          t.references owner, index: true, foreign_key: true
        end

        %i[
          name from to engine_code drive_type construction_type
          engine_type fuel_type axis_configuration
        ].each { |column| t.string column }

        %i[capacity_ml power_hp power_kw cylinders ktype tonnage].each do |column|
          t.integer column
        end

        t.float :capacity_l
        t.timestamps null: false
      end
    end
  end
  70.  
  # Row of the `types` table. Destroying a type also destroys its makes.
  class Type < ActiveRecord::Base
    has_many(:makes, dependent: :destroy)
  end
  74.  
  # Row of the `makes` table. Destroying a make also destroys its models and
  # its vehicles.
  class Make < ActiveRecord::Base
    has_many :models, dependent: :destroy
    # The association name must be `:vehicles` so that it resolves to the
    # Vehicle class; a misspelled name cannot be resolved when the association
    # is loaded (for example by the dependent destroy).
    has_many :vehicles, dependent: :destroy
  end
  79.  
  # Row of the `models` table: belongs to a make and owns vehicles, which are
  # destroyed together with the model. Vehicles can also be written through
  # nested attributes, including removal.
  class Model < ActiveRecord::Base
    belongs_to(:make)
    has_many(:vehicles, dependent: :destroy)

    accepts_nested_attributes_for(:vehicles, allow_destroy: true)
  end
  86.  
  # Row of the `vehicles` table; linked to both a make and a model.
  class Vehicle < ActiveRecord::Base
    belongs_to(:make)
    belongs_to(:model)

    # Validations are currently switched off:
    # validates_presence_of :name, :power_kw, :power_hp, :capacity_ml, :capacity_l, :from, :construction_type, :drive_type, :ktype
    # validates_uniqueness_of :ktype, scope: :tecdoc_make_id
  end
  94.  
  # Walks the Inter Cars e-catalogue pages (types -> makes -> models -> vehicles)
  # and stores what it finds through the Type, Make, Model and Vehicle records.
  #
  # Instantiating the class runs the whole scrape. Records that already exist
  # are reused and their children are still visited, so an interrupted run can
  # be resumed; only vehicles that are already stored are skipped.
  class ICScrapper
    BASE_URL = "http://e-katalog.intercars.com.pl"

    # [display name, letter passed as the `wsk` query parameter]
    VEHICLE_TYPES = [
      ["pojazd osobowy", "O"],
      ["pojazd ciężarowy", "C"],
      ["motocykl", "M"]
    ].freeze

    def initialize
      VEHICLE_TYPES.each do |type_name, type_letter|
        # first_or_create (rather than first_or_initialize with a block) so the
        # scraping below also runs when the type row already exists: the block
        # form is only yielded to for records that are being built.
        Type.where(name: type_name).first_or_create
        scrape_makes(type_letter)
      end
    end

    private

    # Reads the make list for one vehicle type and descends into every make.
    # NOTE(review): makes are looked up by name only and are never linked to a
    # Type, although the makes table has a type reference - confirm whether
    # that is intended before changing it (existing rows have no type set).
    def scrape_makes(type_letter)
      makes_page = get_data "#{BASE_URL}/u/tecdoc/u_tecdoc_result.php?call=marka_select&wsk=#{type_letter}"
      makes_page.css("a").each do |a_make|
        href = a_make.attr("href")
        next if href.nil? # anchor without a target carries no make id

        make_id = href.split("/").last
        make = Make.where(name: a_make.text).first_or_initialize
        if make.new_record?
          make.save
          puts "Dodane marke #{make.name}"
        end

        scrape_models(make, make_id, type_letter)
      end
    end

    # Reads the model list of one make and descends into every model.
    def scrape_models(make, make_id, type_letter)
      models_page = get_data "#{BASE_URL}/u/tecdoc/u_tecdoc_result.php?call=model&mar=#{make_id}&wsk=#{type_letter}"
      models_page.css("ul.model").each do |ul_model|
        # The model id is the part of the class attribute after the last "_".
        model_id = ul_model.attr("class").split("_").last
        model_values = ul_model.css("span")
        model = make.models.where(name: model_values[1].text).first_or_initialize
        if model.new_record?
          model.from = model_values[2].text
          model.to = model_values[3].text
          model.save
          puts "Dodane model #{model.name} dla marki #{make.name}"
        end

        scrape_vehicles(make, model, model_id, type_letter)
      end
    end

    # Reads the vehicle list of one model and stores every vehicle not yet known.
    def scrape_vehicles(make, model, model_id, type_letter)
      vehicles_page = get_data "#{BASE_URL}/u/tecdoc/u_tecdoc_result.php?call=typ&model=#{model_id}&wsk=#{type_letter}"
      vehicles_page.css("ul.vmiddle.typ").each do |ul_vehicle|
        vehicle_ktype = ul_vehicle.attr("class").split("_").last
        vehicle = model.vehicles.where(ktype: vehicle_ktype).first_or_initialize
        # Known vehicles are left as they are, so skip the detail request too.
        next unless vehicle.new_record?

        details = vehicle_details(type_letter, vehicle_ktype)
        store_vehicle(vehicle, make, model, details)
      end
    end

    # Fetches the detail page of one vehicle and returns its attribute table as
    # a Hash of left-cell text => right-cell text.
    def vehicle_details(type_letter, vehicle_ktype)
      vehicle_page = get_data "#{BASE_URL}/dynamic/uni/ws_szczegoly_typu_tecdoc.php?wsk=#{type_letter}&typ=#{vehicle_ktype}&lang=PL"
      rows = vehicle_page.xpath('//tr[td[@class="dane-attLeft"]]')
      rows.each_with_object({}) do |tr, details|
        key = tr.xpath('td[@class="dane-attLeft"]').text
        details[key] = tr.xpath('td[@class="dane-attRight"]').text
      end
    end

    # Copies the scraped detail values onto a new vehicle and saves it.
    def store_vehicle(vehicle, make, model, details)
      # to_s keeps a page without the production row from raising on nil.
      production_time = details["Produkowany od:"].to_s.split("->")

      vehicle.name = "#{make.name} #{model.name}"
      vehicle.make = make
      vehicle.from = production_time[0]
      vehicle.to = production_time[1]
      vehicle.capacity_ml = details["Pojemność w ccm (dane techniczne):"].to_i
      vehicle.capacity_l = details["Pojemność w litrach:"].to_f
      vehicle.power_kw = details["Moc [kW]:"].to_i
      vehicle.power_hp = details["Moc [kM]:"].to_i
      vehicle.cylinders = details["Liczba cylindrów:"].to_i
      vehicle.engine_code = details["Kody silnika:"]
      vehicle.drive_type = details["Rodzaj napędu:"]
      vehicle.construction_type = details["Rodzaj nadwozia:"]
      vehicle.fuel_type = details["Paliwo:"]
      vehicle.tonnage = details["Tonaż:"].to_i
      vehicle.axis_configuration = details["Konfiguracja osi:"]
      vehicle.save
      puts "Dodano typ #{vehicle.ktype} modelu #{model.name} do marki #{make.name}"
    end
  end
  172.  
  # Downloads +link+ and returns the response parsed as a Nokogiri HTML document.
  #
  # Failed attempts are reported on standard output and retried after a pause.
  #
  # @param link [String] URL to download
  # @param retry_delay [Numeric] seconds to wait between attempts (default 1)
  # @param max_attempts [Integer, nil] give up and re-raise the last error after
  #   this many attempts; nil (the default) retries without limit
  # @return [Nokogiri::HTML::Document]
  # @raise [StandardError] the last failure, only when max_attempts is reached
  def get_data(link, retry_delay: 1, max_attempts: nil)
    attempts = 0
    begin
      attempts += 1
      # URI#read (from open-uri) instead of Kernel#open on a string.
      html = URI.parse(link).read
      Nokogiri::HTML(html)
    rescue StandardError => e
      # StandardError only: Interrupt (Ctrl-C) and SystemExit must still be
      # able to stop the script instead of being swallowed by the retry.
      raise if max_attempts && attempts >= max_attempts

      p e.message
      p "Trying again in #{retry_delay} sec..."
      sleep retry_delay
      retry
    end
  end
  186.  
  # Run the scrape only when this file is executed as a script, so loading the
  # file for its classes does not start a full crawl.
  ICScrapper.new if __FILE__ == $PROGRAM_NAME
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement