Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'active_record'
- require 'nokogiri'
- require 'open-uri'
# Connect ActiveRecord to the PostgreSQL database that stores the scraped data.
#
# Each setting can be overridden with an IC_SCRAPPER_DB_* environment variable;
# when a variable is unset, the value that used to be hard-coded here is used,
# so running the script without any configuration behaves as before.
ActiveRecord::Base.establish_connection(
  adapter: 'postgresql',
  host: ENV.fetch('IC_SCRAPPER_DB_HOST', 'localhost'),
  database: ENV.fetch('IC_SCRAPPER_DB_NAME', 'ic_scrapper'),
  username: ENV.fetch('IC_SCRAPPER_DB_USER', 'postgres'),
  password: ENV.fetch('IC_SCRAPPER_DB_PASSWORD', 'postgres')
)
# Create the scraper's tables. Each table is guarded by an existence check, so
# running the script again leaves tables that are already present untouched.
ActiveRecord::Schema.define do
  # Vehicle categories.
  unless table_exists?(:types)
    create_table(:types) do |table|
      table.string :name
      table.timestamps null: false
    end
  end

  # Makes; the schema links each make to a type.
  unless table_exists?(:makes)
    create_table(:makes) do |table|
      table.references :type, index: true, foreign_key: true
      table.string :name
      table.timestamps null: false
    end
  end

  # Models of a make, with integer `from` / `to` columns.
  unless table_exists?(:models)
    create_table(:models) do |table|
      table.references :make, index: true, foreign_key: true
      table.string :name
      table.integer :from
      table.integer :to
      table.timestamps null: false
    end
  end

  # Vehicles, linked to both their model and their make. Unlike `models`,
  # `from` / `to` are stored as strings here.
  unless table_exists?(:vehicles)
    create_table(:vehicles) do |table|
      table.references :model, index: true, foreign_key: true
      table.references :make, index: true, foreign_key: true
      table.string :name
      table.string :from
      table.string :to
      table.string :engine_code
      table.string :drive_type
      table.string :construction_type
      table.string :engine_type
      table.string :fuel_type
      table.string :axis_configuration
      table.integer :capacity_ml
      table.integer :power_hp
      table.integer :power_kw
      table.integer :cylinders
      table.integer :ktype
      table.integer :tonnage
      table.float :capacity_l
      table.timestamps null: false
    end
  end
end
# A vehicle category, stored in the `types` table.
# Destroying a type also destroys its makes.
class Type < ActiveRecord::Base
  has_many(:makes, dependent: :destroy)
end
# A vehicle make, stored in the `makes` table.
# Destroying a make also destroys its models and vehicles.
#
# NOTE(review): the `makes` table has a `type_id` reference, but no
# `belongs_to :type` is declared here -- confirm whether that is intended.
class Make < ActiveRecord::Base
  has_many :models, dependent: :destroy
  # Was misspelled `:vehilces`, which left `make.vehicles` undefined and pointed
  # the destroy hook at a nonexistent `Vehilce` class.
  has_many :vehicles, dependent: :destroy
end
# A model of a make, stored in the `models` table.
# Destroying a model also destroys its vehicles, and vehicles can be created
# or destroyed through nested attributes.
class Model < ActiveRecord::Base
  belongs_to(:make)
  has_many(:vehicles, dependent: :destroy)

  accepts_nested_attributes_for(:vehicles, allow_destroy: true)
end
# A single vehicle variant, stored in the `vehicles` table and linked to both
# its model and its make.
class Vehicle < ActiveRecord::Base
  belongs_to(:model)
  belongs_to(:make)

  # Validations below are disabled (kept for reference):
  # validates_presence_of :name, :power_kw, :power_hp, :capacity_ml, :capacity_l, :from, :construction_type, :drive_type, :ktype
  # validates_uniqueness_of :ktype, scope: :tecdoc_make_id
end
# Scrapes the Inter Cars e-catalogue vehicle tree (types -> makes -> models ->
# vehicles) and stores it through the Type, Make, Model and Vehicle records.
#
# The whole scrape runs from the constructor, so `ICScrapper.new` is the entry
# point. Records that already exist are reused and their children are still
# visited, so an interrupted run can be resumed by running the script again.
class ICScrapper
  BASE_URL = 'http://e-katalog.intercars.com.pl'.freeze
  TECDOC_URL = "#{BASE_URL}/u/tecdoc/u_tecdoc_result.php".freeze
  DETAILS_URL = "#{BASE_URL}/dynamic/uni/ws_szczegoly_typu_tecdoc.php".freeze

  # [name stored in types.name, value of the "wsk" URL parameter].
  # The Polish names mean: passenger car, truck, motorcycle.
  VEHICLE_TYPES = [
    ['pojazd osobowy', 'O'],
    ['pojazd ciężarowy', 'C'],
    ['motocykl', 'M']
  ].freeze

  # Runs the complete scrape for every entry of VEHICLE_TYPES.
  def initialize
    VEHICLE_TYPES.each do |type_name, type_letter|
      # A block given to first_or_initialize / first_or_create is yielded only
      # to a freshly built record, so the scraping must happen outside of it;
      # otherwise already-stored types would be skipped entirely.
      Type.where(name: type_name).first_or_create
      scrape_makes(type_letter)
    end
  end

  private

  # Walks the list of makes for one vehicle type and descends into each make.
  def scrape_makes(type_letter)
    makes_page = get_data "#{TECDOC_URL}?call=marka_select&wsk=#{type_letter}"
    makes_page.css('a').each do |a_make|
      href = a_make.attr('href')
      next unless href # an anchor without a target carries no make id

      make = find_or_create_make(a_make.text)
      scrape_models(make, href.split('/').last, type_letter)
    end
  end

  # Returns the Make with the given name, creating (and announcing) it if new.
  # Makes are looked up by name only, so a make listed under several vehicle
  # types shares one record; `type_id` is left unset, as before.
  def find_or_create_make(name)
    make = Make.where(name: name).first_or_initialize
    if make.new_record?
      make.save
      puts "Dodane marke #{make.name}"
    end
    make
  end

  # Walks the models of one make and descends into each model's vehicles.
  def scrape_models(make, make_id, type_letter)
    models_page = get_data "#{TECDOC_URL}?call=model&mar=#{make_id}&wsk=#{type_letter}"
    models_page.css('ul.model').each do |ul_model|
      # The catalogue's model id is the part of the class attribute after the
      # last underscore.
      model_id = ul_model.attr('class').split('_').last
      model = find_or_create_model(make, ul_model.css('span'))
      scrape_vehicles(make, model, model_id, type_letter)
    end
  end

  # Returns the make's Model named by the second <span>, creating it if new.
  # For a new model the third and fourth <span> fill `from` and `to`.
  def find_or_create_model(make, model_values)
    model = make.models.where(name: model_values[1].text).first_or_initialize
    if model.new_record?
      model.from = model_values[2].text
      model.to = model_values[3].text
      model.save
      puts "Dodane model #{model.name} dla marki #{make.name}"
    end
    model
  end

  # Walks the vehicles of one model and stores those not yet in the database.
  def scrape_vehicles(make, model, model_id, type_letter)
    vehicles_page = get_data "#{TECDOC_URL}?call=typ&model=#{model_id}&wsk=#{type_letter}"
    vehicles_page.css('ul.vmiddle.typ').each do |ul_vehicle|
      vehicle_ktype = ul_vehicle.attr('class').split('_').last
      vehicle = model.vehicles.where(ktype: vehicle_ktype).first_or_initialize
      # Known vehicles are left untouched, so skip them before paying for the
      # request to the details page.
      next unless vehicle.new_record?

      assign_vehicle_details(vehicle, make, model, vehicle_details(type_letter, vehicle_ktype))
      vehicle.save
      puts "Dodano typ #{vehicle.ktype} modelu #{model.name} do marki #{make.name}"
    end
  end

  # Fetches a vehicle's details page and returns its attribute table as a Hash
  # of left-cell text => right-cell text.
  def vehicle_details(type_letter, vehicle_ktype)
    vehicle_page = get_data "#{DETAILS_URL}?wsk=#{type_letter}&typ=#{vehicle_ktype}&lang=PL"
    vehicle_page.xpath('//tr[td[@class="dane-attLeft"]]').each_with_object({}) do |tr, details|
      key = tr.xpath('td[@class="dane-attLeft"]').text
      details[key] = tr.xpath('td[@class="dane-attRight"]').text
    end
  end

  # Copies the scraped details onto a new vehicle. Missing numeric entries
  # become 0 (nil.to_i / nil.to_f); missing text entries stay nil.
  def assign_vehicle_details(vehicle, make, model, details)
    # to_s keeps a page without the production entry from raising on nil.
    production_time = details['Produkowany od:'].to_s.split('->')

    vehicle.name = "#{make.name} #{model.name}"
    vehicle.make = make
    vehicle.from = production_time[0]
    vehicle.to = production_time[1]
    vehicle.capacity_ml = details['Pojemność w ccm (dane techniczne):'].to_i
    vehicle.capacity_l = details['Pojemność w litrach:'].to_f
    vehicle.power_kw = details['Moc [kW]:'].to_i
    vehicle.power_hp = details['Moc [kM]:'].to_i
    vehicle.cylinders = details['Liczba cylindrów:'].to_i
    vehicle.engine_code = details['Kody silnika:']
    vehicle.drive_type = details['Rodzaj napędu:']
    vehicle.construction_type = details['Rodzaj nadwozia:']
    vehicle.fuel_type = details['Paliwo:']
    vehicle.tonnage = details['Tonaż:'].to_i
    vehicle.axis_configuration = details['Konfiguracja osi:']
  end
end
# Downloads +link+ and returns it parsed by Nokogiri::HTML, retrying on errors.
#
# @param link [String] URL to download
# @param max_attempts [Integer, nil] give up and re-raise the last error after
#   this many failed attempts; nil (the default) retries without limit
# @param delay [Numeric] seconds to sleep between attempts (default 1)
# @return the document returned by Nokogiri::HTML
# @raise [StandardError] the last download error once max_attempts is reached
def get_data(link, max_attempts: nil, delay: 1)
  attempts = 0
  begin
    attempts += 1
    # Kernel#open no longer accepts URLs on Ruby 3.0+, so prefer URI.open when
    # open-uri provides it. The block form closes the handle after reading.
    html = URI.respond_to?(:open) ? URI.open(link, &:read) : open(link, &:read)
    Nokogiri::HTML(html)
  rescue StandardError => e
    # Only StandardError is retried: rescuing Exception would also swallow
    # Interrupt and SystemExit, making the loop impossible to stop with Ctrl-C.
    raise if max_attempts && attempts >= max_attempts

    p e.message
    p "Trying again in #{delay} sec..."
    sleep delay
    retry
  end
end
# Start the scrape; ICScrapper does all of its work in the constructor.
ICScrapper.new
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement