Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'nokogiri'
require 'open-uri'
require 'csv'

# Scrapes the allegro.pl "samochody-osobowe" category listing page by page and
# writes one CSV row per listing slot to out.csv.
#
# Row layout: [lp, przebieg, rocznik, cena, opis]
#   lp       - running record number across all pages (1-based)
#   przebieg - text of the dd[3] span with the " km" suffix removed
#              (presumably mileage - verify against the live page markup)
#   rocznik  - text of the dd[2] span (presumably production year - verify)
#   cena     - text of the price span with the ",NN zł" suffix and spaces removed
#   opis     - text of the listing's h2 link
#
# NOTE(review): the XPaths are absolute and tied to one specific page layout;
# when a node is missing, Nokogiri returns an empty node set and the field is
# written as an empty string.

PAGE_COUNT = 100       # how many listing pages to download
RECORDS_PER_PAGE = 60  # the script assumes 60 listings per page

# Absolute path of the <section> that holds the listing <article> elements.
LISTING_SECTION_XPATH =
  '/html/body/div[2]/div[3]/div/div/div/div/div[2]/div[2]/div/div[3]' \
  '/div/div/div/div/div/div/div/section[2]/section'

# Block form guarantees the CSV file is flushed and closed, even on an error.
CSV.open("out.csv", "wb") do |csv|
  (1..PAGE_COUNT).each do |page|
    main_url = "https://allegro.pl/kategoria/samochody-osobowe-4029?order=m&p=#{page}"
    print "Pobieranie strony:#{page} "
    # URI.open is required on Ruby 3.0+, where Kernel#open no longer accepts
    # URLs; the block form closes the response stream once it has been parsed.
    data = URI.open(main_url) { |io| Nokogiri::HTML(io) }
    print " OK\n"

    (1..RECORDS_PER_PAGE).each do |record|
      lp = (page - 1) * RECORDS_PER_PAGE + record
      # Common prefix of every field of the record-th listing on this page.
      article = "#{LISTING_SECTION_XPATH}/article[#{record}]/div/div/div[2]"

      opis = data.xpath("#{article}/div[1]/h2/a").text
      # Strip the trailing unit.
      przebieg = data.xpath("#{article}/div[1]/div/dl/dd[3]/span").text.gsub(' km', '')
      rocznik = data.xpath("#{article}/div[1]/div/dl/dd[2]/span").text
      # Drop the decimal part with the currency suffix, then any spaces.
      cena = data.xpath("#{article}/div[2]/div/div/span/span").text
                 .gsub(/(,\d\d zł)/, '')
                 .gsub(' ', '')

      csv << [lp, przebieg, rocznik, cena, opis]
    end
  end
end
Advertisement
Add Comment
Please, Sign In to add comment