Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'net/http'
- require 'rubygems'
- require 'bundler/setup'
- Bundler.require(:default)
# Maximum number of times get_path issues the request before giving up and
# re-raising the last error.
GET_PATH_MAX_ATTEMPTS = 5

# Headers sent with every request; caller-supplied headers are merged over
# these, so a caller can override any of them.
# NOTE(review): the Cookie value is a hard-coded session / verification token
# captured from a browser -- presumably it expires; confirm before relying on it.
GET_PATH_DEFAULT_HEADERS = {
  'Referer' => 'https://www.ricardo.ch/verkaufen/verkaufsformular/schritt1?src=btn_header_verkaufen&SSL=ON',
  'X-Requested-With' => 'XMLHttpRequest',
  'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36',
  'Cookie' => '__RequestVerificationToken_L05TRg2=kkGp2TsaWGlv9rFR5GOux9viivKMcod0JkJLplLPHr45Ou3iC1gsMvIR4fn017imgkP9of0oeQwybPRS889FEFM0Yyg1; BIGipServer~Ricardo_Prod~pool_webserver_ch_de=1041261578.18975.0000; ASP.NET_SessionId=131lk21sli3bxbmi5vkpblhz;',
  'Accept' => '*/*'
}.freeze

# Returns the body of a successful response, gunzipping it when the response
# is still labelled as gzip-encoded.
#
# @param res [Net::HTTPResponse] a response whose body has been read
# @return [String, nil] the (decompressed) body
def decode_response_body(res)
  return res.body unless res['Content-Encoding'].to_s.casecmp('gzip').zero?

  gz = Zlib::GzipReader.new(StringIO.new(res.body))
  begin
    gz.read
  ensure
    gz.close
  end
end

# Performs a GET for +path+ on +http+ and yields the response body and headers.
#
# Each attempt is preceded by a 1 second pause. A failed attempt (an exception
# from the request, a non-2xx response, or a body that cannot be decoded) is
# retried after a further 10 second pause, up to GET_PATH_MAX_ATTEMPTS attempts
# in total; after that the last error is re-raised.
#
# The block is called exactly once, after a successful fetch. Errors raised by
# the block itself propagate to the caller and do not trigger a retry.
#
# @param http [#request] object that executes the request (e.g. Net::HTTP)
# @param path [String] request path, including any query string
# @param headers [Hash{String => String}] extra headers merged over the defaults
# @yieldparam page [String, nil] response body, gunzipped if necessary
# @yieldparam response_headers [Hash{String => Array<String>}] response headers
# @return [Object] whatever the block returns
# @raise [StandardError] the last failure once all attempts are exhausted
def get_path(http, path, headers = {})
  attempts = 0
  begin
    attempts += 1
    sleep(1)
    puts "Loading #{path}\n"
    req = Net::HTTP::Get.new(path, GET_PATH_DEFAULT_HEADERS.merge(headers))
    res = http.request(req)
    res.value # raises the matching Net::HTTP error unless the response is 2xx
    page = decode_response_body(res)
  rescue StandardError
    raise if attempts >= GET_PATH_MAX_ATTEMPTS

    sleep(10)
    retry
  end
  # Yield outside the rescued region so a bug in the caller's block is not
  # mistaken for a transient network failure.
  yield(page, res.to_hash)
end
# HTTPS client for www.ricardo.ch; shared by every request the script makes.
https = Net::HTTP.new('www.ricardo.ch', 443).tap { |client| client.use_ssl = true }

# Category tree keyed by the value of each link's data-catid attribute.
categories = {}

# Seed the tree with the first-level categories listed on step 1 of the
# selling form.
get_path(https, '/verkaufen/verkaufsformular/schritt1') do |page|
  top_level_links = Nokogiri.HTML(page).css('#navCatLvl1 li a')
  top_level_links.each do |link|
    entry = {
      update_level: link['data-update-level'],
      level: link['data-level'],
      title: link['data-text'],
      # The attribute is compared against the literal string 'True'.
      final: link['data-final-category'] == 'True',
      subcategories: {}
    }
    categories[link['data-catid']] = entry
  end
end
# Recursively fills in the :subcategories of the category stored at hash[id].
#
# Does nothing when that category is flagged :final. Otherwise it requests the
# child categories, stores one entry per returned link under :subcategories
# (keyed by the link's data-catid), and recurses into each child straight
# after storing it.
#
# @param https [Object] HTTP client handed through to get_path
# @param id [String] key of the category to expand within +hash+
# @param hash [Hash] mapping of category id to category entry; mutated in place
def load_category(https, id, hash)
  node = hash[id]
  return if node[:final]

  children = node[:subcategories]
  url = "/verkaufen/verkaufsformular/getchildcategories/?id=#{id}&level=#{node[:update_level]}"

  get_path(https, url) do |page|
    Nokogiri.HTML(page).css('.navCat ul li a').each do |link|
      child_id = link['data-catid']
      children[child_id] = {
        update_level: link['data-update-level'],
        level: link['data-level'],
        title: link['data-text'],
        final: link['data-final-category'] == 'True',
        subcategories: {}
      }
      load_category(https, child_id, children)
    end
  end
end
# Expand every first-level category into its full subtree, then print the
# whole tree as JSON on standard output.
categories.each_key { |root_id| load_category(https, root_id, categories) }
puts MultiJson.dump(categories)
Add Comment
Please, Sign In to add comment