Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- #
- import sys,os,re
- import time,datetime
- import json,msgpack,yaml,csv
- from pprint import pprint
- import urllib
- from urllib2 import Request, urlopen, URLError, HTTPError
- from pprint import pprint
def get_result(url):
    """Fetch *url* and return the raw response body.

    The response is read in full and returned exactly as ``read()`` yields
    it; no decoding or parsing is done here.  Errors raised by ``urlopen``
    or ``read`` propagate to the caller.
    """
    fp = urlopen(url)
    try:
        return fp.read()
    finally:
        # Release the connection even when read() fails part-way through.
        fp.close()
# params
# Obtain an API key from https://webservice.recruit.co.jp/register/index.html
api_key = "XXXXXX"
# Country codes queried one after another by the main loop.
countries = ["JPN", "GER","USA","CAN","GBR","SWE","FRA","ITA","AUT","ESP","BEL","HOL","RUS","AUS","KOR","THA","MYS","IND","RSA","ZZZ"]
# Number of records requested per API call.
count = 100
# csv
filename = "used_car.csv"
# open() instead of the file() constructor; the handle is kept under a name
# so it can be flushed or closed explicitly once all rows are written.
csvfile = open(filename, 'w')
writecsv = csv.writer(csvfile, lineterminator='\n')
# The text fields below are UTF-8 encoded before matching, so every pattern
# is a byte pattern.  They are compiled once instead of on every call.
_ODD_MAN_KM_RE = re.compile(u'(.*)万km'.encode('utf-8'))
_WARRANTY_PERIOD_RE = re.compile(u'保証期間:(.*)'.encode('utf-8'))
_WARRANTY_LIMIT_RE = re.compile(u'保証期限:(.*)'.encode('utf-8'))
_WARRANTY_DISTANCE_RE = re.compile(u'保証距離:(.*)'.encode('utf-8'))
_KM_RE = re.compile(br'(\d*)km')


def _man_km_to_km(odd):
    """Convert an encoded '<number>万km' string to whole kilometres, or ''."""
    m = _ODD_MAN_KM_RE.match(odd)
    if not m:
        return ''
    try:
        # round() before int(): a bare int() truncates binary float error,
        # e.g. 0.57 * 10000 evaluates to 5699.999999999999.
        return int(round(float(m.group(1)) * 10000))
    except ValueError:
        # Text in front of "万km" that is not a number.
        return ''


def _text_after(pattern, text):
    """Return group 1 of *pattern* matched at the start of *text*, or ''."""
    m = pattern.match(text)
    return m.group(1) if m else ''


def get_items(jsn):
    """Flatten one used-car record into a single-level dict.

    Args:
        jsn: one decoded JSON object for a car.  It must contain every key
            read below, including the nested 'brand', 'body', 'shop'
            (with 'pref'), 'urls' and 'photo' ('main') objects; a missing
            key raises KeyError.

    Returns:
        A dict with the scalar fields copied through, the nested objects
        flattened under prefixed keys, and these derived fields:

        * 'odd'                   - jsn['odd'] encoded as UTF-8
        * 'odd_km'                - kilometres as int when 'odd' has the
                                    form '<number>万km', otherwise ''
        * 'warranty_length'       - jsn['warranty_length'] encoded as UTF-8
        * 'warranty_length_kikan' - text after '保証期間:', else ''
        * 'warranty_length_kigen' - text after '保証期限:', else ''
        * 'warranty_distance'     - digits of '保証距離:<digits>km', or
                                    the text after the prefix, or the
                                    encoded value unchanged when the
                                    prefix is absent
    """
    dic = {}

    # Scalar fields copied through untouched.
    for key in ('id', 'model', 'grade', 'color', 'recycle', 'engine',
                'price', 'inspection', 'year', 'desc',
                'maintenance_comment', 'maintenance', 'maintenance_fee',
                'warranty', 'warranty_fee', 'warranty_comment'):
        dic[key] = jsn[key]

    # Odometer reading, plus a numeric kilometre value when parseable.
    dic['odd'] = jsn['odd'].encode('utf-8')
    dic['odd_km'] = _man_km_to_km(dic['odd'])

    brand_jsn = jsn['brand']
    dic['name'] = brand_jsn['name']
    dic['code'] = brand_jsn['code']

    body_jsn = jsn['body']
    dic['body_name'] = body_jsn['name']
    dic['body_code'] = body_jsn['code']

    shop_jsn = jsn['shop']
    dic['shop_lat'] = shop_jsn['lat']
    dic['shop_lng'] = shop_jsn['lng']
    dic['shop_name'] = shop_jsn['name']
    dic['shop_datum'] = shop_jsn['datum']
    shop_pref_jsn = shop_jsn['pref']
    dic['shop_pref_name'] = shop_pref_jsn['name']
    dic['shop_pref_code'] = shop_pref_jsn['code']

    urls_jsn = jsn['urls']
    dic['url_qr'] = urls_jsn['qr']
    dic['url_mobile'] = urls_jsn['mobile']
    dic['url_pc'] = urls_jsn['pc']

    # Warranty length: split into "period" (kikan) and "expiry" (kigen).
    dic['warranty_length'] = jsn['warranty_length'].encode('utf-8')
    dic['warranty_length_kikan'] = _text_after(_WARRANTY_PERIOD_RE,
                                               dic['warranty_length'])
    dic['warranty_length_kigen'] = _text_after(_WARRANTY_LIMIT_RE,
                                               dic['warranty_length'])

    # Warranty distance: strip the label and, when present, the km unit.
    distance = jsn['warranty_distance'].encode('utf-8')
    m = _WARRANTY_DISTANCE_RE.match(distance)
    if m:
        rest = m.group(1)
        km = _KM_RE.match(rest)
        distance = km.group(1) if km else rest
    dic['warranty_distance'] = distance

    photo_main_jsn = jsn['photo']['main']
    dic['main_caption'] = photo_main_jsn['caption']
    dic['main_photo_l'] = photo_main_jsn['l']
    dic['main_photo_s'] = photo_main_jsn['s']
    return dic
##### main #####
# For every country code, page through the used-car API and append one CSV
# row per returned car.  Columns are the sorted keys of the flattened record.

# All countries share one CSV file, so the header row is written only once,
# not once per country.
header_flag = True
for country in countries:
    print(country)
    start = 1      # 1-based index of the first record on the next page
    counter = 0    # rows written for this country so far
    while True:
        # url info
        base_url = "http://webservice.recruit.co.jp/carsensor/usedcar/v1/?key=%s&country=%s&format=json&count=%s&start=%s"%(api_key, country, count, start)
        print(base_url)
        res = json.loads(get_result(base_url))
        contents = res["results"]["usedcar"]
        if not contents:
            # An empty page means this country is exhausted.
            break
        for jsn in contents:
            dic = get_items(jsn)
            keys = sorted(dic)
            if header_flag:
                writecsv.writerow(keys)
                header_flag = False
            # The Python 2 csv writer needs byte strings: encode unicode
            # values as UTF-8 and pass everything else through unchanged.
            values = [dic[k].encode('utf-8') if isinstance(dic[k], unicode) else dic[k]
                      for k in keys]
            writecsv.writerow(values)
            counter += 1
        # Advance by the page size actually requested, not a literal 100.
        start += count
        # NOTE(review): the original nesting of this progress print was lost;
        # it is emitted once per page here.
        print(counter)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement