Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Dromspider(CrawlSpider):
    """Spider that walks the drom.ru Mitsubishi catalog.

    ``parse`` reads the manufacturer page listed in ``start_urls`` and
    schedules one request per model link. ``second_floor`` reads each model
    page and yields one item per link found under a ``data-target-bind``
    container.
    """

    name = 'drom'
    start_urls = ['https://www.drom.ru/catalog/mitsubishi/']

    # Throttle in the downloader. Calling time.sleep() inside a callback
    # blocks the Twisted reactor and stalls every in-flight request.
    custom_settings = {'DOWNLOAD_DELAY': 0.75}

    # Description strings that are stored verbatim as the body type.
    # NOTE(review): 'Купэ' and 'Лифтбэк' look like unusual spellings; confirm
    # against the text the site actually renders.
    _FRAME_TYPES = frozenset(['Седан', 'Универсал', 'Хэтчбек', 'Купэ', 'Лифтбэк'])

    def fourth_floor(self, response):
        """Placeholder callback; not implemented yet."""
        pass

    def third_floor(self, response):
        """Placeholder callback; not implemented yet."""
        pass

    @classmethod
    def _classify_description(cls, texts):
        """Split description strings into recognised fields and leftovers.

        Args:
            texts: strings taken from a link's description block.

        Returns:
            A ``(fields, leftovers)`` tuple. ``fields`` may contain
            ``'generation'`` and/or ``'frametype'`` (the last matching string
            wins). ``leftovers`` holds, in order, every string that matched
            none of the tests.
        """
        fields = {}
        leftovers = []
        for text in texts:
            recognised = False
            if 'поколение' in text:  # "generation"
                fields['generation'] = text
                recognised = True
            if 'рестайлинг' in text:  # "restyling"; dropped, not stored
                recognised = True
            if text in cls._FRAME_TYPES:
                fields['frametype'] = text
                recognised = True
            # A string may satisfy several tests at once; it is still
            # consumed exactly once.
            if not recognised:
                leftovers.append(text)
        return fields, leftovers

    def second_floor(self, response):
        """Yield one item per link on a model page.

        The partially filled item created in ``parse`` is read from
        ``response.meta['item']`` and copied for every link, so fields set
        for one link never leak into the next.
        """
        base_item = response.meta['item']
        for link in response.xpath('//div[@data-target-bind]//a[@href]'):
            item = base_item.copy()
            item['region'] = link.xpath('../../@data-target-bind').get()
            texts = link.xpath('.//div[@class="b-info-block__descr"]//text()').getall()
            fields, leftovers = self._classify_description(texts)
            for key, value in fields.items():
                item[key] = value
            item['frames'] = leftovers[-1] if leftovers else None
            # TODO: production years are not captured yet. They can be matched
            # with r'\d{2}\S\d{4}.{1,3}\d{2}\S\d{4}' over the link's
            # './/span/text()' values once the item declares a field for them.
            yield item

    def parse(self, response):
        """Schedule a ``second_floor`` request for every model link.

        Each request carries a fresh ``DromItem`` (manufacturer and model
        already set) in ``meta['item']``.
        """
        links = response.xpath('//div[contains(@class,"b-selectCars__section")]//./a')
        for link in links:
            href = link.xpath('.//@href').get()
            if href is None:
                # Nothing to follow for an anchor without a target.
                continue
            item = DromItem()
            item['manufacturer'] = 'Mitsubishi'
            item['model'] = link.xpath('.//text()').get()
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.second_floor,
                meta={'item': item},
            )

    def parse_start_url(self, response, **kwargs):
        """Route start-URL responses to ``parse``.

        ``parse_start_url`` is the hook CrawlSpider documents for responses
        to ``start_urls``; delegating here keeps ``parse`` in use however the
        installed Scrapy version dispatches the default callback.
        """
        return self.parse(response)
Advertisement
Add Comment
Please, Sign In to add comment