Guest User

Untitled

a guest
Apr 17th, 2020
15
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.57 KB | None | 0 0
  1. class Dromspider(CrawlSpider):
  2. name = 'drom'
  3. start_urls = ['https://www.drom.ru/catalog/mitsubishi/']
  4.  
  5.  
  6. def fourth_floor(self, response):
  7. pass
  8.  
  9. def third_floor(self, response):
  10. pass
  11.  
  12.  
  13.  
  14. def second_floor(self, response):
  15. all_links = response.xpath('//div[@data-target-bind]//a[@href]')
  16. for a in all_links:
  17. item['region'] = a.xpath('../../@data-target-bind').get()
  18. raw_data = a.xpath('.//div[@class="b-info-block__descr"]//text()').getall()
  19. for i in raw_data.copy():
  20. if 'поколение' in i:
  21. item['generation'] = i
  22. raw_data.remove(i)
  23. if 'рестайлинг' in i:
  24. rest = i
  25. raw_data.remove(i)
  26. if i in ['Седан', 'Универсал', 'Хэтчбек', 'Купэ', 'Лифтбэк']:
  27. item['frametype'] = i
  28. raw_data.remove(i)
  29. item['frames'] = raw_data[-1] if raw_data else None
  30. production_year = re.compile('\d{2}\S\d{4}.{1,3}\d{2}\S\d{4}').findall(str(a.xpath('.//span/text()').getall()))
  31. time.sleep(0.75)
  32. yield item
  33.  
  34.  
  35. def parse(self, response):
  36. all_links = response.xpath('//div[contains(@class,"b-selectCars__section")]//./a')
  37. for link in all_links:
  38. item = DromItem()
  39. item['manufacturer'] = 'Mitsubishi'
  40. item['model'] = link.xpath('.//text()').get()
  41. lin = response.urljoin(link.xpath('.//@href').get())
  42. #scrapy.Request(lin, callback=self.second_floor)
  43. time.sleep(0.75)
  44. yield scrapy.Request(lin, item, callback=self.second_floor)
Advertisement
Add Comment
Please, Sign In to add comment