Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class NikeSpider(scrapy.Spider):
    """Scrape a Nike product page, rendered through Splash, into a ProductItem.

    Every URL in ``start_urls`` is requested with a ``SplashRequest`` and
    handed to :meth:`parse`, which extracts the product name, image URL and
    category and yields them together with the page URL.
    """

    name = 'nike'
    allowed_domains = ['nike.com', 'store.nike.com']
    start_urls = [
        'https://www.nike.com/t/air-vapormax-flyknit-utility-running-shoe-XPTbVZzp/AH6834-400',
    ]

    def start_requests(self):
        """Yield one ``SplashRequest`` per start URL, routed to :meth:`parse`."""
        for url in self.start_urls:
            yield SplashRequest(
                url=url,
                callback=self.parse,
                # NOTE(review): `wait` is forwarded to Splash; presumably the
                # page needs this pause to finish client-side rendering
                # before the selectors in parse() can match -- confirm.
                args={'wait': 5},
            )

    def parse(self, response):
        """Extract the product fields from *response* and yield a ProductItem.

        An item is yielded only when the name, image URL and category
        selectors all match. Otherwise nothing is yielded and a warning
        naming the unmatched fields is logged, so that a change in the page
        markup does not go unnoticed.
        """
        # The selectors are tied to the page's current DOM layout; each
        # extract_first() call returns None when its selector matches nothing.
        extracted = {
            'name': response.xpath(
                '//*[@id="RightRail"]/div/div[1]/div[1]/h1/text()'
            ).extract_first(),
            'imageURL': response.css(
                '#PDP > div > div:nth-child(2) > div.css-1jldkv2 > div:nth-child(1) > div > div > div.d-lg-h.bg-white.react-carousel > div > div.slider-container.horizontal.react-carousel-slides > ul > li.slide.selected > div > picture:nth-child(3) > img::attr(src)'
            ).extract_first(),
            'category': response.css(
                '#RightRail > div > div.d-lg-ib.mb0-sm.mb8-lg.u-full-width > div.ncss-base.pr12-sm > h2::text'
            ).extract_first(),
        }

        # Identity comparison is the correct way to test for None.
        missing = [field for field, value in extracted.items() if value is None]
        if missing:
            self.logger.warning(
                'Skipping %s: no match for %s',
                response.url,
                ', '.join(missing),
            )
            return

        item = ProductItem()
        item['name'] = extracted['name']
        item['imageURL'] = extracted['imageURL']
        item['category'] = extracted['category']
        item['URL'] = response.url
        yield item
Add Comment
Please sign in to add a comment.