Not a member of Pastebin yet? Sign up — it unlocks many cool features!
class Spider(scrapy.Spider):
    """Crawl category pages and hand every product page to ``parse_product``.

    The crawl starts from the two URLs in ``start_urls``. Each listing page
    is handled by ``parse``, which schedules product pages for
    ``parse_product`` and navigation pages for ``parse`` again.
    """

    name = 'scrapyspider'
    allowed_domains = ['www.example.com']
    start_urls = ['https://www.example.com/1/', 'https://www.example.com/2/']

    def parse(self, response):
        """Yield follow-up requests for the links found on a listing page.

        Args:
            response: The downloaded listing/category page.

        Yields:
            scrapy.Request: One request per ``a.product-link`` href (handled
            by ``parse_product``), then one per ``a.navigation-item-link``
            href (handled by ``parse``).
        """
        for product_link in response.xpath(
                '//a[@class="product-link"]/@href').extract():
            # hrefs may be relative; resolve them against the page URL.
            absolute_url = response.urljoin(product_link)
            yield scrapy.Request(absolute_url, callback=self.parse_product)

        for category_link in response.xpath(
                '//a[@class="navigation-item-link"]/@href').extract():
            absolute_url = response.urljoin(category_link)
            # Navigation pages are listing pages too, so they come back here.
            yield scrapy.Request(absolute_url, callback=self.parse)

    def parse_product(self, response):
        """Yield the item scraped from a single product page.

        Args:
            response: The downloaded product page.

        Yields:
            The scraped ``item``.
        """
        # NOTE(review): the extraction logic was elided in the original
        # snippet; `item` is not defined in the visible code and must be
        # built here before it is yielded.
        ...
        yield item
Add Comment
Please sign in to add a comment.