Guest User

Untitled

a guest
Jan 17th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.73 KB | None | 0 0
  1. class Spider(scrapy.Spider):
  2. name = 'scrapyspider'
  3. allowed_domains = ['www.example.com']
  4. start_urls = ['https://www.example.com/1/', 'https://www.example.com/2/']
  5.  
  6.  
  7. def parse(self, response):
  8. for product_link in response.xpath(
  9. '//a[@class="product-link"]/@href').extract():
  10. absolute_url = response.urljoin(product_link)
  11. yield scrapy.Request(absolute_url, self.parse_product)
  12. for category_link in response.xpath(
  13. '//a[@class="navigation-item-link"]/@href').extract():
  14. absolute_url = response.urljoin(category_link)
  15. yield scrapy.Request(absolute_url, self.parse)
  16.  
  17. def parse_product(self, response):
  18. ...
  19. yield item
Add Comment
Please, Sign In to add comment