Advertisement
Guest User

Untitled

a guest
Jul 17th, 2019
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.16 KB | None | 0 0
  1. import scrapy
  2.  
  3. class AmazonItem(scrapy.Item):
  4. rating = scrapy.Field()
  5. date = scrapy.Field()
  6. review = scrapy.Field()
  7. link = scrapy.Field()
  8.  
  9. class AmazonSpider(scrapy.Spider):
  10.  
  11. name = "amazon"
  12. allowed_domains = ['amazon.co.uk']
  13. start_urls = ['http://www.amazon.co.uk/product-reviews/B0042EU3A2/' ]
  14.  
  15. def parse(self, response):
  16.  
  17. for sel in response.xpath('//table[@id="productReviews"]//tr/td/div'):
  18.  
  19. item = AmazonItem()
  20. item['rating'] = sel.xpath('./div/span/span/span/text()').extract()
  21. item['date'] = sel.xpath('./div/span/nobr/text()').extract()
  22. item['review'] = sel.xpath('./div[@class="reviewText"]/text()').extract()
  23. item['link'] = sel.xpath('.//a[contains(.,"Permalink")]/@href').extract()
  24. yield item
  25.  
  26. xpath_Next_Page = './/table[@id="productReviews"]/following::*//span[@class="paging"]/a[contains(.,"Next")]/@href'
  27. if response.xpath(xpath_Next_Page):
  28. url_Next_Page = response.xpath(xpath_Next_Page).extract()[0]
  29. request = scrapy.Request(url_Next_Page, callback=self.parse)
  30. yield request
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement