Fazlul

Untitled

Jun 11th, 2021
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.15 KB | None | 0 0
  1. import scrapy
  2. from scrapy.exceptions import CloseSpider
  3.  
  4.  
  class AmazonDealsSpider(scrapy.Spider):
      """Scrape deal listings from amazon.ca deal result pages.

      The crawl starts at ``page_number`` and follows consecutive result
      pages up to and including ``last_page``. One dict is yielded per deal
      container found on each page.
      """

      name = 'amazon_deals'
      # Page requested first; parse() advances it as the crawl paginates.
      page_number = 2
      # Last result page to request (inclusive).
      last_page = 6

      # Single source of truth for the listing URL. ``{page}`` appears twice
      # on purpose: once in the ``ref`` path segment and once in the
      # ``gb_f_dealsce`` filter, and both must agree for a given page.
      _url_template = (
          'https://www.amazon.ca/s/ref=gbps_ftr_m-4_b14c_page_{page}'
          '?url=node%3D2055586011&field-keywords='
          '&gb_f_dealsce=dealTypes:DEAL_OF_THE_DAY%252CBEST_DEAL%252CLIGHTNING_DEAL,'
          'page:{page},sortOrder:BY_SCORE,'
          'enforcedCategories:667823011%252C2404990011%252C2690975011%252C677230011'
          '%252C7337291011%252C3379552011%252C3379595011%252C6205511011%252C680468011'
          '%252C11402068011%252C6916844011%252C677268011%252C1233055011%252C677273011'
          '%252C7204527011%252C677250011%252C2690953011%252C677252011%252C677212011'
          '%252C677226011%252C3379546011,dealsPerPage:24'
          '&pf_rd_p=cd5fdf1b-afb1-45db-8a82-940414a2b14c&pf_rd_s=merchandised-search-4'
          '&pf_rd_t=101&pf_rd_i=2055586011&pf_rd_m=A3DWYIK6Y9EEQB'
          '&pf_rd_r=D1JEK6H1XJ1Z37YMYR5J&ie=UTF8'
      )

      def _page_url(self, page):
          """Return the deals listing URL for result page number ``page``."""
          return self._url_template.format(page=page)

      def start_requests(self):
          """Yield the request for the first result page (``page_number``)."""
          yield scrapy.Request(
              url=self._page_url(self.page_number),
              callback=self.parse,
          )

      def parse(self, response):
          """Yield one item per deal on the page, then request the next page.

          Each item is a dict with the keys ``Title``, ``sell_price``,
          ``actual_price``, ``discounted_rate``, ``review_rating`` and
          ``reviewers``. A value is ``None`` when its XPath matches nothing.

          Raises:
              CloseSpider: when the page contains no deal containers, so the
                  crawl stops instead of walking further empty pages.
          """
          deals = response.xpath('//*[@class="a-section dealContainer"]')
          if not deals:
              raise CloseSpider('no deals found on page %s' % self.page_number)

          for deal in deals:
              # No trailing commas after .get(): each value must be the
              # extracted string (or None), not a one-element tuple.
              title = deal.xpath('.//h1[@id="title"]/span/text()').get()
              sell_price = deal.xpath(
                  './/span[@class="gb-font-size-medium inlineBlock '
                  'unitLineHeight dealPriceText"]/text()'
              ).get()
              actual_price = deal.xpath(
                  './/span[@class="a-color-base gb-font-size-base inlineBlock '
                  'unitLineHeight a-text-strike"]/text()'
              ).get()
              discounted_rate = deal.xpath(
                  './/span[@class="a-color-base gb-font-size-base inlineBlock '
                  'unitLineHeight a-text-strike"]/following-sibling::span/text()'
              ).get()
              # "(.//x)[1]" is the valid relative form of "first match under
              # this deal"; ".(//x)[1]" is not valid XPath.
              # NOTE(review): the class below only matches the "a-star-4-5"
              # icon; other ratings presumably use a different class - confirm.
              review_rating = deal.xpath(
                  '(.//*[@class="a-icon a-icon-star a-star-4-5"]/span)[1]/text()'
              ).get()
              reviewers = deal.xpath(
                  '(.//span[@id="acrCustomerReviewText"])[1]/text()'
              ).get()

              yield {
                  'Title': title,
                  'sell_price': sell_price,
                  'actual_price': actual_price,
                  'discounted_rate': discounted_rate,
                  'review_rating': review_rating,
                  'reviewers': reviewers,
              }

          # Advance the counter before building the URL so the follow-up
          # request targets the page after the one just parsed.
          if self.page_number < self.last_page:
              self.page_number += 1
              yield response.follow(
                  self._page_url(self.page_number),
                  callback=self.parse,
              )
Advertisement
Add Comment
Please, Sign In to add comment