Fazlul

Untitled

Jun 11th, 2021
39
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.28 KB | None | 0 0
  1. import scrapy
  2. from scrapy.linkextractors import LinkExtractor
  3. from scrapy.spiders import CrawlSpider, Rule
  4.  
  5.  
  6. class DealsSpider(CrawlSpider):
  7. name = 'deals'
  8. handle_httpstatus_all = True
  9. allowed_domains = ['amazon.ca']
  10. start_urls = ["https://www.amazon.ca/electronics-deals-electronics-sale-tv-sale/b/ref=gbps_ftr_m-4_b14c_page_" + str(x) + "?node=2055586011&nocache=1623420881512&gb_f_dealsce=dealTypes:DEAL_OF_THE_DAY%252CBEST_DEAL%252CLIGHTNING_DEAL,page:2,sortOrder:BY_SCORE,enforcedCategories:667823011%252C2404990011%252C2690975011%252C677230011%252C7337291011%252C3379552011%252C3379595011%252C6205511011%252C680468011%252C11402068011%252C6916844011%252C677268011%252C1233055011%252C677273011%252C7204527011%252C677250011%252C2690953011%252C677252011%252C677212011%252C677226011%252C3379546011,dealsPerPage:32&pf_rd_p=cd5fdf1b-afb1-45db-8a82-940414a2b14c&pf_rd_s=merchandised-search-4&pf_rd_t=101&pf_rd_i=2055586011&pf_rd_m=A3DWYIK6Y9EEQB&pf_rd_r=2C6RA5DEFBW1A3KQSPEM&ie=UTF8" for x in range(1,5)]
  11.  
  12. rules = (
  13. Rule(LinkExtractor(restrict_xpaths ='//a[@id="dealTitle"]'), callback='parse_item', follow=True),
  14. )
  15.  
  16. def parse_item(self, response):
  17. yield{
  18. 'title':response.xpath('.//h1[@id="title"]/span/text()').get()
  19. }
  20.  
  21.  
Advertisement
Add Comment
Please, Sign In to add comment