Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.09 KB | None | 0 0
  1. import scrapy
  2. import re
  3. import sys
  4.  
  5. class AmazonScrapy(scrapy.Spider):
  6.  
  7. name = "AmazonScraper"
  8.  
  9. start_urls = ["https://www.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=electronic+devices"]
  10.  
  11. def parse_item(self,response):
  12.  
  13.  
  14. product_description = response.css("div #feature-bullets span.a-list-item::text").extract()
  15. price_whole = response.css("span#priceblock_ourprice::text").extract()
  16. original_price = response.css("div#unifiedPrice_feature_div span.a-text-strike::text").extract_first()
  17. product_name = response.css("span#productTitle::text").extract_first().strip()
  18. rating_temp = response.css("div#averageCustomerReviews i.a-icon-star span::text").extract_first()
  19. rating = rating_temp[:3]
  20. image_link = response.css("div#imgTagWrapperId img::attr(src)").extract()
  21. self.stripper(product_description)
  22. product_link = response.url
  23.  
  24. if( price_whole is None):
  25. pass
  26. else:
  27. yield{
  28. 'product_desc' : product_description,
  29. 'product_name' : product_name,
  30. 'current_price' : price_whole,
  31. 'original_price' : original_price,
  32. 'rating' : rating,
  33. 'image_link' : image_link,
  34. 'product_link' : product_link
  35.  
  36. }
  37.  
  38. def parse(self,response):
  39. for detail in response.css("div.s-item-container"):
  40.  
  41. link = detail.css("a.s-access-detail-page::attr(href)").extract_first()
  42.  
  43. if link is not None:
  44. yield scrapy.Request(link,callback=self.parse_item)
  45.  
  46.  
  47.  
  48.  
  49. next_page = response.css('a.pagnNext::attr(href)').extract_first()
  50. if next_page is not None:
  51. next_page = response.urljoin(next_page)
  52. yield scrapy.Request(next_page, callback=self.parse)
  53.  
  54.  
  55.  
  56.  
  57. def stripper(self,list_desc):
  58. for item in range(len(list_desc)):
  59. list_desc[item] = list_desc[item].strip()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement