daily pastebin goal
76%
SHARE
TWEET

Untitled

a guest Feb 23rd, 2018 61 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import scrapy
  2. import re
  3. import sys
  4.  
  5. class AmazonScrapy(scrapy.Spider):
  6.  
  7.     name = "AmazonScraper"
  8.        
  9.     start_urls = ["https://www.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=electronic+devices"]
  10.    
  11.     def parse_item(self,response):
  12.  
  13.  
  14.         product_description = response.css("div #feature-bullets span.a-list-item::text").extract()
  15.         price_whole = response.css("span#priceblock_ourprice::text").extract()
  16.         original_price = response.css("div#unifiedPrice_feature_div span.a-text-strike::text").extract_first()
  17.         product_name = response.css("span#productTitle::text").extract_first().strip()
  18.         rating_temp = response.css("div#averageCustomerReviews i.a-icon-star span::text").extract_first()
  19.         rating = rating_temp[:3]
  20.         image_link = response.css("div#imgTagWrapperId img::attr(src)").extract()
  21.         self.stripper(product_description)
  22.         product_link = response.url
  23.  
  24.         if( price_whole is None):
  25.             pass
  26.         else:
  27.             yield{
  28.                 'product_desc' : product_description,
  29.                 'product_name' : product_name,
  30.                 'current_price' : price_whole,
  31.                 'original_price' : original_price,
  32.                 'rating' : rating,
  33.                 'image_link' : image_link,
  34.                 'product_link' : product_link
  35.  
  36.             }
  37.  
  38.     def parse(self,response):
  39.         for detail in response.css("div.s-item-container"):
  40.            
  41.             link = detail.css("a.s-access-detail-page::attr(href)").extract_first()
  42.        
  43.             if link is not None:
  44.                 yield scrapy.Request(link,callback=self.parse_item)
  45.  
  46.  
  47.            
  48.  
  49.         next_page = response.css('a.pagnNext::attr(href)').extract_first()
  50.         if next_page is not None:
  51.             next_page = response.urljoin(next_page)
  52.             yield scrapy.Request(next_page, callback=self.parse)
  53.    
  54.  
  55.  
  56.  
  57.     def stripper(self,list_desc):
  58.         for item in range(len(list_desc)):
  59.             list_desc[item] = list_desc[item].strip()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top