SHARE
TWEET

Untitled

a guest Dec 14th, 2019 102 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import scrapy
  2. import logging
  3. import re
  4.  
  5.  
  6. class ItemdataSpider(scrapy.Spider):
  7.     name = 'itemdata'
  8.     allowed_domains = ['www.webstaurantstore.com']
  9.     start_urls = ['https://www.webstaurantstore.com/42529/restaurant-chairs.html']
  10.  
  11.     def parse(self, response):
  12.         items = response.xpath("//div[@class='details']/a[contains(@class, 'description')]")
  13.         for item in items:
  14.             link = item.xpath(".//@href").get()
  15.             yield response.follow(url=link, callback=self.parse_item)
  16.        
  17.         next_page = response.xpath("(//a[@rel='next'])[2]/@href").get()
  18.  
  19.         if next_page:
  20.             yield response.follow(url=next_page, callback=self.parse)
  21.  
  22.     def parse_item(self, response):
  23.         item_number = response.xpath("//span[@itemprop='sku']/text()").get()
  24.         item_specs = response.xpath("//tr[@class='trSpecSheetRow' and not(@hidden)]/td/text()").getall()
  25.  
  26.         ##yield {'item_number': item_number, 'item_specs': item_specs}
  27.  
  28.         ##try:
  29.         ##    item_heightstyle = item_specs.index('\nHeight Style\n')
  30.         ##    item_specA = item_specs[item_heightstyle + 1]
  31.         ##except:
  32.         ##    item_specA = 'N/A'
  33.        
  34.         ##try:
  35.         ##    item_color = item_specs.index('\nColor\n')
  36.         ##    item_specB = item_specs[item_color + 1]
  37.         ##except:
  38.         ##    item_specB = 'N/A'
  39.  
  40.         ##try:
  41.         ##    item_framecolor = item_specs.index('\nFrame Color\n')
  42.         ##    item_specC = item_specs[item_framecolor + 1]    
  43.         ##except:
  44.         ##    item_specC = 'N/A'
  45.  
  46.         ##try:
  47.         ##    item_framematerial = item_specs.index('\nFrame Material\n')
  48.         ##    item_specD = item_specs[item_framematerial + 1]
  49.         ##except:
  50.         ##    item_specD = 'N/A'
  51.  
  52.         ##try:    
  53.         ##    item_seatmaterial = item_specs.index('\nSeat Material\n')
  54.         ##    item_specE = item_specs[item_seatmaterial + 1]      
  55.         ##except:
  56.         ##    item_specE = 'N/A'
  57.  
  58.         ##try:
  59.         ##    item_style = item_specs.index('\nStyle\n')
  60.         ##    item_specF = item_specs[item_style + 1]  
  61.         ## except:
  62.          ##   item_specF = 'N/A'
  63.  
  64.         ##try:
  65.         ##    item_type = item_specs.index('\nType\n')
  66.         ##    item_specG = item_specs[item_type + 1]  
  67.         ##except:
  68.         ##    item_specG = 'N/A'
  69.  
  70.        ## try:
  71.         ##    item_usage = item_specs.index('\nUsage\n')
  72.         ##    item_specH = item_specs[item_usage + 1]  
  73.         ##except:
  74.         ##    item_specH = 'N/A'
  75.  
  76.         ##item_specs = [i.split(',', 2)[0] for i in item_specs]
  77.  
  78.  
  79.        ## yield {'item_number': item_number, 'item_Height_Style': item_specA, 'item_color': item_specB,
  80.        ##  'item_framecolor': item_specC, 'item_framematerial': item_specD, 'item_seatmaterial': item_specE,
  81.        ####  'item_style': item_specF, 'item_type': item_specG, 'item_usage': item_specH}
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top