Advertisement
Guest User

Untitled

a guest
Dec 14th, 2019
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.89 KB | None | 0 0
  1. import scrapy
  2. import logging
  3. import re
  4.  
  5.  
  class ItemdataSpider(scrapy.Spider):
      """Scrape product specs from the WebstaurantStore restaurant-chairs listing.

      The crawl starts at the category page in ``start_urls``. ``parse``
      follows every product link on a listing page plus the pagination link;
      ``parse_item`` turns each product page into one dict item.
      """

      name = 'itemdata'
      allowed_domains = ['www.webstaurantstore.com']
      start_urls = ['https://www.webstaurantstore.com/42529/restaurant-chairs.html']

      # (output key, spec-sheet label) pairs. The value emitted for a key is
      # the text cell that immediately follows the matching label cell.
      _SPEC_FIELDS = (
          ('item_Height_Style', 'Height Style'),
          ('item_color', 'Color'),
          ('item_framecolor', 'Frame Color'),
          ('item_framematerial', 'Frame Material'),
          ('item_seatmaterial', 'Seat Material'),
          ('item_style', 'Style'),
          ('item_type', 'Type'),
          ('item_usage', 'Usage'),
      )

      # Placeholder emitted when a product page does not list a given spec.
      _MISSING_SPEC = 'N/A'

      def parse(self, response):
          """Yield a request per product link on a listing page, then the next page.

          Args:
              response: the downloaded listing page.

          Yields:
              Requests for product pages (handled by ``parse_item``) and, when
              a pagination link is found, a request for the next listing page
              (handled by ``parse`` again).
          """
          product_links = response.xpath(
              "//div[@class='details']/a[contains(@class, 'description')]"
          )
          for anchor in product_links:
              href = anchor.xpath(".//@href").get()
              # An anchor without an href gives nothing to follow; skip it
              # rather than hand None to response.follow().
              if href:
                  yield response.follow(url=href, callback=self.parse_item)

          # NOTE(review): this selects the *second* rel="next" anchor on the
          # page; presumably the pager is rendered twice -- confirm against
          # the live markup, since a page with a single pager stops the crawl.
          next_page = response.xpath("(//a[@rel='next'])[2]/@href").get()
          if next_page:
              yield response.follow(url=next_page, callback=self.parse)

      def parse_item(self, response):
          """Yield one dict describing the product on a product page.

          Args:
              response: the downloaded product page.

          Yields:
              A dict with ``item_number`` (text of the ``sku`` span, or None
              when absent) and one entry per ``_SPEC_FIELDS`` key. A spec that
              is not found on the page is reported as ``'N/A'``.
          """
          item_number = response.xpath("//span[@itemprop='sku']/text()").get()
          item_specs = response.xpath(
              "//tr[@class='trSpecSheetRow' and not(@hidden)]/td/text()"
          ).getall()

          # Cell text appears to be wrapped in newlines (the earlier draft of
          # this method matched labels such as '\nHeight Style\n'), so
          # normalise whitespace once before looking labels up. Cells are NOT
          # filtered, because a value is located by its position after a label.
          cells = [cell.strip() for cell in item_specs]

          item = {'item_number': item_number}
          for field, label in self._SPEC_FIELDS:
              item[field] = self._spec_value(cells, label, self._MISSING_SPEC)
          yield item

      @staticmethod
      def _spec_value(cells, label, default):
          """Return the cell following ``label`` in ``cells``, or ``default``.

          ``default`` is returned when the label is absent, when it is the
          last cell, or when the following cell is empty.
          """
          try:
              value = cells[cells.index(label) + 1]
          except (ValueError, IndexError):
              # ValueError: label not on this page; IndexError: label is last.
              return default
          return value or default
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement