Chucha2005

Scrapy Crawler

Feb 11th, 2017
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.95 KB | None | 0 0
import datetime

from scrapy import Spider
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from armani.items import armaniItem
  7.  
  8.  
  9. class ArmaniSpider(Spider):
  10.     name = 'araden'
  11.     allowed_domains = ['armani.com']
  12.  
  13.     start_urls = [
  14.     'http://www.armani.com/us/giorgioarmani/women/onlinestore/suits-and-jackets'
  15. ]
  16.  
  17. rules = (
  18.     Rule(LinkExtractor(allow=('http://www.armani.com/us/', 'http://www.armani.com/fr/')),
  19.     follow=True, ),
  20.     Rule(LinkExtractor(allow=('.*_cod.*\.html', )), callback='parse_item'),
  21. )
  22.  
  23.  
  24. def parse_item(self, response):
  25.     item = armaniItem()
  26.     self.logger.info('Hi, this is an item page! %s', response.url)
  27.     item['name'] = response.xpath('//h2[@class="productName"]/text()').extract()
  28.     item['price'] = response.xpath('//span[@class="priceValue"]/text()')[0].extract()
  29.     if response.xpath('//span[@class="currency"]/text()')[0].extract() == '$':
  30.             item['currency'] = 'USD'
  31.     else:
  32.             item['currency'] = response.xpath('//span[@class="currency"]/text()')[0].extract()
  33.     item['category'] = response.xpath('//li[@class="selected leaf"]/a/text()').extract()
  34.     item['sku'] = response.xpath('//span[@class="MFC"]/text()').extract()
  35.     if response.xpath('//div[@class="soldOutButton"]/text()').extract() == True or response.xpath('//span[@class="outStock"]/text()').extract() == True:
  36.             item['avaliability'] = 'No'
  37.     else:
  38.             item['avaliability'] = 'Yes'
  39.     item['time'] = datetime.datetime.now().strftime("%Y.%m.%d %H:%M")
  40.     item['color'] = response.xpath('//*[contains(@id, "color_")]/a/text()').extract()
  41.     item['size'] = response.xpath('//*[contains(@id, "sizew_")]/a/text()').extract()
  42.     if '/us/' in response.url:
  43.             item['region'] = 'US'
  44.     elif '/fr/' in response.url:
  45.             item['region'] = 'FR'
  46.     item['description'] = response.xpath('//div[@class="descriptionContent"]/text()')[0].extract()
  47.     return item
Advertisement
Add Comment
Please, Sign In to add comment