Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import datetime

from scrapy import Spider
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider
from scrapy.spiders import Rule

from armani.items import armaniItem
class ArmaniSpider(CrawlSpider):
    """Crawl the armani.com US and FR stores and scrape product pages.

    The crawl starts from the Giorgio Armani women's suits-and-jackets
    listing, follows links under the ``/us/`` and ``/fr/`` sections, and
    passes every page whose URL matches ``.*_cod.*\\.html`` to
    :meth:`parse_item`.

    This must derive from ``CrawlSpider``: the plain ``Spider`` base class
    does not process the ``rules`` attribute.
    """

    name = 'araden'
    allowed_domains = ['armani.com']
    start_urls = [
        'http://www.armani.com/us/giorgioarmani/women/onlinestore/suits-and-jackets'
    ]
    rules = (
        # The product rule has to come first: CrawlSpider hands each link to
        # the first rule that matches it, and product URLs also match the
        # broad /us/ and /fr/ rule below (which has no callback).
        Rule(LinkExtractor(allow=(r'.*_cod.*\.html', )), callback='parse_item'),
        Rule(LinkExtractor(allow=('http://www.armani.com/us/', 'http://www.armani.com/fr/')),
             follow=True),
    )

    def parse_item(self, response):
        """Build an ``armaniItem`` from a product page response.

        Args:
            response: The Scrapy response for a product page.

        Returns:
            The populated ``armaniItem``. ``price``, ``currency`` and
            ``description`` are ``None`` when the corresponding node is
            missing from the page; ``region`` is left unset when the URL
            contains neither ``/us/`` nor ``/fr/``.
        """
        item = armaniItem()
        self.logger.info('Hi, this is an item page! %s', response.url)

        item['name'] = response.xpath('//h2[@class="productName"]/text()').extract()

        # extract_first() yields None for a missing node instead of raising
        # IndexError, so one absent field no longer discards the whole item.
        item['price'] = response.xpath('//span[@class="priceValue"]/text()').extract_first()

        currency = response.xpath('//span[@class="currency"]/text()').extract_first()
        item['currency'] = 'USD' if currency == '$' else currency

        item['category'] = response.xpath('//li[@class="selected leaf"]/a/text()').extract()
        item['sku'] = response.xpath('//span[@class="MFC"]/text()').extract()

        # extract() returns a list, which never equals True; test whether
        # anything matched instead.
        # NOTE(review): this assumes the sold-out markers carry text nodes --
        # confirm against the live page markup.
        sold_out = response.xpath('//div[@class="soldOutButton"]/text()').extract()
        out_of_stock = response.xpath('//span[@class="outStock"]/text()').extract()
        # The key spelling matches the field name used for armaniItem.
        item['avaliability'] = 'No' if sold_out or out_of_stock else 'Yes'

        item['time'] = datetime.datetime.now().strftime("%Y.%m.%d %H:%M")
        item['color'] = response.xpath('//*[contains(@id, "color_")]/a/text()').extract()
        item['size'] = response.xpath('//*[contains(@id, "sizew_")]/a/text()').extract()

        if '/us/' in response.url:
            item['region'] = 'US'
        elif '/fr/' in response.url:
            item['region'] = 'FR'

        item['description'] = response.xpath(
            '//div[@class="descriptionContent"]/text()').extract_first()
        return item
Advertisement
Add Comment
Please, Sign In to add comment