Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from scrapy.spiders import CrawlSpider, Rule
- from scrapy.linkextractors import LinkExtractor
- from armani.items import ArmaniItem
- import datetime
class ArmaniProducts(CrawlSpider):
    """Crawl the Giorgio Armani online store and scrape product pages.

    The crawl starts at the US women's suits-and-jackets listing, follows
    links under the US and FR ``giorgioarmani`` sections, and passes every
    URL matching ``.*_cod.*\\.html`` to :meth:`parse_item`.
    """

    name = 'armani_products'
    allowed_domains = ['www.armani.com']
    start_urls = [
        'http://www.armani.com/us/giorgioarmani/women/onlinestore/suits-and-jackets',
    ]

    rules = (
        # Product-page rule goes FIRST: when several rules match the same
        # link, CrawlSpider uses only the first one. With the broad section
        # rule listed first, product URLs under the same path would be
        # followed without ever reaching parse_item.
        # follow=True keeps product pages in the crawl frontier, as they were
        # when the broad rule handled them.
        Rule(
            LinkExtractor(allow=(r'.*_cod.*\.html',)),
            callback='parse_item',
            follow=True,
        ),
        # Keep crawling anything else inside the US and FR Giorgio Armani
        # sections; these pages are only followed, not parsed.
        Rule(
            LinkExtractor(allow=(
                'http://www.armani.com/us/giorgioarmani/',
                'http://www.armani.com/fr/giorgioarmani/',
            )),
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build an ``ArmaniItem`` from a product page response.

        Args:
            response: The Scrapy response for a URL matched by the
                product-page rule.

        Returns:
            An ``ArmaniItem`` whose ``name`` is the list of text nodes found
            in ``h2.productName`` and whose ``price`` is the first text node
            of ``span.priceValue``, or ``None`` when the page has no such
            node.
        """
        self.logger.info('Hi, this is an item page! %s', response.url)
        item = ArmaniItem()
        # Kept as a list (possibly empty) so downstream consumers see the
        # same shape as before.
        item['name'] = response.xpath('//h2[@class="productName"]/text()').extract()
        # extract_first() returns None instead of raising IndexError when the
        # price element is absent (e.g. sold-out or restyled pages).
        item['price'] = response.xpath('//span[@class="priceValue"]/text()').extract_first()
        return item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement