Advertisement
neonua666

rrr

Dec 21st, 2016
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.03 KB | None | 0 0
  1. from scrapy.spiders import CrawlSpider, Rule
  2. from scrapy.linkextractors import LinkExtractor
  3. from armani.items import ArmaniItem
  4. import datetime
  5.  
  6.  
  7. class ArmaniProducts(CrawlSpider):
  8.     name = 'armani_products'
  9.     allowed_domains = ['www.armani.com']
  10.     start_urls = [
  11.         'http://www.armani.com/us/giorgioarmani/women/onlinestore/suits-and-jackets',
  12.     ]
  13.  
  14.     rules = (
  15.         # Extract links matching 'item.php' and parse them with the spider's method parse_item
  16.         Rule(LinkExtractor(allow=('http://www.armani.com/us/giorgioarmani/', 'http://www.armani.com/fr/giorgioarmani/', )), follow=True),
  17.         Rule(LinkExtractor(allow=('.*_cod.*\.html', )), callback='parse_item'),
  18.     )
  19.  
  20.     def parse_item(self, response):
  21.         self.logger.info('Hi, this is an item page! %s', response.url)
  22.         item = ArmaniItem()
  23.         item['name'] = response.xpath('//h2[@class="productName"]/text()').extract()
  24.         item['price'] = response.xpath('//span[@class="priceValue"]/text()')[0].extract()
  25.         return item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement