Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from shopclues.items import ShopcluesItem
- from scrapy.contrib.spiders import CrawlSpider, Rule
- from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
class ShopcluesSpider(scrapy.Spider):
    """Scrape price and discount text from a ShopClues search-results page.

    The spider requests the single search URL in ``start_urls`` and yields
    one plain dict per (price, discount) pair found in the response.
    """

    name = 'shopclues'

    # allowed_domains takes bare domain names, not URLs: an entry carrying a
    # scheme, path or query string is not a valid domain for offsite
    # filtering.
    allowed_domains = ['shopclues.com']

    # The URL must be a single unbroken string literal. The stray trailing
    # "/" after the query string was dropped so the last parameter is sent
    # as "z=0" rather than "z=0/".
    start_urls = ['http://www.shopclues.com/search?q=samsung&sc_z=4444&z=0']

    # NOTE(review): `rules` is only consulted by CrawlSpider; on a plain
    # scrapy.Spider it is inert, and no `parse_items` callback is defined on
    # this class. Kept (now as a real one-element tuple -- the trailing comma
    # matters) so the attribute still exists; confirm whether link-following
    # was intended before switching the base class or deleting this.
    rules = (
        Rule(SgmlLinkExtractor(allow=()), callback="parse_items", follow=True),
    )

    def parse(self, response):
        """Yield one ``{'prices': ..., 'discounts': ...}`` dict per product.

        Args:
            response: The downloaded search-results page.

        Yields:
            dict: The text of one ``span.p_price`` node under ``'prices'``
            paired with the text of one ``span.prd_discount`` node under
            ``'discounts'``.
        """
        prices = response.xpath('//span[@class="p_price"]/text()').extract()
        discounts = response.xpath(
            '//span[@class="prd_discount"]/text()'
        ).extract()

        # zip() pairs the two lists purely by position and stops at the
        # shorter one. NOTE(review): if some products show a price without a
        # discount, pairs after that product will be misaligned -- verify
        # against the live markup.
        for price, discount in zip(prices, discounts):
            yield {
                'prices': price,
                'discounts': discount,
            }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement