Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from scrapy.contrib.spiders import Rule, CrawlSpider
- from scrapy.contrib.linkextractors import LinkExtractor
- from postergully.items import PostergullyItem
class PostergullySpider(CrawlSpider):
    """Crawl postergully.com's "PosterGully Specials" collection and scrape
    product pages into PostergullyItem objects (name, price, image URL)."""

    name = "postergully"
    allowed_domains = ["postergully.com"]
    start_urls = ["http://www.postergully.com/collections/postergully-specials"]
    # Follow every /products/... link found while crawling and hand each
    # product page to parse_postergully.
    rules = [Rule(LinkExtractor(allow=['/products/.*']), 'parse_postergully')]

    def parse_postergully(self, response):
        """Extract one product from a product page.

        Returns a PostergullyItem with:
          - 'name':  list of <h1> text nodes (product title)
          - 'price': list of text nodes from <span class="price">
          - 'image_urls': absolute http: URL list for the images pipeline,
            or [] when the page has no main image.
        """
        item = PostergullyItem()
        item['name'] = response.xpath("//h1/text()").extract()
        item['price'] = response.xpath("//span[@class='price']/text()").extract()
        # BUG FIX: the attribute is 'src', not 'scr'. The typo made extract()
        # return [], so link[0] raised IndexError on every product page.
        link = response.xpath("//a[@id='mainImage']/img/@src").extract()
        # The site serves protocol-relative image URLs ('//cdn...'); prefix a
        # scheme for the images pipeline. Guard against a missing image rather
        # than crashing the whole crawl on one malformed page.
        item['image_urls'] = ['http:' + link[0]] if link else []
        return item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement