Advertisement
Guest User

Untitled

a guest
Nov 30th, 2014
342
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.03 KB | None | 0 0
  1. import scrapy
  2.  
  3. from scrapy.contrib.spiders import Rule, CrawlSpider
  4. from scrapy.contrib.linkextractors import LinkExtractor
  5. from postergully.items import PostergullyItem
  6.  
  7.  
  8. class PostergullySpider(CrawlSpider):
  9. name = "postergully"
  10. allowed_domains = ["postergully.com"]
  11. start_urls = ["http://www.postergully.com/collections/postergully-specials"]
  12. rules = [Rule(LinkExtractor(allow =['/products/.*']), 'parse_postergully')]
  13.  
  14. def parse_postergully(self, response):
  15.  
  16. item =PostergullyItem()
  17.  
  18. item['name'] = response.xpath("//h1/text()").extract()
  19.  
  20. # link_rel = response.xpath("//p/a[@class='title']/@href").extract()
  21. # item['link'] = ['http://www.postergully.com'+link_rel[0]]
  22.  
  23. item['price'] = response.xpath("//span[@class='price']/text()").extract()
  24.  
  25. link = response.xpath("//a[@id='mainImage']/img/@scr").extract()
  26. item['image_urls'] = ['http:'+link[0]]
  27.  
  28. # item['image_urls'] = response.xpath("//a[@id='mainImage']/img/@src").extract()
  29. # item['image_urls'] = ['http:'+item['image_urls']]
  30.  
  31. return item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement