Advertisement
Guest User

Untitled

a guest
Mar 28th, 2015
220
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.03 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. from scrapy.spider import Spider
  4. from scrapy.selector import Selector
  5. from TBFXSpider.items import TbfxspiderItem
  6. from scrapy.http import Request
  7.  
  class TBFXSpider(Spider):
      """Spider that scrapes item descriptions from a Taobao search listing.

      The crawl starts at the search landing page; ``parse`` then requests
      the fixed search-results URL in ``search_url`` and, for that second
      response, yields one ``TbfxspiderItem`` per entry found in the
      ``J_itemlistCont`` container.
      """

      name = "TBFXSpider"
      start_urls = [
          "http://s.taobao.com/",
      ]

      # Search-results page requested once the landing page has been fetched;
      # the query and sort order are encoded in the query string.
      search_url = (
          "http://s.taobao.com/search?spm=1.7274553.1997520241.3.bKiaLP"
          "&q=%E8%BF%9E%E8%A1%A3%E8%A3%99&refpid=420462_1006&source=tbsy"
          "&style=grid&tab=all&sort=sale-desc"
      )

      def parse(self, response):
          """Handle both the landing page and the search-results page.

          For the landing page (``http://s.taobao.com/``) this yields a single
          follow-up ``Request`` for ``search_url`` with this method as the
          callback. For any other response it yields one ``TbfxspiderItem``
          per child ``div`` of ``#J_itemlistCont``, whose ``desc`` field is the
          list of UTF-8 encoded fragments extracted from that entry.
          """
          if response.url == "http://s.taobao.com/":
              # Parenthesised single-argument form prints the same text under
              # both the Python 2 print statement and the Python 3 function.
              print("******response url:%s******" % response.url)
              yield Request(self.search_url, callback=self.parse)
              return

          sel = Selector(response)
          rows = sel.xpath('//*[@id="J_itemlistCont"]/div')

          for row in rows:
              item = TbfxspiderItem()
              # Query relative to the current row (not the whole row list) so
              # that each item only receives its own description fragments.
              fragments = row.xpath('div[4]/div[1]/a/span[2]').extract()
              item['desc'] = [text.encode("utf-8") for text in fragments]
              yield item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement