Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- from scrapy.spider import Spider
- from scrapy.selector import Selector
- from TBFXSpider.items import TbfxspiderItem
- from scrapy.http import Request
class TBFXSpider(Spider):
    """Spider that collects item descriptions from a Taobao search page.

    The crawl starts at the search home page, which only serves as an entry
    point: ``parse`` answers that response with a request for a fixed search
    URL, and then handles the search response by emitting one item per
    result row.
    """

    name = "TBFXSpider"

    # Entry page; its response is what triggers the real search request.
    HOME_URL = "http://s.taobao.com/"
    # Fixed search request (percent-encoded keyword in ``q``).
    SEARCH_URL = "http://s.taobao.com/search?spm=1.7274553.1997520241.3.bKiaLP&q=%E8%BF%9E%E8%A1%A3%E8%A3%99&refpid=420462_1006&source=tbsy&style=grid&tab=all&sort=sale-desc"
    # Every direct <div> child of the result container is one result row.
    ROW_XPATH = '//*[@id="J_itemlistCont"]/div'
    # Location of the description node, relative to a single result row.
    DESC_XPATH = 'div[4]/div[1]/a/span[2]'

    start_urls = [HOME_URL]

    def parse(self, response):
        """Handle both the entry page and the search result page.

        Args:
            response: The downloaded page handed over by Scrapy.

        Yields:
            A ``Request`` for ``SEARCH_URL`` when ``response`` is the entry
            page; otherwise one ``TbfxspiderItem`` per result row, whose
            ``desc`` field holds the UTF-8 encoded strings extracted from
            that row's description node.
        """
        if response.url == self.HOME_URL:
            # Single pre-formatted argument: prints identically on Python 2
            # and Python 3.
            print("******response url:%s******" % response.url)
            yield Request(self.SEARCH_URL, callback=self.parse)
            return

        for row in Selector(response).xpath(self.ROW_XPATH):
            item = TbfxspiderItem()
            # Query the current row, not the whole row list, so that each
            # item only carries its own description.
            descriptions = row.xpath(self.DESC_XPATH).extract()
            item['desc'] = [text.encode("utf-8") for text in descriptions]
            yield item
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement