Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def parse(self, response):
    """Print the response, then the plain-text form of selected <div> nodes.

    A new ``html2text.HTML2Text`` converter with ``ignore_links`` enabled is
    built on every call and, on success, published through the module-level
    name ``converter``. If the converter cannot be built, ``"error"`` and the
    response are printed and the method returns without querying the
    response, rather than failing later on a missing or stale converter.

    Args:
        response: Object exposing ``xpath()``; presumably a Scrapy response
            -- TODO confirm against the caller.

    Returns:
        None. All output goes to stdout.
    """
    global converter

    # Build into a local first so a failed construction never leaves the
    # module-level ``converter`` half-configured.
    try:
        fresh_converter = html2text.HTML2Text()
        fresh_converter.ignore_links = True
    except Exception:
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        print("error")
        fresh_converter = None
    else:
        converter = fresh_converter

    # Single-argument print(...) emits the same text under Python 2's print
    # statement and Python 3's print function.
    print(response)

    if fresh_converter is None:
        return

    container_path = "/html/body/div/div/div/div/div/div/div/div"
    for container in response.xpath(container_path):
        # Relative query: children of the first <div> inside each container.
        for tag in container.xpath('div[1]/div'):
            print(fresh_converter.handle(tag.extract()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement