Advertisement
jfabella

novel_list.py

Sep 9th, 2019
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.23 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from scrapy.loader import ItemLoader
  4. from boxnovel_list.items import BoxnovelListItem
  5.  
  6.  
  7.  
  8. class NovelListSpider(scrapy.Spider):
  9. name = 'novel_list'
  10.  
  11. def start_requests(self):
  12. url = 'https://boxnovel.com/novel/?m_orderby=alphabet'
  13. yield scrapy.Request(url=url,callback = self.parse)
  14.  
  15. def parse(self, response):
  16. for data in response.selector.xpath("//div[@class='item-summary']"):
  17. loader = ItemLoader(item=BoxnovelListItem(),selector = data,response=response)
  18. loader.add_xpath('title',".//div[@class='post-title font-title']/h5/a/text()")
  19. loader.add_xpath('link',".//div[@class='post-title font-title']/h5/a/@href")
  20. loader.add_xpath('score',".//div[@class='meta-item rating']/div/span/text()")
  21. loader.add_xpath('last_chapter',".//div[@class='list-chapter']/div[1]/span[1]/a/text()")
  22. yield loader.load_item()
  23.  
  24. next_page = response.selector.xpath("//div[@class='nav-previous pull-left']/a/@href").extract_first()
  25.  
  26. if next_page is not None:
  27. next_page_link = response.urljoin(next_page)
  28. yield scrapy.Request(url = next_page_link, callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement