Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import scrapy
- from scrapy.loader import ItemLoader
- from boxnovel_list.items import BoxnovelListItem
class NovelListSpider(scrapy.Spider):
    """Spider that scrapes the alphabetically ordered novel listing.

    Starting from the boxnovel.com listing URL, it yields one
    ``BoxnovelListItem`` per ``item-summary`` block and keeps following
    the pagination link for as long as the page provides one.
    """

    name = 'novel_list'

    def start_requests(self):
        """Yield the request for the first listing page."""
        first_page = 'https://boxnovel.com/novel/?m_orderby=alphabet'
        yield scrapy.Request(url=first_page, callback=self.parse)

    def parse(self, response):
        """Extract every novel on a listing page, then queue the next page.

        Yields:
            One loaded ``BoxnovelListItem`` per ``item-summary`` div, followed
            by a ``scrapy.Request`` for the pagination link when it exists.
        """
        # (item field, XPath relative to one summary block), kept in the
        # order in which the fields are added to the loader.
        field_queries = (
            ('title', ".//div[@class='post-title font-title']/h5/a/text()"),
            ('link', ".//div[@class='post-title font-title']/h5/a/@href"),
            ('score', ".//div[@class='meta-item rating']/div/span/text()"),
            ('last_chapter', ".//div[@class='list-chapter']/div[1]/span[1]/a/text()"),
        )

        for summary in response.xpath("//div[@class='item-summary']"):
            loader = ItemLoader(
                item=BoxnovelListItem(),
                selector=summary,
                response=response,
            )
            for field_name, query in field_queries:
                loader.add_xpath(field_name, query)
            yield loader.load_item()

        # The link is read from the 'nav-previous pull-left' container; when
        # the page has no such link, pagination stops here.
        href = response.xpath(
            "//div[@class='nav-previous pull-left']/a/@href"
        ).extract_first()
        if href is None:
            return

        # urljoin resolves a relative href against the current page URL.
        yield scrapy.Request(url=response.urljoin(href), callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement