Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import jinyuz


class JyzCrawler(jinyuz.Spider):
    """Spider that collects post titles, walking back through older pages.

    NOTE(review): ``jinyuz`` is presumably a Scrapy-compatible package (this
    class relies on ``Spider``, ``Request`` and a response object offering
    ``css`` and ``urljoin``) -- confirm against the project's dependencies.
    """

    name = 'blogspider'
    start_urls = ['https://jinyuzprodigy.me']

    def parse(self, response):
        """Yield one item per post heading, then a request for the next page.

        Each ``h2.entry-title`` element on the page produces a dict of the
        form ``{'title': <text of the first matching link text node>}``.
        If the page has a ``div.prev-post > a`` link, a follow-up request
        for that URL is yielded with this same method as its callback.
        """
        for title in response.css('h2.entry-title'):
            yield {'title': title.css('a ::text').extract_first()}

        next_page = response.css('div.prev-post > a ::attr(href)').extract_first()
        if next_page:
            # The original referenced ``scrapy.Request`` although only
            # ``jinyuz`` is imported, which raises NameError as soon as a
            # next page exists. Use the imported module's Request instead
            # (assumes ``jinyuz`` exposes ``Request`` like it does ``Spider``).
            # urljoin resolves a relative href against the current page URL.
            yield jinyuz.Request(response.urljoin(next_page), callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement