Advertisement
vlpap

quotes_spider_pt4.py

Feb 25th, 2021
717
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.88 KB | None | 0 0
  1. import scrapy
  2. from lesson_03_scraping.items import Lesson03ScrapingItem
  3. from scrapy.loader import ItemLoader
  4.  
  5. class QuotesSpider(scrapy.Spider):
  6.     name = "quotes_pt4"
  7.    
  8.     start_urls = ['http://quotes.toscrape.com/page/1/']
  9.    
  10.     def parse(self, response):
  11.         for quote in response.css('div.quote'):
  12.             loader = ItemLoader(item=Lesson03ScrapingItem(), selector=quote)
  13.             loader.add_css('quote_content', 'span.text::text')
  14.             loader.add_css('author', 'small.author::text')
  15.             loader.add_css('tags', 'div.tags a.tag::text')
  16.             quote_item = loader.load_item()      
  17.             yield quote_item    
  18.        
  19.         next_page = response.css('li.next a::attr(href)').get()
  20.         if next_page is not None:
  21.             next_page = response.urljoin(next_page)
  22.             yield scrapy.Request(next_page, callback=self.parse)
  23.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement