Advertisement
apl-mhd

add page number

Jun 26th, 2019
237
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.90 KB | None | 0 0
  1. import  scrapy
  2.  
  3. class AuthorSpider(scrapy.Spider):
  4.  
  5.     name = "quotes"
  6.  
  7.     start_urls = [
  8.         'http://quotes.toscrape.com/page/1/',
  9.  
  10.     ]
  11.  
  12.  
  13.  
  14.     page=0;
  15.  
  16.  
  17.  
  18.  
  19.  
  20.     def parse(self, response):
  21.         quotes = response.css('div.quote')
  22.  
  23.         AuthorSpider.page +=1
  24.  
  25.         for quote in quotes:
  26.  
  27.             content = quote.css('span.text::text').get()
  28.             author = quote.css('small.author::text').get()
  29.             tags = quote.css('a.tag::text').getall()
  30.  
  31.  
  32.             yield {
  33.  
  34.                 'content' : content,
  35.                 'author' : author,
  36.                 'tags' : tags,
  37.  
  38.                 'page' : AuthorSpider.page
  39.  
  40.  
  41.             }
  42.  
  43.         next_page = response.css('li.next a::attr(href)').get()
  44.  
  45.         if next_page is not None:
  46.             next_page = response.urljoin(next_page)
  47.             yield scrapy.Request(next_page, callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement