SHARE
TWEET

Untitled

a guest Apr 23rd, 2019 73 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import scrapy
  2. from pprint import pprint
  3.  
  4. arraysg = []  # module-level list; never read or appended to in the visible code
  5.  
  6. class SillySetSpider(scrapy.Spider):
  7.     name = "silly_spider"
  8.     start_urls = ['https://pixabay.com/']
  9.  
  10.     custom_settings = {
  11.  
  12.             'DOWNLOAD_DELAY': '2',
  13.             'DEPTH_LIMIT': '1'
  14.            
  15.         }
  16.  
  17.     def parse(self, response):
  18.         SET_SELECTOR = '.item'
  19.         for silly in response.css(SET_SELECTOR):
  20.  
  21.             NAME_SELECTOR = 'h1 ::text'
  22.             PIECES_SELECTOR = './/dl[dt/text() = "Pieces"]/dd/a/text()'
  23.             MINIFIGS_SELECTOR = './/dl[dt/text() = "Minifigs"]/dd[2]/a/text()'
  24.             IMAGE_SELECTOR = 'img ::attr(src)'
  25.  
  26.             namesg = silly.css(NAME_SELECTOR).extract_first(),
  27.             piecessg = silly.xpath(PIECES_SELECTOR).extract_first(),
  28.             minifigssg = silly.xpath(MINIFIGS_SELECTOR).extract_first(),
  29.             imagesg = silly.css(IMAGE_SELECTOR).extract_first(),
  30.            
  31.             pprint(imagesg)
  32.            
  33.         NEXT_PAGE_SELECTOR = '.next a ::attr(href)'
  34.         next_page = response.css(NEXT_PAGE_SELECTOR).extract_first()
  35.         if next_page:
  36.             yield scrapy.Request(
  37.                 response.urljoin(next_page),
  38.                 callback=self.parse
  39.             )
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top