Advertisement
Not a member of Pastebin yet?
Sign Up — it unlocks many cool features!
- import scrapy
- from pprint import pprint
- arraysg = []
- class SillySetSpider(scrapy.Spider):
- name = "silly_spider"
- start_urls = ['https://pixabay.com/']
- custom_settings = {
- 'DOWNLOAD_DELAY': '2',
- 'DEPTH_LIMIT': '1'
- }
- def parse(self, response):
- SET_SELECTOR = '.item'
- for silly in response.css(SET_SELECTOR):
- NAME_SELECTOR = 'h1 ::text'
- PIECES_SELECTOR = './/dl[dt/text() = "Pieces"]/dd/a/text()'
- MINIFIGS_SELECTOR = './/dl[dt/text() = "Minifigs"]/dd[2]/a/text()'
- IMAGE_SELECTOR = 'img ::attr(src)'
- namesg = silly.css(NAME_SELECTOR).extract_first(),
- piecessg = silly.xpath(PIECES_SELECTOR).extract_first(),
- minifigssg = silly.xpath(MINIFIGS_SELECTOR).extract_first(),
- imagesg = silly.css(IMAGE_SELECTOR).extract_first(),
- pprint(imagesg)
- NEXT_PAGE_SELECTOR = '.next a ::attr(href)'
- next_page = response.css(NEXT_PAGE_SELECTOR).extract_first()
- if next_page:
- yield scrapy.Request(
- response.urljoin(next_page),
- callback=self.parse
- )
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement