Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.24 KB | None | 0 0
  1. import scrapy
  2. from pprint import pprint
  3.  
  4. arraysg = []
  5.  
  6. class SillySetSpider(scrapy.Spider):
  7. name = "silly_spider"
  8. start_urls = ['https://pixabay.com/']
  9.  
  10. custom_settings = {
  11.  
  12. 'DOWNLOAD_DELAY': '2',
  13. 'DEPTH_LIMIT': '1'
  14.  
  15. }
  16.  
  17. def parse(self, response):
  18. SET_SELECTOR = '.item'
  19. for silly in response.css(SET_SELECTOR):
  20.  
  21. NAME_SELECTOR = 'h1 ::text'
  22. PIECES_SELECTOR = './/dl[dt/text() = "Pieces"]/dd/a/text()'
  23. MINIFIGS_SELECTOR = './/dl[dt/text() = "Minifigs"]/dd[2]/a/text()'
  24. IMAGE_SELECTOR = 'img ::attr(src)'
  25.  
  26. namesg = silly.css(NAME_SELECTOR).extract_first(),
  27. piecessg = silly.xpath(PIECES_SELECTOR).extract_first(),
  28. minifigssg = silly.xpath(MINIFIGS_SELECTOR).extract_first(),
  29. imagesg = silly.css(IMAGE_SELECTOR).extract_first(),
  30.  
  31. pprint(imagesg)
  32.  
  33. NEXT_PAGE_SELECTOR = '.next a ::attr(href)'
  34. next_page = response.css(NEXT_PAGE_SELECTOR).extract_first()
  35. if next_page:
  36. yield scrapy.Request(
  37. response.urljoin(next_page),
  38. callback=self.parse
  39. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement