Advertisement
Guest User

Untitled

a guest
Jun 23rd, 2018
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.84 KB | None | 0 0
  1. class DiscogsRecordsSpider(scrapy.Spider):
  2.     '''Scrapy web scraper for getting records data '''
  3.     name = 'discogs_records'
  4.  
  5.     def __init__(self, country=''):
  6.         super(DiscogsRecordsSpider, self).__init__()
  7.         self.country = country
  8.         self.start_urls = [
  9.             'http://www.discogs.com/search/?limit=50&page=1&country_exact=%s' % self.country
  10.         ]
  11.  
  12.  
  13.     def parse(self, response):
  14.         records = response.css('.card > a').css('a::attr(href)').extract()
  15.         for record in records:
  16.                 yield scrapy.Request(response.urljoin(record), callback=self.parse_records)
  17.         next_page = response.css('a[rel="next"]::attr(href)').extract_first()
  18.         if next_page is not None:
  19.             next_page = response.urljoin(next_page)
  20.             yield scrapy.Request(next_page, callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement