Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class DiscogsRecordsSpider(scrapy.Spider):
    '''Scrapy spider that walks Discogs search results for one country.

    The spider starts from the first search-results page filtered by
    ``country_exact``, schedules a request for every record link found on
    the page, and then follows the "next" pagination link until none is left.

    NOTE(review): ``parse_records`` is used as the callback for record pages
    but is not defined in the visible part of this class -- confirm it exists
    elsewhere before running the spider.
    '''

    name = 'discogs_records'

    def __init__(self, country='', *args, **kwargs):
        '''Store the country filter and build the initial search URL.

        Args:
            country: Value interpolated into the ``country_exact`` query
                parameter of the start URL. Defaults to an empty string.
            *args: Extra positional arguments forwarded to ``scrapy.Spider``.
            **kwargs: Extra keyword arguments (e.g. additional ``-a``
                spider arguments) forwarded to ``scrapy.Spider``.
        '''
        # Forward unknown arguments to the base class so that extra spider
        # arguments do not raise TypeError and are handled by scrapy.Spider.
        super(DiscogsRecordsSpider, self).__init__(*args, **kwargs)
        self.country = country
        self.start_urls = [
            'http://www.discogs.com/search/?limit=50&page=1&country_exact=%s' % self.country
        ]

    def parse(self, response):
        '''Yield requests for each record on a results page and the next page.

        Args:
            response: The Scrapy response for a search-results page.

        Yields:
            ``scrapy.Request`` objects: one per record link (handled by
            ``self.parse_records``), followed by one for the next results
            page (handled by this method) when a ``rel="next"`` link exists.
        '''
        records = response.css('.card > a').css('a::attr(href)').extract()
        for record in records:
            # Hrefs are resolved against the current page URL via urljoin.
            yield scrapy.Request(response.urljoin(record), callback=self.parse_records)

        next_page = response.css('a[rel="next"]::attr(href)').extract_first()
        if next_page is not None:
            next_page = response.urljoin(next_page)
            yield scrapy.Request(next_page, callback=self.parse)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement