Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import scrapy
- from urllib.parse import urlparse
# Scheme and host of the scraped site, deliberately WITHOUT a trailing slash:
# any path appended to it must therefore start with '/'.
BASE_URL = 'https://mysite.com'
class MySiteSpider(scrapy.Spider):
    """Collect offer prices for a fixed list of product pages.

    For each start URL the spider walks every page of the offer list and
    emits exactly one item per product, shaped as
    ``{'data': {'article': <str>, 'offers': [{'shop': '', 'price': ...}]}}``.
    """

    name = 'my_site'
    allowed_domains = ['mysite.com']
    # BASE_URL has no trailing slash, so each path must begin with '/'.
    start_urls = [
        BASE_URL + '/product/657019065/',
        BASE_URL + '/product/657019066/',
        BASE_URL + '/product/657019067/',
    ]

    # One node per offer card inside the offer list container.
    _OFFER_XPATH = ('//div[contains(@class, "n-snippet-list")]'
                    '/div[contains(@class, "n-snippet-card")]')
    # href of the pager's "next" button; absent on the last page.
    _NEXT_PAGE_XPATH = ('//div[contains(@class, "n-pager")]'
                        '/a[contains(@class, "n-pager__button-next")]'
                        '/@href')

    def parse(self, response):
        """Handle the first page of a product and start offer collection.

        Creates the per-product accumulator and hands the response to
        ``parse_next`` so the first page is processed exactly like every
        following page.
        """
        data = {
            # The path looks like '/product/<article>/', so after splitting
            # on '/' the article id is the element at index 2.
            'article': urlparse(response.url).path.split('/')[2],
            'offers': [],
        }
        yield from self.parse_next(response, data)

    def parse_next(self, response, data):
        """Add this page's offers to ``data`` and continue or finish.

        Args:
            response: The offer-list page to scrape.
            data: Accumulator dict with the keys ``'article'`` and
                ``'offers'``; the ``'offers'`` list is extended in place.

        Yields:
            A request for the next page while the pager exposes a "next"
            link, otherwise the finished item ``{'data': data}``.
        """
        data['offers'].extend(self._extract_offers(response))

        next_page_path = response.xpath(self._NEXT_PAGE_XPATH).get()
        if next_page_path is not None:
            # urljoin resolves relative, root-relative and absolute hrefs
            # against the current page, so no manual BASE_URL prefix is used.
            yield scrapy.Request(
                response.urljoin(next_page_path),
                callback=self.parse_next,
                cb_kwargs=dict(data=data),
            )
        else:
            # Last page reached (or the product has only one page).
            yield dict(data=data)

    def _extract_offers(self, response):
        """Return the offers found on ``response`` as a list of dicts."""
        return [
            {
                # The shop name is not extracted; the placeholder is kept so
                # the item shape stays the same.
                'shop': '',
                'price': item.xpath('.//div[@class="price"]/text()').get(),
            }
            for item in response.xpath(self._OFFER_XPATH)
        ]
Advertisement
Add Comment
Please, Sign In to add comment