Guest User

Untitled

a guest
May 19th, 2020
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.61 KB | None | 0 0
  1. import re
  2. import scrapy
  3.  
  4. from urllib.parse import urlparse
  5.  
  6. BASE_URL = 'https://mysite.com'
  7.  
  8.  
  9. class MySiteSpider(scrapy.Spider):
  10.     name = 'my_site'
  11.     allowed_domains = ['mysite.com']
  12.     start_urls = [
  13.         BASE_URL + 'product/657019065/',
  14.         BASE_URL + 'product/657019066/',
  15.         BASE_URL + 'product/657019067/',
  16.     ]
  17.  
  18.     def parse(self, response):
  19.         data = {
  20.             'article': urlparse(response.url).path.split('/')[2],
  21.             'offers': []
  22.         }
  23.         for item in response.xpath('//div[contains(@class, "n-snippet-list")]'
  24.                                    '/div[contains(@class, "n-snippet-card")]'):
  25.             price = item.xpath('.//div[@class="price"]/text()').get()
  26.             data['offers'].append({'shop': '', 'price': price})
  27.  
  28.         next_page_path = response.xpath('//div[contains(@class, "n-pager")]'
  29.                                         '/a[contains(@class, "n-pager__button-next")]'
  30.                                         '/@href').get()
  31.         if next_page_path is not None:
  32.             next_page = BASE_URL + next_page_path
  33.             next_page = response.urljoin(next_page)
  34.             yield scrapy.Request(next_page, callback=self.parse_next, cb_kwargs=dict(data=data))
  35.  
  36.     def parse_next(self, response, data):
  37.         for item in response.xpath('//div[contains(@class, "n-snippet-list")]'
  38.                                    '/div[contains(@class, "n-snippet-card")]'):
  39.             price = item.xpath('.//div[@class="price"]/text()').get()
  40.             data['offers'].append({'shop': '', 'price': price})
  41.  
  42.         yield dict(data=data)
Advertisement
Add Comment
Please, Sign In to add comment