# Untitled

import scrapy


class Mingkh(scrapy.Spider):
    """Spider that walks the paginated house listing given in ``start_urls``.

    Three kinds of pages are handled:

    * listing pages (``parse``) -- schedule every house link found in a
      table cell plus the ``rel="next"`` pagination link;
    * house pages (``parse_house``) -- schedule the company URLs found on
      the page and emit one item with the house address and URL;
    * company pages (``parse_company``) -- emit one item with the company
      URL, name, phone and address.

    House items and company items are yielded as separate, unlinked dicts.
    """

    name = 'houses'

    start_urls = ['https://dom.mingkh.ru/kemerovskaya-oblast/novokuzneck/houses?page=1']

    @staticmethod
    def _stripped_first(selection):
        """Return the first match of *selection* stripped, or ``''`` if none."""
        return selection.get(default='').strip()

    def parse(self, response):
        """Schedule house pages first, then the next listing page.

        Yields the requests produced by ``response.follow_all`` for each
        selector, in the order listed below.
        """
        targets = (
            ('td a::attr(href)', self.parse_house),
            ('li a[rel="next"]::attr(href)', self.parse),
        )
        for css_query, callback in targets:
            yield from response.follow_all(response.css(css_query), callback)

    def parse_house(self, response):
        """Schedule the company pages referenced by a house page, then emit it.

        Yields the company requests first, followed by a dict with the keys
        ``address`` (text of the page heading, stripped) and ``url``.
        """
        # Company links are not anchors: the target is read from the
        # ``data-url`` attribute of the clickable span.
        company_links = response.css('dd span.clickable::attr(data-url)')
        for request in response.follow_all(company_links, self.parse_company):
            yield request

        heading = response.css('div.block-heading-two h1::text')
        yield {
            'address': self._stripped_first(heading),
            'url': response.url,
        }

    def parse_company(self, response):
        """Emit a single item describing the company page.

        Each text field is the first match of its selector with surrounding
        whitespace removed, or an empty string when nothing matches.
        """
        name_nodes = response.css('div.block-heading-two h1::text')
        # NOTE(review): the two XPaths below are absolute and positional, so
        # they depend on the exact page layout -- confirm against the site.
        phone_nodes = response.xpath('/html[1]/body[1]/div[1]/div[2]/div[2]/div[3]/div[1]/dl[1]/dd[4]/text()')
        address_nodes = response.xpath('/html[1]/body[1]/div[1]/div[2]/div[2]/div[3]/div[1]/dl[1]/dd[3]/text()')

        yield {
            'company_url': response.url,
            'company_name': self._stripped_first(name_nodes),
            'company_phone': self._stripped_first(phone_nodes),
            'company_address': self._stripped_first(address_nodes),
        }