# Untitled

import json

import scrapy


class BrickSetSpider(scrapy.Spider):
    """Spider that scrapes offer listings from the osta.ee category at ``url``.

    Crawling starts at ``url`` and continues through the "next page" link of
    each listing page until no such link is found. Every offer is yielded as
    a dict with the keys ``ID``, ``name``, ``price`` and ``image``.
    """

    name = "osta_spider"  # identifier of this spider
    COUNTER = 0  # running ID given to each yielded item
    # Sent with every request issued by this spider.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0'
    }
    url = "https://www.osta.ee/kategooria/arvutid/lauaarvutid"

    # CSS selectors for the listing page, defined once instead of on every
    # parse() call / loop iteration.
    MAIN_CONTAINER_SELECTOR = '.main-content__section-helper'
    THUMB_CONTENT_SELECTOR = '.offer-thumb__content'
    THUMB_IMAGE_SELECTOR = '.offer-thumb__image'
    TITLE_SELECTOR = 'h3 ::attr(title)'
    PRICE_SELECTOR = '.offer-thumb__price--current ::text'
    IMAGE_SELECTOR = 'img ::attr(data-original)'
    NEXT_PAGE_SELECTOR = 'a.icon.next.page-link ::attr(href)'

    def start_requests(self):
        """Yield the single initial request for ``url``."""
        yield scrapy.Request(self.url, headers=self.headers)

    def parse(self, response):
        """Extract the offers of one listing page and follow pagination.

        Args:
            response: The downloaded listing page.

        Yields:
            One dict per offer (``ID``, ``name``, ``price``, ``image``);
            missing fields are ``None``. After the offers, a request for the
            next listing page is yielded when a next-page link is present.
        """
        container = response.css(self.MAIN_CONTAINER_SELECTOR)
        contents = container.css(self.THUMB_CONTENT_SELECTOR)
        images = container.css(self.THUMB_IMAGE_SELECTOR)

        # Content and image nodes are paired purely by position, and zip()
        # stops at the shorter of the two lists.
        # NOTE(review): if an offer can lack an image node, later pairs would
        # be shifted -- confirm against the page markup.
        for thumb_content, thumb_image in zip(contents, images):
            name = thumb_content.css(self.TITLE_SELECTOR).extract_first()
            price = thumb_content.css(self.PRICE_SELECTOR).extract_first()
            image = thumb_image.css(self.IMAGE_SELECTOR).extract_first()

            yield {
                'ID': self.COUNTER,
                'name': name,
                # Only strip when a price text was actually found.
                'price': price.strip() if price else price,
                'image': image,
            }

            self.COUNTER += 1

        next_page = response.css(self.NEXT_PAGE_SELECTOR).extract_first()

        if next_page:
            # Resolve the href against the page's base URL rather than
            # gluing it onto a hard-coded host: concatenation breaks for
            # absolute hrefs and doubles the slash for root-relative ones.
            url = response.urljoin(next_page)
            yield scrapy.Request(url, self.parse, headers=self.headers)