# Untitled

import scrapy
from scrapy.exceptions import CloseSpider

class AmazonBooksSpider(scrapy.Spider):
    """Scrape book data from Amazon search-result pages and book detail pages.

    Flow:
      1. ``start_requests`` fetches the first search-result page.
      2. ``parse`` reads the listing-level fields of every book on the page,
         schedules one detail-page request per book (handing the listing
         fields over via ``cb_kwargs``) and then schedules the next
         search-result page.
      3. ``parse_item`` reads the detail-page fields and yields one item per
         book, merging them with the listing fields.

    NOTE(review): every XPath below matches on exact ``class``/``id``
    attribute values of the site's markup; they presumably need updating
    whenever that markup changes.
    """

    name = 'amazon_books'

    # Kept as a class attribute for backward compatibility; on its own Scrapy
    # ignores it, so it is wired into ``custom_settings`` below.
    CLOSESPIDER_PAGECOUNT = 5

    # Scrapy only picks up per-spider settings from ``custom_settings``.
    # NOTE: CLOSESPIDER_PAGECOUNT limits the number of *responses* crawled,
    # which includes book detail pages, not only search-result pages.
    custom_settings = {'CLOSESPIDER_PAGECOUNT': CLOSESPIDER_PAGECOUNT}

    def start_requests(self):
        """Yield the request for the first search-result page.

        The page number is carried in ``meta['current_page']`` so ``parse``
        can compute the number of the following page.
        """
        yield scrapy.Request(
            url='https://www.amazon.com/s?bbn=1&rh=n%3A283155%2Cn%3A%211000%2Cn%3A1%2Cp_n_publication_date%3A1250226011&dc&fst=as%3Aoff&qid=1606224210&rnid=1250225011&ref=lp_1_nr_p_n_publication_date_0',
            callback=self.parse,
            meta={'current_page': 1},
        )

    def parse(self, response):
        """Parse one search-result page.

        Yields one detail-page request per book found on the page, followed
        by a request for the next search-result page. If no book containers
        match, nothing is yielded, which ends pagination.

        Args:
            response: the search-result page; ``response.meta`` is expected
                to hold ``current_page`` (defaults to 1 when absent).
        """
        books = response.xpath('//*[@class="s-include-content-margin s-border-bottom s-latency-cf-section"]')
        if not books:
            # No result containers matched: stop paginating instead of
            # requesting further pages indefinitely.
            self.logger.info('No books found on %s; stopping pagination', response.url)
            return

        for book in books:
            # Resolve the link relative to *this* book so each request
            # carries the listing data of the book it belongs to.
            href = book.xpath('.//h2[@class="a-size-mini a-spacing-none a-color-base s-line-clamp-2"]/a/@href').get()
            if href is None:
                # Without a detail link there is no page to scrape.
                continue

            yield scrapy.Request(
                url=response.urljoin(href),
                callback=self.parse_item,
                cb_kwargs={
                    'name': book.xpath('.//h2[@class="a-size-mini a-spacing-none a-color-base s-line-clamp-2"]/a/span/text()').get(),
                    'date': book.xpath('.//*[@class="a-size-base a-color-secondary a-text-normal"]/text()').get(),
                    'rating': book.xpath('.//*[@class="a-icon-alt"]/text()').get(),
                    'reviewers': book.xpath('.//a[@class="a-link-normal"]/span/text()').get(),
                    'image_url': book.xpath('.//*[@class="a-section aok-relative s-image-fixed-height"]/img/@src').get(),
                },
            )

        # Pagination belongs to the listing callback: only listing responses
        # carry ``current_page`` and only ``parse`` can handle a listing page.
        next_page = response.meta.get('current_page', 1) + 1
        yield scrapy.Request(
            url='https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_nine_browse-bin%3A3291437011&dc&fs=true&page=' + str(next_page) + '&qid=1623501355&rnid=3291435011&ref=sr_pg_1',
            callback=self.parse,
            meta={'current_page': next_page},
        )

    def parse_item(self, response, name, date, rating, reviewers, image_url):
        """Parse one book detail page and yield the complete book item.

        Args:
            response: the book detail page.
            name: book title taken from the search-result page.
            date: publication date text taken from the search-result page.
            rating: rating text taken from the search-result page.
            reviewers: reviewer-count text taken from the search-result page.
            image_url: cover image URL taken from the search-result page.

        Yields:
            A dict with the listing fields plus author, prices and the detail
            page URL. Any field whose XPath matches nothing is ``None``.
        """
        # NOTE(review): the price slots are addressed by position (1, 4, 5);
        # presumably this relies on a fixed ordering of formats — confirm.
        yield {
            'book_name': name,
            'book_author': response.xpath('//*[@class="author notFaded"]/span/a/text()').get(),
            'book_published_date': date,
            'book_hardcover_sell_price': response.xpath('(//*[@class="slot-price"]/span)[4]/text()').get(),
            'book_kindle_sell_price': response.xpath('(//*[@class="slot-price"]/span)[1]/text()').get(),
            'book_audio_price': response.xpath('(//*[@class="slot-price"]/span)[5]/text()').get(),
            'book_list_price': response.xpath('//*[@id="listPrice"]/text()').get(),
            'book_saving_amount': response.xpath('//*[@id="savingsAmount"]/text()').get(),
            'book_rating': rating,
            'book_reviewers': reviewers,
            'book_url': response.url,
            'book_image_url': image_url,
        }