Advertisement
Guest User

Amazon Site Scraper

a guest
Aug 26th, 2024
609
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.95 KB | Source Code | 0 0
  1. import requests
  2. import bs4
  3.  
# Search term sent to Amazon. Keep it specific — instead of "books" choose
# "books about trees" or something similarly narrow, so the (up to 100-page)
# scan below yields relevant results.
__QUERY__ = 'stickers'
  6.  
  7.  
  8. class SessionData:
  9.     def __init__(self, headers=None):
  10.         if headers is None:
  11.             headers = {
  12.                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"}
  13.         initial = requests.get(url="https://amazon.com/home", headers=headers)
  14.  
  15.         cookies_dict = initial.cookies.get_dict()
  16.         session_id = cookies_dict['session-id']
  17.         session_id_time = cookies_dict['session-id-time']
  18.         i18n_prefs = cookies_dict['i18n-prefs']
  19.  
  20.         self.session_id = session_id
  21.         self.session_id_time = session_id_time
  22.         self.i18n_prefs = i18n_prefs
  23.  
  24.         self.__dict__ = {'session-id': self.session_id, 'session-id-time': self.session_id_time,
  25.                          'i18n-prefs': self.i18n_prefs}
  26.  
  27.  
  28. class AmazonSession:
  29.     def __init__(self):
  30.         self.headers = {
  31.             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"}
  32.         self.sessionData = SessionData()
  33.  
  34.     def getRawSearchHTML(self, query: str, page: int = 1) -> requests.Response:
  35.         return requests.get(
  36.             url=f'https://www.amazon.com/s?k={query}&page={page}',
  37.             headers=self.headers,
  38.             cookies=self.sessionData.__dict__
  39.         )
  40.  
  41.     def getPaginationAmount(self, query: str) -> int:
  42.         return 100  # amazon does not have an actual system to find the maximum amount, https://amazaon.com/s?k=cord&page=100 xd
  43.  
  44.  
  45. s = AmazonSession()
  46.  
  47. for i in range(s.getPaginationAmount((__QUERY__))):
  48.     html_ = s.getRawSearchHTML(__QUERY__, i + 1).text
  49.  
  50.     soup = bs4.BeautifulSoup(html_, 'html.parser')
  51.     product_containers = soup.find_all(name='div', attrs={'data-component-type': 's-search-result'})
  52.  
  53.     print(f'found {len(product_containers)} results for the query: {__QUERY__} (page {i})')
  54.     if len(product_containers) == 0:
  55.         print('end of pagination detected')
  56.         break
  57.  
  58.     for product in product_containers:
  59.         title_card = product.find(name='div', attrs={'data-cy': 'title-recipe'})
  60.         if title_card:
  61.             title = title_card.find(name='span')
  62.             if title:
  63.                 print(f'* Found product: {title.text}')
  64.                 desc_ = product.find(name='span', attrs={'class': 'a-size-base-plus a-color-base a-text-normal'})
  65.                 if desc_:
  66.                     print(f'\tdescription -> {desc_.text}')
  67.                     img_ = product.find(name='img', attrs={'class': 's-image'})
  68.                     if img_ and img_['src']:
  69.                         print(f'\t\tthumbnail image => {img_['src']}')
  70.             else:
  71.                 print('[no title found]')
  72.         else:
  73.             print('product is invalid]')
  74.  
  75.         print("\n")
  76.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement