Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
# Standard library
import csv
import json
from datetime import date, datetime  # merged the two separate datetime imports

# Third party
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Timestamp of this scrape run, captured once at import time. `now` feeds the
# Scraped_Date / Scraped_Time columns; `today` names the output CSV file.
# NOTE(review): naive local time (no timezone) — confirm that is intended.
now = datetime.now()
today = date.today()
class PetBarnProdScraper:
    """Scrape product listings (name, prices, ratings, swatch variants) from
    petbarn.com.au dry-dog-food category pages and dump them to a CSV file.
    """

    def __init__(self):
        # Accumulated product/variant rows. Kept per-instance: the original
        # used a shared class-level list, which leaked rows between instances.
        self.all_info = []

    def fetch(self, url):
        """HTTP GET *url* and return the ``requests.Response``, logging the
        request and its status code."""
        print(f"HTTP GET request to URL: {url}", end="")
        # timeout added so a stalled connection cannot hang the scraper forever
        res = requests.get(url, timeout=30)
        print(f" | Status Code: {res.status_code}")
        return res

    def parse(self, response):
        """Extract product rows (plus one row per swatch variant, when
        present) from one category-listing page and append them to
        ``self.all_info``.

        :param response: ``requests.Response`` for a product-listing page.
        """
        soup = BeautifulSoup(response.text, "html.parser")

        # Parallel lists indexed by product position on the page. Not every
        # product has an old price / rating / review link, so some of these
        # lists are shorter than `titles` — guarded per-list below.
        product_ids = [
            pid.get("id").split("-")[-1] for pid in soup.select("div.product-item-info")
        ]
        titles = [
            a.text.replace("\n", "").strip() for a in soup.select("a.product-item-link")
        ]
        old_price = [
            p.select_one("span.price").text for p in soup.select("span.old-price")
        ]
        ratings = [r.get("title") for r in soup.select("div.rating-result")]
        no_of_reviews = [review.text for review in soup.select("a.action.view")]

        # The 4th x-magento-init script carries the analytics datalayer with
        # structured product data. NOTE(review): the hard-coded index 3 is
        # position-dependent and will break if the page layout changes.
        data = (
            soup.select('script[type="text/x-magento-init"]')[3]
            .text.replace("\n", "")
            .strip()
        )
        data_json = json.loads(data)
        # The datalayer payload is itself a JSON-encoded string.
        data_j = json.loads(
            data_json["*"]["Overdose_AdobeAnalytics/js/view/datalayer"]["datalayer"][0]
        )

        for idx, title in enumerate(titles):
            # Per-list bounds checks (the original guarded all three with one
            # bare except, so a missing old price also wiped a valid rating).
            # NOTE(review): `old_price` holds only discounted products, so its
            # positions may not align with `titles` — verify against the page.
            ratings_count = ratings[idx] if idx < len(ratings) else "N/A"
            reviews_count = no_of_reviews[idx] if idx < len(no_of_reviews) else "N/A"
            last_price = old_price[idx] if idx < len(old_price) else "N/A"

            prod = data_j["PLP"]["products"][idx]
            d = {
                "Scraped_Date": now.strftime("%m/%d/%Y"),
                # leading space preserved from the original "date, time" split
                "Scraped_Time": now.strftime(" %H:%M:%S"),
                "product_name": title,
                "price": prod["productPrice"],
                "old_price": last_price,
                "ratings": ratings_count,
                "number_of_reviews": reviews_count,
                "productSKU": prod["productSKU"],
                "productSize": prod["productSize"],
                "priceWithoutTax": prod["productPriceLessTax"],
                "lifeStage": prod["lifeStage"],
            }

            # ---- per-product swatch (variant) data -------------------------
            prod_id = product_ids[idx]
            details = soup.select_one(
                f"script:-soup-contains('[data-role=swatch-option-{prod_id}]')"
            )
            if details:
                json_details = json.loads(details.text.replace("\n", "").strip())
                dataJC = json_details[f"[data-role=swatch-option-{prod_id}]"][
                    "Magento_Swatches/js/swatch-renderer"
                ]["jsonConfig"]
                productId = dataJC["productId"]
                # One record per variant option key.
                # NOTE(review): "1299" is presumably the Magento attribute id
                # for the size option — confirm against the site config.
                jcInfs = [
                    {
                        "productId": productId,
                        "optionKey": k,
                        "sku": "?",
                        "index": v["1299"] if "1299" in v else None,
                    }
                    for k, v in dataJC["index"].items()
                ]
                # (jsonConfig key, value sub-key, nesting shape) describing
                # how each extra jsonConfig section maps option keys to values.
                orInfs = [
                    ("optionPrices", "amount", "reverseNest"),
                    ("dynamic", "value", "nest1"),
                    ("labels", "", "reverseNest"),
                    ("hasEndDate", "", "noNesting"),
                ]
                relevInfs = []
                for kk, vk, nt in orInfs:
                    if kk not in dataJC:
                        continue
                    if nt == "noNesting":
                        # Flat value: keep the section as-is.
                        relevInfs += [(kk, vk, dataJC[kk])]
                        continue
                    if nt == "nest1":
                        # One nesting level: {subKey: {optionKey: value}}.
                        relevInfs += [
                            (sub_key, vk, sub_val)
                            for sub_key, sub_val in dataJC[kk].items()
                        ]
                        continue
                    if nt != "reverseNest":
                        # Unknown nesting type: skip (default action slot).
                        continue
                    # reverseNest: dataJC[kk] is {optionKey: {prop: value}};
                    # invert it to {prop: {optionKey: value}} so lookups below
                    # can go option-key-first.
                    orInf = {}
                    for pk, po in dataJC[kk].items():
                        for kpo, vpo in po.items():
                            if kpo not in orInf:
                                orInf[kpo] = {}
                            orInf[kpo][pk] = vpo
                    relevInfs += [
                        (inv_key, vk, inv_val) for inv_key, inv_val in orInf.items()
                    ]
                for i, j in enumerate(jcInfs):
                    # Pull every relevant section's value for this option key.
                    for kk, vk, vd in relevInfs:
                        if j["optionKey"] not in vd:
                            continue
                        relevInf = vd[j["optionKey"]]
                        if not isinstance(relevInf, dict):
                            j[kk] = relevInf
                        elif vk in relevInf and relevInf[vk]:
                            j[kk] = relevInf[vk]
                    # Merge variant fields into a copy of the main product
                    # row; main-row values win on key collisions.
                    jcInfs[i] = {
                        **d,
                        **{jk: jv for jk, jv in j.items() if jk not in d},
                    }
                for j in jcInfs:
                    self.all_info.append(j)

            # NOTE(review): the base row is appended even when variant rows
            # were added above, so swatch products get one extra row. Kept to
            # preserve the original output; wrap this in an `else` of
            # `if details:` to de-duplicate.
            self.all_info.append(d)

    def to_csv(self):
        """Write all accumulated rows to ``<date>_petbarn.csv`` (blank cells
        for fields a row is missing)."""
        df = pd.DataFrame(self.all_info).fillna("")
        df.to_csv(f"{today}_petbarn.csv", index=False)
        print(f'Stored results to "{today}_petbarn.csv"')

    def run(self):
        """Scrape each listing page in turn, then write the combined CSV."""
        for page in range(1, 2):  # total number of pages to scrape
            url = f"https://www.petbarn.com.au/dogs/dog-food/dry-dog-food?p={page}"
            response = self.fetch(url)
            self.parse(response)
        self.to_csv()
# Script entry point: build a scraper and run the full fetch/parse/export cycle.
if __name__ == "__main__":
    PetBarnProdScraper().run()
Advertisement
Add Comment
Please sign in to add a comment.