chankapack

Lowes scraper

Aug 26th, 2024
259
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.56 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4.  
  5. # Function to get all product URLs from a category page
  6. def get_product_urls(category_url):
  7. headers = {
  8. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
  9. 'Accept-Language': 'en-US,en;q=0.9',
  10. 'Referer': 'https://www.google.com/',
  11. 'Connection': 'keep-alive'
  12. }
  13. response = requests.get(category_url, headers=headers)
  14.  
  15. print(f"Fetching category page: {category_url}, Status Code: {response.status_code}")
  16.  
  17. if response.status_code != 200:
  18. print(f"Failed to fetch page: {category_url}")
  19. return []
  20.  
  21. soup = BeautifulSoup(response.content, 'html.parser')
  22.  
  23. product_urls = []
  24. for link in soup.find_all('a', href=True):
  25. href = link.get('href')
  26. if '/pd/' in href:
  27. product_urls.append('https://www.lowes.com' + href)
  28.  
  29. print(f"Found {len(product_urls)} products in category: {category_url}")
  30.  
  31. return product_urls
  32.  
  33. # Function to scrape product price and calculate discount percentage
  34. def scrape_product_details(product_url):
  35. headers = {
  36. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
  37. 'Accept-Language': 'en-US,en;q=0.9',
  38. 'Referer': 'https://www.google.com/',
  39. 'Connection': 'keep-alive'
  40. }
  41. response = requests.get(product_url, headers=headers)
  42.  
  43. print(f"Scraping URL: {product_url}, Status Code: {response.status_code}")
  44.  
  45. if response.status_code != 200:
  46. print(f"Failed to fetch page: {product_url}")
  47. return None
  48.  
  49. soup = BeautifulSoup(response.content, 'html.parser')
  50.  
  51. product_name_tag = soup.find('h1', {'class': 'styles__H1-sc-11vpuyu-0 krJSUv typography variant--h1 align--left product-brand-description'})
  52. original_price_tag = soup.find('span', {'class': 'now-price'})
  53. discounted_price_tag = soup.find('span', {'class': 'was-price'})
  54.  
  55. if product_name_tag:
  56. product_name = product_name_tag.text.strip()
  57. print(f"Product Name: {product_name}")
  58. else:
  59. print("Product name not found")
  60. return None
  61.  
  62. if original_price_tag and discounted_price_tag:
  63. try:
  64. original_price = float(original_price_tag.text.replace('$', '').replace(',', '').strip())
  65. discounted_price = float(discounted_price_tag.text.replace('$', '').replace(',', '').strip())
  66. discount_percent = ((original_price - discounted_price) / original_price) * 100
  67.  
  68. print(f"Original Price: ${original_price}, Discounted Price: ${discounted_price}, Discount Percent: {discount_percent}%")
  69.  
  70. return {
  71. 'product_name': product_name,
  72. 'product_url': product_url,
  73. 'original_price': original_price,
  74. 'discounted_price': discounted_price,
  75. 'discount_percent': discount_percent
  76. }
  77. except ValueError:
  78. print("Price conversion error")
  79. return None
  80. else:
  81. print("Price tags not found")
  82. return None
  83.  
  84. # Function to scrape deals from multiple categories
  85. def scrape_deals(category_urls, discount_threshold=20):
  86. all_deals = []
  87.  
  88. for category_url in category_urls:
  89. print(f"Scraping category: {category_url}")
  90. product_urls = get_product_urls(category_url)
  91.  
  92. for product_url in product_urls:
  93. product_details = scrape_product_details(product_url)
  94. if product_details and product_details['discount_percent'] >= discount_threshold:
  95. all_deals.append(product_details)
  96.  
  97. return all_deals
  98.  
  99. # Example category URLs (replace these with actual URLs from Lowe's)
  100. category_urls = [
  101. 'https://www.lowes.com/pl/microwaves/4294715798?goToProdList=true',
  102. 'https://www.lowes.com/c/Drills-drivers-Power-tools-Tools',
  103. # Add more categories as needed
  104. ]
  105.  
  106. # Define the discount threshold (e.g., 20% or more)
  107. discount_threshold = 0 # Set to 0 for testing
  108.  
  109. # Start the scraping process
  110. print("Starting scraping process...")
  111.  
  112. # Scrape deals
  113. deals = scrape_deals(category_urls, discount_threshold)
  114.  
  115. # Ensure there are deals before attempting to create a DataFrame
  116. if deals:
  117. deals_df = pd.DataFrame(deals)
  118. print(deals_df)
  119. deals_df.to_csv('lowes_deals.csv', index=False)
  120. else:
  121. print("No deals found that meet the discount threshold.")
  122.  
Advertisement
Add Comment
Please, Sign In to add comment