Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
def get_product_urls(category_url, timeout=10):
    """Return the product-page URLs linked from a category page.

    Args:
        category_url: URL of the category/listing page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely.

    Returns:
        A list of absolute product URLs (anchors whose ``href`` contains
        ``/pd/``), de-duplicated in first-seen order. The list is empty
        when the request fails or the response status is not 200.
    """
    base_url = 'https://www.lowes.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive'
    }

    try:
        response = requests.get(category_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # A network failure on one category must not abort the whole run.
        print(f"Failed to fetch page: {category_url} ({exc})")
        return []

    print(f"Fetching category page: {category_url}, Status Code: {response.status_code}")
    if response.status_code != 200:
        print(f"Failed to fetch page: {category_url}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # urljoin leaves already-absolute hrefs intact instead of gluing the
    # domain in front of them; dict.fromkeys drops repeated links to the
    # same product while keeping first-seen order.
    product_urls = list(dict.fromkeys(
        urljoin(base_url, link['href'])
        for link in soup.find_all('a', href=True)
        if '/pd/' in link['href']
    ))

    print(f"Found {len(product_urls)} products in category: {category_url}")
    return product_urls
def _parse_price(text):
    """Convert a price string such as ``"$1,299.00"`` to a float."""
    return float(text.replace('$', '').replace(',', '').strip())


def scrape_product_details(product_url, timeout=10):
    """Scrape a product page and compute its discount percentage.

    Args:
        product_url: URL of the product page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with keys ``product_name``, ``product_url``,
        ``original_price``, ``discounted_price`` and ``discount_percent``,
        or ``None`` when the page cannot be fetched, the name or price
        tags are missing, or the prices cannot be turned into a
        meaningful percentage.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive'
    }

    try:
        response = requests.get(product_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Skip this product rather than aborting the whole scrape.
        print(f"Failed to fetch page: {product_url} ({exc})")
        return None

    print(f"Scraping URL: {product_url}, Status Code: {response.status_code}")
    if response.status_code != 200:
        print(f"Failed to fetch page: {product_url}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): this class string looks build-generated and may change
    # when the site's styles are rebuilt - confirm it still matches.
    product_name_tag = soup.find('h1', {'class': 'styles__H1-sc-11vpuyu-0 krJSUv typography variant--h1 align--left product-brand-description'})
    # "was-price" holds the pre-sale (original) price and "now-price" the
    # current (discounted) one; reading them the other way round makes
    # every real discount come out negative.
    original_price_tag = soup.find('span', {'class': 'was-price'})
    discounted_price_tag = soup.find('span', {'class': 'now-price'})

    if not product_name_tag:
        print("Product name not found")
        return None
    product_name = product_name_tag.text.strip()
    print(f"Product Name: {product_name}")

    if not (original_price_tag and discounted_price_tag):
        print("Price tags not found")
        return None

    try:
        original_price = _parse_price(original_price_tag.text)
        discounted_price = _parse_price(discounted_price_tag.text)
    except ValueError:
        print("Price conversion error")
        return None

    if original_price <= 0:
        # Guard the division below; a zero/negative list price is bad data.
        print("Original price is not positive; cannot compute discount")
        return None

    discount_percent = ((original_price - discounted_price) / original_price) * 100
    print(f"Original Price: ${original_price}, Discounted Price: ${discounted_price}, Discount Percent: {discount_percent}%")
    return {
        'product_name': product_name,
        'product_url': product_url,
        'original_price': original_price,
        'discounted_price': discounted_price,
        'discount_percent': discount_percent
    }
def scrape_deals(category_urls, discount_threshold=20):
    """Collect discounted products across several category pages.

    Args:
        category_urls: Iterable of category page URLs to crawl.
        discount_threshold: Minimum discount percentage a product must
            reach to be included in the result.

    Returns:
        A list of product-detail dicts (as returned by
        ``scrape_product_details``) whose ``discount_percent`` is at
        least ``discount_threshold``. Each product URL is scraped at most
        once, even when it appears in more than one category.
    """
    all_deals = []
    seen_urls = set()
    for category_url in category_urls:
        print(f"Scraping category: {category_url}")
        for product_url in get_product_urls(category_url):
            # A product listed in several categories would otherwise be
            # fetched again and show up as a duplicate row.
            if product_url in seen_urls:
                continue
            seen_urls.add(product_url)

            product_details = scrape_product_details(product_url)
            if product_details and product_details['discount_percent'] >= discount_threshold:
                all_deals.append(product_details)
    return all_deals
# Category listing pages to crawl (placeholders - swap in the real Lowe's
# category URLs you care about).
category_urls = [
    'https://www.lowes.com/pl/microwaves/4294715798?goToProdList=true',
    'https://www.lowes.com/c/Drills-drivers-Power-tools-Tools',
    # Add more categories as needed
]

# Minimum discount percentage a product needs in order to be reported.
# Left at 0 for testing so every priced product is kept.
discount_threshold = 0

# Run the crawl over every configured category.
print("Starting scraping process...")
deals = scrape_deals(category_urls, discount_threshold)

# Only build and save the table when something was found; an empty result
# is reported on stdout and no CSV is written.
if not deals:
    print("No deals found that meet the discount threshold.")
else:
    deals_df = pd.DataFrame(deals)
    print(deals_df)
    deals_df.to_csv('lowes_deals.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment