Advertisement
dereksir

Untitled

Aug 25th, 2023
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.80 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4.  
  5. def write_to_csv(product_data):
  6.     # Open the CSV file for writing
  7.     with open('products.csv', 'w', newline='', encoding='utf-8') as csvfile:
  8.         # Define the field names for the CSV header
  9.         fieldnames = ['Product Name', 'Price']
  10.          # Create a DictWriter object
  11.         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
  12.      
  13.         # Write CSV header
  14.         writer.writeheader()
  15.        
  16.         # Loop through the product data list
  17.         for product in product_data:
  18.             # Write a new row with product name and price
  19.             writer.writerow({'Product Name': product['name'], 'Price': product['price']})
  20.  
  21. base_url = 'https://scrapingclub.com/exercise/list_infinite_scroll/'
  22. page_number = 1  # Start with the base URL
  23. total_pages = 6
  24. product_data = []
  25.  
  26. while page_number <= total_pages:
  27.     # Construct ajax request URL
  28.     url = f'{base_url}?page={page_number}'
  29.    
  30.     # Make GET request
  31.     response = requests.get(url)
  32.    
  33.     # Retrieve the response content
  34.     html_content = response.text
  35.        
  36.     # Parse the HTML content using Beautiful Soup
  37.     soup = BeautifulSoup(html_content, 'html.parser')
  38.        
  39.     # Extract product names and prices
  40.     products = soup.select('div.p-4 h4 > a')
  41.     prices = soup.select('div.p-4 h5')
  42.        
  43.     # Collect product information
  44.     for product, price in zip(products, prices):
  45.         product_name = product.get_text(strip=True)
  46.         product_price = price.get_text(strip=True)
  47.        
  48.         #add data to product_data []
  49.         product_data.append({'name': product_name, 'price': product_price})  
  50.        
  51.     # Move to the next page
  52.     page_number += 1
  53.  
  54. # Call the function to write data to CSV
  55. write_to_csv(product_data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement