import aiohttp
import asyncio
import time
from bs4 import BeautifulSoup

async def fetch_page(session, url):
    # Make GET request using session
    async with session.get(url) as response:
        # Retrieve HTML content
        html_content = await response.text()
        # Parse HTML content using BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')
        # Return parsed HTML
        return soup

async def main():
    # Initialize a list of URLs
    urls = [
        "https://scrapeme.live/shop/",
        "https://scrapeme.live/shop/page/2/",
        "https://scrapeme.live/shop/page/3/",
    ]
    # Time Tracking: Start Time
    start_time = time.time()
    # Create an AIOHTTP session
    async with aiohttp.ClientSession() as session:
        # Initialize tasks list
        tasks = []
        # Loop through URLs and append tasks
        for url in urls:
            tasks.append(fetch_page(session, url))
        # Group and execute tasks concurrently
        htmls = await asyncio.gather(*tasks)
    # Time Tracking: End Time
    end_time = time.time()
    # Process the extracted information
    for url, soup in zip(urls, htmls):
        # Find the product list in <ul class="products">
        product_list = soup.find('ul', class_='products')
        # Find all <li> product entries
        products = product_list.find_all('li')
        # Iterate through products and extract name, price, and image of each
        for product in products:
            name = product.find('h2').text
            price = product.find('span', class_='amount').text
            image = product.find('img')['src']
            print(f"Product from {url}:\nName: {name}\nPrice: {price}\nImage: {image}\n")
    # Calculate and print the time taken
    print(f"Time taken: {end_time - start_time} seconds")

# Run the main function
asyncio.run(main())