import requests
import time
import aiohttp
import asyncio

# Function to retrieve HTML content for asynchronous web scraping
async def fetch_page_async(session, url):
    async with session.get(url) as response:
        return await response.text()

# Function to retrieve HTML content for synchronous web scraping
def fetch_page_sync(url):
    response = requests.get(url)
    return response.text

# Function to scrape multiple pages synchronously and return time taken
def synchronous_scraper(urls):
    start_time = time.time()
    for url in urls:
        fetch_page_sync(url)
    end_time = time.time()
    return end_time - start_time

# Function to scrape multiple pages asynchronously and return time taken
async def asynchronous_scraper(urls):
    start_time = time.time()
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_page_async(session, url) for url in urls]
        await asyncio.gather(*tasks)
    end_time = time.time()
    return end_time - start_time

# URL list
urls = [
    "https://scrapeme.live/shop/",
    "https://scrapeme.live/shop/page/2/",
    "https://scrapeme.live/shop/page/3/",
]

# Benchmark both approaches
synchronous_time = synchronous_scraper(urls)
asynchronous_time = asyncio.run(asynchronous_scraper(urls))
print(f"Synchronous Time: {synchronous_time} seconds")
print(f"Asynchronous Time: {asynchronous_time} seconds")