sidorenkov

Answer #2

Aug 3rd, 2021
627
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import time
  2. import aiohttp
  3. import asyncio
  4.  
  5.  
  async def get_page(session, category: str, page_id: int) -> str:
      """Download one brand listing page from ozon.ru and return its body text.

      Args:
          session: Client session whose ``get(url)`` yields an async context
              manager producing a response with an awaitable ``text()``
              (e.g. an ``aiohttp.ClientSession``).
          category: Brand slug placed into the URL path.
          page_id: Page number; a falsy value (``0``) requests the bare brand
              URL with no ``?page=`` query string.

      Returns:
          The response body as text.
      """
      base_url = f'https://www.ozon.ru/brand/{category}/'
      # Only a truthy page id gets a query string appended.
      url = f'{base_url}?page={page_id}' if page_id else base_url
      print(f'get url: {url}')
      async with session.get(url) as response:
          body = await response.text()
      return body
  14.  
  15.  
  async def load_data(category_list=('adidas-144082850', 'puma-87235756'),
                      page_count: int = 50) -> None:
      """Fetch listing pages for every brand category concurrently.

      Args:
          category_list: Iterable of brand slugs passed to ``get_page``.
              Defaults to the two brands this script was written for.
          page_count: Number of page ids requested per category; ids are
              taken from ``range(page_count)``, so they start at 0.

      All requests share one ``aiohttp.ClientSession`` and are awaited together
      with ``asyncio.gather``, so any single failing request raises out of
      this coroutine.
      """
      async with aiohttp.ClientSession() as session:
          # Build every coroutine first so gather can run them concurrently
          # over the same session.
          # NOTE(review): page id 0 is requested without a ?page= parameter;
          # it may duplicate page 1 on the site - confirm.
          tasks = [
              get_page(session, category, page_id)
              for category in category_list
              for page_id in range(page_count)
          ]
          results = await asyncio.gather(*tasks)

      # The session is no longer needed once all bodies are downloaded, so the
      # results are handled after it has been closed.
      for text in results:
          # Placeholder: process the downloaded text, save it to a file/database.
          pass
  29.  
  30.  
  if __name__ == '__main__':
      # perf_counter is monotonic, so the measured duration is not affected by
      # wall-clock adjustments while the downloads are running.
      start_time = time.perf_counter()
      # asyncio.run creates a fresh event loop, runs the coroutine to completion
      # and closes the loop afterwards. Calling asyncio.get_event_loop() with no
      # running loop is deprecated and fails on newer Python versions.
      asyncio.run(load_data())
      print("--- %s seconds ---" % (time.perf_counter() - start_time))
RAW Paste Data