Advertisement
Guest User

Untitled

a guest
Nov 21st, 2019
141
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
import asyncio

import aiohttp
import pandas as pd
  2.  
  3. WORKERS_COUNT = 5
  4. df = None
  5.  
  6.  
  7. async download(session, url):
  8.     async with session.get(url) as response:
  9.         assert response.status == 200
  10.         response_text = await response.read()
  11.  
  12.         return {
  13.             'status_code': response.status_code,
  14.             ...
  15.         }
  16.        
  17.  
  18. async process_data(row_index. session):
  19.     url = df.loc[index, 'url']
  20.     data = await download(session, url)
  21.  
  22.     df.loc[index, 'status_code'] = data.get('status_code')
  23.     ...
  24.  
  25.  
  26. async def worker(q, session):
  27.     while True:
  28.         row_index = await q.get()
  29.         # todo: остановиться если очередь пустая
  30.         await process_data(row_index)
  31.  
  32.  
  33. async def main(loop):
  34.     global df
  35.  
  36.     df = pd.read_pickle('with_sent.pkl')
  37.     # инициализировать данные
  38.     ...
  39.  
  40.     q = asyncio.Queue()
  41.     for i, _ in df.iterrows():
  42.         await q.put(i)
  43.    
  44.     with aiohttp.ClientSession(loop=loop) as session:
  45.         tasks = [
  46.             loop.create_task(worker(q, session))
  47.             for _ in range(WORKERS_COUNT)
  48.         ]
  49.  
  50.     await asyncio.wait(tasks)
  51.  
  52.     # сохранить данные
  53.     df.to_pickle('output.pkl')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement