Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Example of running a simple asynchronous task pool.
- It can run on threads as well as on processes.
- Based on the examples in the official documentation
- https://docs.python.org/3/library/concurrent.futures.html
- and on the article
- https://dev-gang.ru/article/kak-sdelat-kod-python-parallelnym-s-pomosczu-3h-strok-x32tjod5s6/
- """
- import concurrent.futures
- import urllib.request
- import time
- import datetime
- URLS = ['http://www.foxnews.com/',
- 'http://www.cnn.com/',
- 'http://www.bbc.com/',
- 'http://www.nytimes.com/',
- 'https://echo.msk.ru/',
- 'https://nat-geo.ru/',
- ]
# Support function: current time for debug prints
def dt_now():
    """Return the current local time formatted as ``HH:MM:SS.ffffff``."""
    current = datetime.datetime.now()
    return current.strftime('%H:%M:%S.%f')
# Retrieve a single page and report the URL and contents
def load_url(url, timeout):
    """Download ``url`` and return the raw response body.

    Prints a timestamped line when the download starts and another one
    with the elapsed time (rounded to two decimals) when it finishes.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        timeout: Timeout forwarded to ``urllib.request.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response returns.
    """
    # NOTE(review): tiny pause before the first log line; presumably it
    # keeps the start messages of concurrent workers in order - confirm.
    time.sleep(0.001)
    print(f'[{dt_now()}] start load_url {url}')

    started = time.monotonic()
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()
    elapsed = round(time.monotonic() - started, 2)

    print(f'[{dt_now()}] load_url delta {elapsed}s url {url}')
    return body
def main(PoolExecutor, max_workers=1):
    """Download every URL in ``URLS`` and print per-page and total timings.

    Args:
        PoolExecutor: Executor class from ``concurrent.futures`` used to run
            the downloads, or a falsy value (e.g. ``None``) to download the
            pages one after another without any pool.
        max_workers: Worker count passed to the executor; it is also echoed
            in the final summary line.
    """
    started = time.monotonic()
    executor_name = PoolExecutor.__name__ if PoolExecutor else 'without pooling'
    print(f'{executor_name} start main function:')

    def report(url, fetch):
        # Run ``fetch`` and print either the page size or the failure.
        try:
            payload = fetch()
        except Exception as exc:
            print(f'[{dt_now()}] {url} generated an exception: {exc}')
        else:
            print(f'[{dt_now()}] {url} page is {len(payload)} bytes')

    def fetch_directly(url):
        # Sequential download, preceded by the same short pause as pooled runs.
        time.sleep(0.001)
        return load_url(url, 60)

    if not PoolExecutor:
        for url in URLS:
            report(url, lambda: fetch_directly(url))
    else:
        # The with statement ensures the workers are cleaned up promptly
        with PoolExecutor(max_workers=max_workers) as executor:
            # Start the load operations and remember which URL each future serves
            future_to_url = {}
            for url in URLS:
                time.sleep(0.001)
                future_to_url[executor.submit(load_url, url, 60)] = url
            # Report results in completion order, not submission order
            for future in concurrent.futures.as_completed(future_to_url):
                report(future_to_url[future], future.result)

    elapsed = round(time.monotonic() - started, 2)
    print(f'{executor_name} total executing time {elapsed}s for max_workers {max_workers}')
if __name__ == '__main__':
    # Run the same workload three ways to compare the thread pool with the
    # process pool:
    # * the process pool will be faster when the tasks are CPU-dominated
    # * the thread pool will perform the same but use fewer resources when
    #   the tasks mostly wait on input/output

    # Plain synchronous run
    main(None)

    max_workers = 6
    pooled_executors = (
        # asynchronous run using worker threads
        concurrent.futures.ThreadPoolExecutor,
        # asynchronous run using worker processes
        concurrent.futures.ProcessPoolExecutor,
    )
    for pool_class in pooled_executors:
        print()
        main(pool_class, max_workers)
- # run statistics #
- """
- without pooling start main function:
- [11:00:24.075816] start load_url http://www.foxnews.com/
- [11:00:30.107250] load_url delta 6.03s url http://www.foxnews.com/
- [11:00:30.107250] http://www.foxnews.com/ page is 277709 bytes
- [11:00:30.111428] start load_url http://www.cnn.com/
- [11:00:45.122152] load_url delta 15.02s url http://www.cnn.com/
- [11:00:45.122152] http://www.cnn.com/ page is 1115841 bytes
- [11:00:45.124163] start load_url http://www.bbc.com/
- [11:00:55.071005] load_url delta 9.95s url http://www.bbc.com/
- [11:00:55.071962] http://www.bbc.com/ page is 296305 bytes
- [11:00:55.075136] start load_url http://www.nytimes.com/
- [11:01:11.237171] load_url delta 16.16s url http://www.nytimes.com/
- [11:01:11.237171] http://www.nytimes.com/ page is 1467771 bytes
- [11:01:11.240182] start load_url https://echo.msk.ru/
- [11:01:16.486315] load_url delta 5.23s url https://echo.msk.ru/
- [11:01:16.486315] https://echo.msk.ru/ page is 226628 bytes
- [11:01:16.488317] start load_url https://nat-geo.ru/
- [11:01:21.548233] load_url delta 5.06s url https://nat-geo.ru/
- [11:01:21.548233] https://nat-geo.ru/ page is 100076 bytes
- without pooling total executing time 57.47s for max_workers 1
- ThreadPoolExecutor start main function:
- [11:01:21.555370] start load_url http://www.foxnews.com/
- [11:01:21.556191] start load_url http://www.cnn.com/
- [11:01:21.558191] start load_url http://www.bbc.com/
- [11:01:21.560231] start load_url http://www.nytimes.com/
- [11:01:21.561238] start load_url https://echo.msk.ru/
- [11:01:21.562226] start load_url https://nat-geo.ru/
- [11:01:22.293995] load_url delta 0.73s url https://nat-geo.ru/
- [11:01:22.293995] https://nat-geo.ru/ page is 100076 bytes
- [11:01:22.554456] load_url delta 1.0s url http://www.foxnews.com/
- [11:01:22.554456] http://www.foxnews.com/ page is 277709 bytes
- [11:01:26.920251] load_url delta 5.36s url https://echo.msk.ru/
- [11:01:26.920251] https://echo.msk.ru/ page is 226628 bytes
- [11:01:31.675108] load_url delta 10.11s url http://www.nytimes.com/
- [11:01:31.675108] http://www.nytimes.com/ page is 1467771 bytes
- [11:01:31.796914] load_url delta 10.23s url http://www.bbc.com/
- [11:01:31.796914] http://www.bbc.com/ page is 296305 bytes
- [11:01:36.444974] load_url delta 14.89s url http://www.cnn.com/
- [11:01:36.444974] http://www.cnn.com/ page is 1115841 bytes
- ThreadPoolExecutor total executing time 14.91s for max_workers 6
- ProcessPoolExecutor start main function:
- [11:01:36.678976] start load_url http://www.foxnews.com/
- [11:01:36.690976] start load_url http://www.cnn.com/
- [11:01:36.700976] start load_url http://www.bbc.com/
- [11:01:36.717975] start load_url http://www.nytimes.com/
- [11:01:36.723975] start load_url https://echo.msk.ru/
- [11:01:36.740981] start load_url https://nat-geo.ru/
- [11:01:37.090603] load_url delta 0.34s url https://nat-geo.ru/
- [11:01:37.091604] https://nat-geo.ru/ page is 100076 bytes
- [11:01:37.144602] load_url delta 0.45s url http://www.foxnews.com/
- [11:01:37.146611] http://www.foxnews.com/ page is 277708 bytes
- [11:01:41.545505] load_url delta 4.83s url https://echo.msk.ru/
- [11:01:41.545505] https://echo.msk.ru/ page is 226731 bytes
- [11:01:46.635736] load_url delta 9.94s url http://www.bbc.com/
- [11:01:46.636737] http://www.bbc.com/ page is 296253 bytes
- [11:01:47.506517] load_url delta 10.8s url http://www.nytimes.com/
- [11:01:47.510517] http://www.nytimes.com/ page is 1468270 bytes
- [11:01:51.169220] load_url delta 14.48s url http://www.cnn.com/
- [11:01:51.172221] http://www.cnn.com/ page is 1115841 bytes
- ProcessPoolExecutor total executing time 14.77s for max_workers 6
- """
Add Comment
Please, Sign In to add comment