Advertisement
Guest User

html_scrape

a guest
Aug 14th, 2019
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.64 KB | None | 0 0
  1. async def get_page(url):
  2.     async with semaphore:
  3.         timeout = aiohttp.ClientTimeout(connect=15)
  4.         async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
  5.             proxy = await get_proxy(session)
  6.             # Get podcast html
  7.             while True:
  8.                 try:
  9.                     async with session.get(url, proxy=proxy) as r:
  10.                         print('URL: {} STATUS: {}'.format(url, r.status))
  11.                         if r.status != 200:
  12.                             await asyncio.sleep(1)
  13.                             proxy = await get_proxy(session)
  14.                             continue
  15.                         main_html = await r.text()
  16.                         break
  17.                 except Exception:
  18.                     await asyncio.sleep(1)
  19.                     proxy = await get_proxy(session)
  20.                     continue
  21.             # Get podcast listeners html
  22.             # Get podbay json
  23.             # Get podcastrss html
  24.             # Get getrssfeed html
  25.             result = {
  26.                 'main_html': main_html,
  27.                 'listeners_html': listeners_html,
  28.                 'podbay_json': podbay_json,
  29.                 'podcastrss_html': podcastrss_html,
  30.                 'getrssfeed_html': getrssfeed_html
  31.             }
  32.             pub = await aioredis.create_redis('redis://localhost')
  33.             await pub.publish_json('chan:1', result)
  34.             pub.close()
  35.  
  36.  
  37. if __name__ == "__main__":
  38.     urls = []
  39.     loop = asyncio.get_event_loop()
  40.     tasks = [asyncio.ensure_future(get_page(url)) for url in urls]
  41.     loop.run_until_complete(asyncio.wait(tasks))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement