Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#! /usr/bin/env python
import os
import asyncio
import aiohttp
import datetime
import logging

# Root logger at DEBUG is noisy (aiohttp logs per-request details) —
# consider INFO outside of development.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Shared event loop used by the __main__ block below.
# NOTE(review): calling asyncio.get_event_loop() at import time is deprecated
# on newer Python versions — confirm the target interpreter.
loop = asyncio.get_event_loop()
class RequestException(Exception):
    """Raised when an HTTP request comes back with a non-200 status.

    Attributes:
        message: formatted text containing the server's reason phrase and
            the numeric status code (callers log this directly).
    """

    def __init__(self, message, code):
        self.message = 'Response error message: {}, code: {}'.format(message, code)
        # Pass the text to Exception as well so str(exc) / tracebacks are
        # not empty (the original stored .message but never called super()).
        super().__init__(self.message)
class Fetcher(object):
    """Periodically fetches schedule pages from eduhouse.ru and saves them.

    A single aiohttp session is shared by every request this instance makes;
    ``self.semaphore`` caps the number of requests in flight at 10.
    ``self.status`` is a human-readable progress line read by print_status().
    """

    def __init__(self, interval=60 * 25):
        # interval: seconds to sleep between jobs (default 25 minutes).
        self.interval = interval
        self.session = aiohttp.ClientSession()
        self.semaphore = asyncio.Semaphore(10)
        self.status = ''

    async def get(self, url, method="get", post_data=None):
        """Fetch *url* and return the response body as bytes.

        method: 'get' for a GET; anything else issues a POST with *post_data*.
        Raises RequestException for any non-200 response.

        NOTE: the session must NOT be closed here — it is shared by every
        request (the original ``with self.session`` closed it after the very
        first call, breaking all subsequent requests).
        """
        # Acquiring the semaphore per request is what actually bounds
        # concurrency (the original acquired it once around the whole gather).
        async with self.semaphore:
            # NOTE(review): aiohttp.Timeout was removed in aiohttp 3.x —
            # confirm the installed version, or switch to ClientTimeout.
            with aiohttp.Timeout(300):
                if method == 'get':
                    response = await self.session.request('get', url)
                else:
                    response = await self.session.request('post', url, data=post_data)
                try:
                    if response.status == 200:
                        return await response.read()
                    raise RequestException(response.reason, response.status)
                finally:
                    # Always return the connection to the pool, even on error.
                    await response.release()

    async def get_schedule_urls(self):
        """Log in with credentials from the environment and scrape schedule URLs.

        Requires the 'login' and 'password' environment variables (raises
        KeyError if missing).  HTML parsing is still TODO, so this currently
        returns an empty list.
        """
        urls = []
        post_data = {"username": os.environ['login'], "password": os.environ['password']}
        raw_data = await self.get('https://eduhouse.ru/login/index.php', method='post', post_data=post_data)
        # ... beautifulsoup work on raw_data...
        return urls

    async def do_job(self):
        """One full cycle: collect URLs, download them all, persist results."""
        self.status = '[{}] fetch urls'.format(datetime.datetime.now())
        urls = []
        try:
            urls = await self.get_schedule_urls()
        except RequestException as e:
            logger.error('RequestException: {}'.format(e.message))
        except Exception:
            logger.exception('BroadException: ')
        downloaded = []
        if urls:
            self.status = '[{}] download this shiiit'.format(datetime.datetime.now())
            # self.get() acquires the semaphore itself, so all tasks may be
            # scheduled at once while only 10 requests run concurrently.
            tasks = [self.get(u) for u in urls]
            downloaded = await asyncio.gather(*tasks, return_exceptions=True)
            # check for downloaded item != some exception
        if downloaded:
            self.status = '[{}] save this shiiit'.format(datetime.datetime.now())
            await self.fs_write()

    async def fs_write(self):
        """Persist the downloaded data to disk (not implemented yet)."""
        # save
        pass

    async def run(self):
        """Run do_job() forever, sleeping self.interval seconds between cycles."""
        while True:
            await self.do_job()
            await asyncio.sleep(self.interval)

    async def print_status(self):
        """Print the current status line every 5 seconds, forever."""
        while True:
            print(self.status)
            await asyncio.sleep(5)
if __name__ == '__main__':
    f = Fetcher()
    try:
        # Run the fetch loop and the status printer concurrently; both loop
        # forever, so run_until_complete only returns on interruption.
        tasks = [loop.create_task(f.run()), loop.create_task(f.print_status())]
        loop.run_until_complete(asyncio.wait(tasks))
    except KeyboardInterrupt:
        # Graceful Ctrl-C: cancel the forever-loops instead of letting the
        # interrupt propagate as an ugly traceback.
        for t in tasks:
            t.cancel()
    finally:
        loop.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement