Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- from requests import Session
- from bs4 import BeautifulSoup
- import os
- import shutil
- import sys
- import filecmp
- from glob import glob
- from apscheduler.schedulers.blocking import BlockingScheduler
- from pytz import utc
# Module-level scheduler: jobs are registered on it via the
# ``@sched.scheduled_job`` decorator and interpreted in UTC.
sched = BlockingScheduler(
    timezone=utc,
)
class Worker:
    """Download schedule spreadsheets from eduhouse.ru and mirror new ones locally.

    A run (see :meth:`run`) logs in, scrapes the spreadsheet links, downloads
    them, stages them under ``temp/``, copies whatever is new into ``master/``
    and appends the list of copied paths to ``log.txt``.  All paths are
    relative to the current working directory.
    """

    def __init__(self):
        # A single session is used for the login POST and every later GET.
        self.session = Session()
        # Paths under ``master/`` that were (re)written during this run.
        self.to_copy = []

    def get_schedule_urls(self):
        """Log in and collect the spreadsheet links from the site's front page.

        Credentials are read from the ``login`` and ``password`` environment
        variables (``KeyError`` if either is missing).

        Returns:
            A dict with two keys, ``'schedule'`` and ``'changes'``, each
            mapping a link's text to its ``href``.
        """
        self.session.post(
            'https://eduhouse.ru/login/index.php',
            data={"username": os.environ['login'], "password": os.environ['password']}
        )
        page = BeautifulSoup(self.session.get('http://eduhouse.ru').text, "html5lib")
        # NOTE(review): if this block is absent from the page (for example the
        # login did not succeed) ``find`` returns None and the lookups below
        # raise AttributeError.
        block = page.find("div", {"id": "inst1451"})
        # 'Расписание' is Russian for "Schedule"; only the first four matching
        # links are kept.
        schedule_links = block.findAll('a', text=re.compile('Расписание'))[:4]
        # Raw string: same pattern as before, without the invalid '\d' escape.
        change_links = block.findAll('a', href=re.compile(r'\d\d.\d\d.\d\d.xls'))
        return {
            'schedule': {link.text: link['href'] for link in schedule_links},
            'changes': {link.text: link['href'] for link in change_links},
        }

    def download(self, urls):
        """Fetch every linked file and keep the raw bytes in ``self.data``.

        Args:
            urls: The mapping returned by :meth:`get_schedule_urls`.

        ``self.data`` has the same two keys as ``urls``; each maps a title to
        the downloaded content.  Slashes in "changes" titles are replaced by
        dots because the title is later used as a file name.
        """
        self.data = {
            'schedule': {
                title: self.session.get(href).content
                for title, href in urls['schedule'].items()
            },
            'changes': {
                title.replace('/', '.'): self.session.get(href).content
                for title, href in urls['changes'].items()
            },
        }

    def save_files(self):
        """Stage ``self.data`` under ``temp/`` and copy what is new into ``master/``.

        Every path written into ``master/`` is appended to ``self.to_copy``.
        ``temp/`` is always removed before returning.

        Raises:
            SystemExit: if a leftover ``temp`` from an earlier run cannot be
                removed.
        """
        if os.path.exists('temp'):
            # Clear the leftover up front instead of recursing: the recursive
            # variant removed ``temp`` twice and crashed in its outer cleanup.
            print('Folder already exists. Deleting...', end=' ')
            try:
                shutil.rmtree('temp')
            except OSError as e:
                print('Fatal error. Exiting')
                print(e)
                sys.exit(1)
            print('done!')
        try:
            self._write_temp_files()
            if os.path.exists('master'):
                self._sync_schedule()
                self._sync_changes()
            else:
                # First run: the staging tree becomes the master tree as-is.
                shutil.copytree('temp', 'master')
                self.to_copy.extend(glob('master/*/*'))
        finally:
            shutil.rmtree('temp', ignore_errors=True)

    def _write_temp_files(self):
        """Write each downloaded file to ``temp/<folder>/<title>.xls``."""
        os.makedirs('temp')
        for folder, files in self.data.items():
            os.makedirs('temp/' + folder)
            for title, content in files.items():
                with open('temp/{}/{}.xls'.format(folder, title), 'wb') as file:
                    file.write(content)

    def _sync_schedule(self):
        """Copy schedule files that are missing from or differ in ``master/schedule``."""
        os.makedirs('master/schedule', exist_ok=True)
        unchanged = set(filecmp.dircmp('temp/schedule', 'master/schedule').same_files)
        for name in os.listdir('temp/schedule'):
            if name in unchanged:
                continue
            shutil.copy('temp/schedule/' + name, 'master/schedule/' + name)
            self.to_copy.append('master/schedule/' + name)

    def _sync_changes(self):
        """Mirror ``temp/changes`` into ``master/changes`` when new files appear.

        Only files whose names were not already in ``master/changes`` are
        reported in ``self.to_copy``.  The folder is reset once, before any
        copying, so that several new files arriving together all survive.
        """
        if not os.path.exists('temp/changes'):
            return
        staged = os.listdir('temp/changes')
        os.makedirs('master/changes', exist_ok=True)
        known = set(os.listdir('master/changes'))
        new_names = [name for name in staged if name not in known]
        if not new_names:
            return
        # Drop outdated change sheets, then copy everything currently staged.
        shutil.rmtree('master/changes')
        os.mkdir('master/changes')
        for name in staged:
            shutil.copy('temp/changes/' + name, 'master/changes/' + name)
        self.to_copy.extend('master/changes/' + name for name in new_names)

    def post_new(self):
        """Append the list of copied paths to ``log.txt`` (no-op when empty).

        Stand-in until the new files can be sent with a POST request.
        """
        if self.to_copy:
            with open('log.txt', 'a') as file:
                file.write('{}\n\n'.format(self.to_copy))

    def run(self):
        """Execute one full cycle: scrape links, download, save, log."""
        self.download(self.get_schedule_urls())
        self.save_files()
        self.post_new()
        print('Wrote to a log file at ', os.getcwd())
@sched.scheduled_job('interval', minutes=25)
def run_task():
    """Run one Worker cycle every 25 minutes, then print the master folder listings."""
    Worker().run()
    # Diagnostic output: show what is currently stored in each master folder.
    for directory in ('/app/master', '/app/master/changes', '/app/master/schedule'):
        print(os.listdir(directory))
if __name__ == '__main__':
    # Report the working directory (all Worker paths are relative to it),
    # then start the scheduler that drives run_task.
    startup_dir = os.getcwd()
    print('started at ', startup_dir)
    sched.start()
Advertisement
Add Comment
Please, Sign In to add comment