Guest User

schedule_project

a guest
Sep 13th, 2016
198
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.27 KB | None | 0 0
  1. import re
  2. from requests import Session
  3. from bs4 import BeautifulSoup
  4. import os
  5. import shutil
  6. import sys
  7. import filecmp
  8. from glob import glob
  9. from apscheduler.schedulers.blocking import BlockingScheduler
  10. from pytz import utc
  11.  
  12. sched = BlockingScheduler(timezone=utc)
  13.  
  14. class Worker:
  15.     def __init__(self):
  16.         self.session = Session()
  17.         self.to_copy = []
  18.  
  19.     def get_schedule_urls(self):
  20.         urls = dict()
  21.         self.session.post(
  22.             'https://eduhouse.ru/login/index.php',
  23.             data={"username": os.environ['login'], "password": os.environ['password']}
  24.         )
  25.  
  26.         response = BeautifulSoup(self.session.get('http://eduhouse.ru').text, "html5lib")
  27.         cleared_resp = response.find("div", {"id": "inst1451"})
  28.  
  29.         schedule = cleared_resp.findAll('a', text=re.compile('Расписание'))[:4]
  30.         changes = cleared_resp.findAll('a', href=re.compile('\d\d.\d\d.\d\d.xls'))
  31.  
  32.         schedule_dict = {link.text: link['href'] for link in schedule}
  33.         changes_dict = {link.text: link['href'] for link in changes}
  34.         urls['schedule'] = schedule_dict
  35.         urls['changes'] = changes_dict
  36.         return urls
  37.  
  38.     def download(self, urls):
  39.         data = dict()
  40.         data['schedule'] = {title: self.session.get(urls['schedule'][title]).content for title in urls['schedule']}
  41.         data['changes'] = {title.replace('/', '.'): self.session.get(urls['changes'][title]).content for title in urls['changes']}
  42.         self.data = data
  43.  
  44.     def save_files(self):
  45.         try:
  46.             os.makedirs('temp')
  47.             for folder in self.data:
  48.                 os.makedirs('temp/' + folder)
  49.                 for title in self.data[folder]:
  50.                     with open('temp/{}/{}.xls'.format(folder, title), 'wb') as file:
  51.                         file.write(self.data[folder][title])
  52.             if os.path.exists('master'):
  53.                 schedule_diff = filecmp.dircmp('temp/schedule', 'master/schedule')
  54.                 for file in list(set(os.listdir('temp/schedule')) - set(schedule_diff.same_files)):
  55.                     shutil.copy('temp/schedule/'+file, 'master/schedule/'+file)
  56.                     self.to_copy.append('master/schedule/'+file)
  57.                 if os.path.exists('temp/changes'):
  58.                     try:
  59.                         os.makedirs('master/changes')
  60.                         for file in os.listdir('temp/changes'):
  61.                             shutil.copy('temp/changes/'+file, 'master/changes/'+file)
  62.                             self.to_copy.append('master/changes/'+file)
  63.                     except FileExistsError:
  64.                         for file in os.listdir('temp/changes'):
  65.                             if file not in os.listdir('master/changes'):
  66.                                 shutil.rmtree('master/changes')
  67.                                 os.mkdir('master/changes')
  68.                                 shutil.copy('temp/changes/'+file, 'master/changes/'+file)
  69.                                 self.to_copy.append('master/changes/'+file)
  70.  
  71.             else:
  72.                 shutil.copytree('temp', 'master')
  73.                 self.to_copy.extend(glob('master/*/*'))
  74.         except FileExistsError:
  75.             print('Folder already exists. Deleting...', end=' ')
  76.             try:
  77.                 shutil.rmtree('temp')
  78.             except Exception as e:
  79.                 print('Fatal error. Exiting')
  80.                 print(e)
  81.                 sys.exit(0)
  82.             print('done!')
  83.             self.save_files()
  84.         finally:
  85.             shutil.rmtree('temp')
  86.  
  87.     def post_new(self):
  88.         """just writes changes to log file until POST request can be handled"""
  89.         if self.to_copy:
  90.             with open('log.txt', 'a') as file:
  91.                 file.write('{}\n\n'.format(self.to_copy))
  92.  
  93.     def run(self):
  94.         self.download(self.get_schedule_urls())
  95.         self.save_files()
  96.         self.post_new()
  97.         print('Wrote to a log file at ', os.getcwd())
  98.  
  99.  
  100. @sched.scheduled_job('interval', minutes=25)
  101. def run_task():
  102.     w = Worker()
  103.     w.run()
  104.     print(os.listdir('/app/master'))
  105.     print(os.listdir('/app/master/changes'))
  106.     print(os.listdir('/app/master/schedule'))
  107.  
  108. if __name__ == '__main__':
  109.     print('started at ', os.getcwd())
  110.     sched.start()
Advertisement
Add Comment
Please, Sign In to add comment