Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class UntisPage:
    """Wrapper around one downloaded Untis HTML page.

    The constructor parses the HTML with BeautifulSoup and reads the date and
    the optional "(Seite x / y)" pagination from the ``div`` whose CSS class
    is ``mon_title``.

    Attributes set by the constructor:
        data: the raw HTML that was passed in.
        soup: the parsed BeautifulSoup document.
        date: ``datetime.date`` parsed from the title.
        cur_page: zero-based index of this page.
        num_pages: total number of pages announced by the title (1 if the
            title carries no pagination).
    """

    # Title with pagination, e.g. "12.3.2012 Montag (Seite 2 / 3)".
    _PAGED_TITLE = r'^(?P<date>\d+\.\d+\.\d{4}) \w+ \(Seite (?P<cur_page>\d+) / (?P<num_pages>\d+)\)$'
    # Title without pagination, e.g. "12.3.2012 Montag".
    _PLAIN_TITLE = r'(?P<date>\d+\.\d+\.\d{4}) \w+$'

    def __init__(self, data):
        """Parse *data* (HTML text) and extract date and page numbers.

        Raises:
            ValueError: if the page has no ``mon_title`` div or its text is
                not in one of the two recognised title formats.
        """
        self.data = data
        self.soup = BeautifulSoup(data)
        title = self.soup.find('div', 'mon_title')
        if title is None:
            raise ValueError("Untis page has no 'mon_title' div")
        print(title.string)
        self.date, self.cur_page, self.num_pages = self._parse_title(title.string)
        print(self)

    @staticmethod
    def _parse_title(text):
        """Return ``(date, cur_page, num_pages)`` parsed from a title string.

        ``cur_page`` is converted from the 1-based number in the title to a
        0-based index. A title without "(Seite x / y)" yields page 0 of 1.

        Raises:
            ValueError: if *text* is ``None``, matches neither title format,
                or contains an invalid date.
        """
        if text is None:
            raise ValueError("Untis page title is empty")
        m = re.match(UntisPage._PAGED_TITLE, text)
        if m:
            cur_page = int(m.group('cur_page')) - 1
            num_pages = int(m.group('num_pages'))
        else:
            cur_page = 0
            num_pages = 1
            m = re.match(UntisPage._PLAIN_TITLE, text)
            if m is None:
                raise ValueError("Unrecognised Untis page title: %r" % (text,))
        date = datetime.strptime(m.group('date'), '%d.%m.%Y').date()
        return date, cur_page, num_pages

    def get_content(self):
        """Return the first ``table`` with CSS class ``mon_list`` (or None)."""
        return self.soup.find('table', 'mon_list')

    def get_info(self):
        """Return the first ``table`` with CSS class ``mon_head`` (or None)."""
        return self.soup.find('table', 'mon_head')

    def __repr__(self):
        return "Untis page (%d/%d) from %s" % (self.cur_page, self.num_pages, self.date)
- ################################
if __name__ == "__main__":
    # Script entry point: follow the start page through its iframe (logging
    # in if a password form is shown), locate the Program.aspx URL, then
    # repeatedly fetch the Untis data page it points to and collect the
    # pages per date until every collected date is complete.
    # NOTE(review): the indentation of this block was reconstructed from a
    # whitespace-mangled paste; confirm the nesting against the original.
    import sys

    last_program_url = REFERER
    program_url = START_URL
    pages = {}  # maps UntisPage.date -> CollectedPage

    program = ProgramPage(get_https(program_url, last_program_url))
    print(program_url)
    print("iframe url %s" % (program.get_iframe_url(),))
    program2 = ProgramPage(get_https(program.get_iframe_url(), program_url))

    if re.search(r'Passwort', program2.data):
        print("Need to authenticate :-(")
        # The form action is joined onto the directory part of the iframe URL.
        m = re.match(r'(https://.*/)[^/]*', program.get_iframe_url())
        if m is None:
            sys.exit("Cannot derive the login URL from the iframe URL")
        url = m.group(1) + program2.soup.find('form')['action']
        # NOTE(review): credentials are hard-coded in the source; consider
        # loading them from configuration or the environment instead.
        resp = post_https(url, {
            'ctl02$txtBenutzername': 'cjd',
            'ctl02$txtPasswort': 'petersberg',
            'ctl02$btnLogin': 'weiter',
            '__VIEWSTATE': program2.soup.find('input', id='__VIEWSTATE')['value'],
            '__EVENTVALIDATION': program2.soup.find('input', id='__EVENTVALIDATION')['value'],
        }, program_url)
        program2 = ProgramPage(resp)

    print("iframe url2 %s" % (program2.get_iframe_url(),))
    data = get_https(program2.get_iframe_url(), program_url)
    m = re.match(r'.*(?P<url>https://.*Program\.aspx[^"]+).*', data, re.M | re.S)
    if m is None:
        sys.exit("Cannot find the Program.aspx URL in the iframe page")
    program_url = m.group('url')

    # NOTE(review): viewState is never assigned again in the visible code, so
    # the POST branch below never runs -- confirm whether extracting
    # __VIEWSTATE from `data` is missing here.
    viewState = None
    for _ in range(20):  # hard upper bound on the number of fetches
        print("PROGRAM_URL: %s" % (program_url,))
        if viewState:
            data = post_https(program_url, {
                '__VIEWSTATE': viewState,
                '__EVENTTARGET': 'Timer1',
                '__EVENTARGUMENT': ''
            }, program_url)
        else:
            data = get_https(program_url, program_url)

        m = re.match(r'.*(?P<url>https://.*Data[^"]+\.htm).*', data, re.M | re.S)
        if m is None:
            sys.exit("Cannot find the Untis data URL in the program page")
        untisPage = UntisPage(get_https(m.group('url'), program_url))

        # Break if we already had that page
        if untisPage.date in pages:
            print(pages)
            print(untisPage.cur_page)
            cPage = pages[untisPage.date]
            idx = untisPage.cur_page
            # Stop once this page was seen before and every collected date
            # has all of its page slots filled.
            if (idx < len(cPage.pages) and cPage.pages[idx]
                    and all(all(p.pages) for p in pages.values())):
                print(pages)
                break

        if untisPage.date not in pages:
            pages[untisPage.date] = CollectedPage(untisPage.date)
        pages[untisPage.date].update(untisPage)
        last_program_url = program_url
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement