Advertisement
Guest User

Untitled

a guest
Jan 29th, 2015
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.01 KB | None | 0 0
  1. class UntisPage:
  2.     def __init__(self, data):
  3.         self.data = data
  4.         #print data
  5.         self.soup = BeautifulSoup(data)
  6.        
  7.         title = self.soup.find('div', 'mon_title')
  8.         print title.string
  9.        
  10.         m = re.match(r'^(?P<date>\d+\.\d+\.\d{4}) \w+ \(Seite (?P<cur_page>\d+) / (?P<num_pages>\d+)\)$', title.string)
  11.         if m:
  12.             self.cur_page = int(m.group('cur_page')) - 1
  13.             self.num_pages = int(m.group('num_pages'))
  14.         else:
  15.             self.cur_page = 0
  16.             self.num_pages = 1
  17.             m = re.match(r'(?P<date>\d+\.\d+\.\d{4}) \w+$', title.string)
  18.  
  19.         self.date = datetime.strptime(m.group('date'), '%d.%m.%Y').date()
  20.        
  21.        
  22.         print self
  23.        
  24.         #open('untispage_%s_%d.html' % (self.date, self.cur_page), 'w').write(self.soup.prettify())
  25.        
  26.    
  27.     def get_content(self):
  28.         return self.soup.find('table', 'mon_list')
  29.    
  30.     def get_info(self):
  31.         return self.soup.find('table', 'mon_head')
  32.    
  33.     def __repr__(self):
  34.         return "Untis page (%d/%d) from %s" % (self.cur_page, self.num_pages, self.date)
  35.  
  36. ################################
  37.  
  38. if __name__ == "__main__":
  39.     import sys
  40.    
  41.     last_program_url = REFERER
  42.     program_url = START_URL
  43.    
  44.     pages = {}
  45.  
  46.     program = ProgramPage(get_https(program_url, last_program_url))
  47.  
  48.     print program_url
  49.     print "iframe url", program.get_iframe_url()
  50.  
  51.     program2 = ProgramPage(get_https(program.get_iframe_url(), program_url))
  52.  
  53.     if re.search(r'Passwort', program2.data):
  54.         print "Need to authenticate :-("
  55.         m = re.match(r'(https://.*/)[^/]*', program.get_iframe_url())
  56.         url = m.group(1) + program2.soup.find('form')['action']
  57.         resp = post_https(url, {
  58.             'ctl02$txtBenutzername' : 'cjd',
  59.             'ctl02$txtPasswort': 'petersberg',
  60.             'ctl02$btnLogin': 'weiter',
  61.             '__VIEWSTATE': program2.soup.find('input', id='__VIEWSTATE')['value'],
  62.             '__EVENTVALIDATION': program2.soup.find('input', id='__EVENTVALIDATION')['value'],
  63.         }, program_url)
  64.         program2 = ProgramPage(resp)
  65.  
  66.     print "iframe url2", program2.get_iframe_url()
  67.  
  68.     data = get_https(program2.get_iframe_url(), program_url)
  69.     m = re.match(r'.*(?P<url>https://.*Program\.aspx[^"]+).*', data, re.M | re.S)
  70.     program_url = m.group('url')
  71.  
  72.     viewState = None
  73.  
  74.     for i in range(20):
  75.         print "PROGRAM_URL:", program_url
  76.         if viewState:
  77.             data = post_https(program_url, {
  78.                 '__VIEWSTATE': viewState,
  79.                 '__EVENTTARGET': 'Timer1',
  80.                 '__EVENTARGUMENT': ''
  81.             }, program_url)
  82.         else:
  83.             data = get_https(program_url, program_url)
  84.  
  85.         m = re.match(r'.*(?P<url>https://.*Data[^"]+\.htm).*', data, re.M | re.S)
  86.         b = m.group('url')
  87.  
  88.         untisPage = UntisPage(get_https(b, program_url))
  89.        
  90.         # Break if we already had that page
  91.         if untisPage.date in pages:
  92.             print pages
  93.             print untisPage.cur_page
  94.             cPage = pages[untisPage.date]
  95.             idx = untisPage.cur_page
  96.             if idx < len(cPage.pages) and cPage.pages[untisPage.cur_page] and all([all(p.pages) for p in pages.values()]):
  97.                 print pages
  98.                 break
  99.  
  100.         if not untisPage.date in pages:
  101.             pages[untisPage.date] = CollectedPage(untisPage.date)
  102.        
  103.         pages[untisPage.date].update(untisPage)
  104.  
  105.         last_program_url = program_url
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement