Advertisement
Guest User

Untitled

a guest
Dec 22nd, 2017
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.02 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. from datetime import time, date, datetime, timedelta
  4. from time import sleep
  5. from json import loads
  6.  
  7. from lxml import etree
  8. from requests import Session, exceptions
  9. import re
  10.  
  11. username=''
  12. password=''
  13.  
  14. #location='Barendrecht (Zwaalweg 1)'
  15. location='' # 'Breda (Heerbaan 4)' # Can be empty
  16. product='' # 'BTH' # Can be empty
  17. start_time="14:00"
  18. end_time='' # Can be empty
  19.  
  20. start=date.today()+timedelta(days=18)
  21. end=start+timedelta(weeks=3)
  22.  
  23. # start=date.today()+timedelta(weeks=4)
  24. # end=start+timedelta(days=5)
  25.  
  26. postfix='{} {}-{}'.format(username, start_time, end_time).replace(':','.')
  27.  
  28. f=open(postfix+'.log','w')
  29.  
  30. # Prepare the data before to save time within the request
  31. buying_data={
  32.     "ctl00$ctl00$DefaultContent$DefaultContent$Divisions$referenceDataCombobox_Divisions": "DIVISIE AFNAME THEORIE",
  33.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamTimeFrom": start_time,
  34.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamTimeUpToInclusive": end_time,
  35.     "ctl00$ctl00$DefaultContent$DefaultContent$Find.x": "33",
  36.     "ctl00$ctl00$DefaultContent$DefaultContent$Find.y": "38",
  37.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeFridays": "on",
  38.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeMondays": "on",
  39.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeSaturdays": "on",
  40.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeThursdays": "on",
  41.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeTuesdays": "on",
  42.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeWednesdays": "on",
  43.     "ctl00$ctl00$MasterScriptManager": "ctl00$ctl00$MasterScriptManager|ctl00$ctl00$DefaultContent$DefaultContent$Find",
  44.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateFromDatePicker_dateInput_ClientState": '{{"enabled":true,"emptyMessage":"","validationText":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","valueAsString":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00","lastSetTextBoxValue":"{0.day:02d}-{0.month:02d}-{0.year}"}}'.format(start),
  45.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_dateInput_ClientState": '{{"enabled":true,"emptyMessage":"","validationText":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","valueAsString":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00","lastSetTextBoxValue":"{0.day:02d}-{0.month:02d}-{0.year}"}}'.format(end),
  46.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateFromDatePicker_ClientState': '{"minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00"}',
  47.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_ClientState': '{"minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00"}',
  48.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_calendar_AD': '[[2016,7,19],[2099,12,30],[2016,8,1]]',
  49.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_calendar_SD': '[[2016,8,9]]',
  50.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateFromDatePicker": "{0.year}-{0.month:02d}-{0.day:02d}".format(start),
  51.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateFromDatePicker$dateInput": "{0.day:02d}-{0.month:02d}-{0.year}".format(start),
  52.    
  53.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateUpToDatePicker": "{0.year}-{0.month:02d}-{0.day:02d}".format(end),
  54.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateUpToDatePicker$dateInput": "{0.day:02d}-{0.month:02d}-{0.year}".format(end),
  55.    
  56.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateFromDatePicker_calendar_AD": "[[{0.year},{0.month},{0.day}],[2099,12,30],[{0.year},{0.month},{0.day}]]".format(start),
  57.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_calendar_AD": "[[{0.year},{0.month:02d},{0.day:02d}],[2099,12,30],[{0.year},{0.month:02d},{0.day:02d}]]".format(end),
  58.     '__EVENTARGUMENT': '',
  59.     '__EVENTTARGET': '',
  60. }
  61.  
  62. get_loc_ids=False # Update the location file
  63.  
  64. num_xpath=etree.XPath('table/tr/td[7]/text()') # number of available exams
  65. inputs_xpath=etree.XPath('//form[@name="aspnetForm"]//input')
  66. examrows_xpath=etree.XPath('//div[@class="gridSingleRow"]')
  67.  
  68. inputs_re=re.compile(r'<input (?:{pair})*name={val} (?:{pair})*value={val} (?:{pair})*/>'.format(pair='[a-z]+="(?:[^"]*)" ', val='"([^"]*)"'), re.A)
  69.  
  70. url='https://top.cbr.nl/Top/Reservation/BuyCapacityView.aspx'
  71.  
  72. s=Session()
  73. parser=etree.HTMLParser()
  74. etree.set_default_parser(parser)
  75.  
  76. s.headers.update({
  77.     'User-Agent': 'Mozilla/5.0 (X11, Linux x86_64, rv:47.0) Gecko/20100101 Firefox/47.0',
  78.     'Accept': 'text/html,application/xhtml+xml,application/xml,q=0.9,*/*,q=0.8',
  79.     'Accept-Encoding': 'gzip, deflate, br',
  80. })
  81.  
  82. def init(tree):
  83.     return {i.attrib['name']:i.attrib['value'] for i in inputs_xpath(tree) if 'value' in i.attrib}
  84.  
  85. def init_re(text):
  86.     res={}
  87.     for line in text.splitlines():
  88.         while True:
  89.             m=inputs_re.search(line)
  90.             if not m: break
  91.             g=m.groups()
  92.             res[g[0]]=g[1]
  93.             line=line[m.end():]
  94.     return res
  95.  
  96. def getvals(tree, ctl):
  97.     d=init(tree)
  98.     d.update({
  99.         '__CALLBACKID': ctl,
  100.         '__CALLBACKPARAM': '{"Command":"LOD","ClientState":{"value":"*", "text":"(alles)"}}',
  101.     })
  102.     r=s.post(url, data=d)
  103.  
  104.     j=loads(r.text[r.text.find('['):r.text.find(']')+1])
  105.    
  106.     return {i['text']:i['value'] for i in j}
  107.  
  108. # tosleep=datetime.combine(date.today()+timedelta(days=1), time(6,59,50))-datetime.now()
  109. # tosleep=tosleep.seconds
  110. # print(tosleep)
  111. # sleep(tosleep)
  112.  
  113. loc_ids={}
  114. with open('loc_ids.lst') as fi:
  115.     for i in fi:
  116.         loc,code=i.strip().split('\t')
  117.         loc_ids[loc]=code
  118. print(len(loc_ids),'locations', file=f)
  119.  
  120. #for _ in range(0):
  121. while True:
  122.     try:
  123.         r=s.get('https://top.cbr.nl/Top/LogOnView.aspx')
  124.     except Exception as e:
  125.         #print('Exception',e)
  126.         continue
  127.    
  128.     #print(r.status_code, len(r.text))
  129.     if len(r.text) > 2000: break
  130.  
  131. timestamp_s=datetime.now()
  132. print('Connected: {}'.format(timestamp_s), file=f)
  133.  
  134. # import timeit
  135. # print(timeit.timeit(lambda: etree.fromstring(r.text), number=1000))
  136. # print(timeit.timeit(lambda: init_re(r.text), number=1000))
  137. # exit()
  138.  
  139. # data=init(etree.fromstring(r.text))
  140. data=init_re(r.text)
  141.  
  142. data.update({
  143.     '__EVENTTARGET': 'ctl00$ctl00$ctl00$DefaultContent$DefaultContent$DefaultContent$LogOn',
  144.     'ctl00$ctl00$ctl00$DefaultContent$DefaultContent$DefaultContent$LogOnUserName': username,
  145.     'ctl00$ctl00$ctl00$DefaultContent$DefaultContent$DefaultContent$LogOnPassword': password
  146. })
  147. r=s.post('https://top.cbr.nl/Top/LogOnView.aspx', data=data)
  148.  
  149. r=s.get(url)
  150. tree=etree.fromstring(r.text)
  151.  
  152. if product:
  153.     pg_ids=getvals(tree, 'ctl00$ctl00$DefaultContent$DefaultContent$ProductGroups$referenceDataCombobox_ProductGroups')
  154.  
  155.     d=init(tree)
  156.     d.update({
  157.         '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$ProductGroups$referenceDataCombobox_ProductGroups',
  158.         'ctl00_ctl00_DefaultContent_DefaultContent_ProductGroups_referenceDataCombobox_ProductGroups_ClientState': '{"value":"'+pg_ids[product]+'"}'
  159.     })
  160.  
  161.     r=s.post(url, data=d)
  162.     tree=etree.fromstring(r.text)
  163.  
  164. if location:
  165.     if get_loc_ids:
  166.         loc_ids=getvals(tree, 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations')
  167.  
  168.         d=init(tree)
  169.         d.update({
  170.             '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations',
  171.             'ctl00_ctl00_DefaultContent_DefaultContent_Locations_referenceDataCombobox_Locations_ClientState': '{"logEntries":[],"value":"__expandcollapse","text":"toon alles","enabled":true,"checkedIndices":[],"checkedItemsTextOverflows":false}'
  172.         })
  173.  
  174.         r=s.post(url, data=d)
  175.         tree=etree.fromstring(r.text)
  176.  
  177.         loc_ids=getvals(tree, 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations')
  178.         with open('loc_ids.lst','w') as fo:
  179.             for i in loc_ids:
  180.                 fo.write(i+'\t'+loc_ids[i]+'\n')
  181.  
  182.         exit()
  183.        
  184.     d=init(tree)
  185.     d.update({
  186.         '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations',
  187.         'ctl00_ctl00_DefaultContent_DefaultContent_Locations_referenceDataCombobox_Locations_ClientState': '{"value":"'+loc_ids[location]+'"}'
  188.     })
  189.  
  190.     r=s.post(url, data=d)
  191.     tree=etree.fromstring(r.text)
  192.  
  193. # data=init(tree)
  194. data=init_re(r.text)
  195. data.update(buying_data)
  196.  
  197. r=s.post(url, data=data)
  198. timestamp_l=datetime.now()
  199. getexams_text=r.text
  200. tree=etree.fromstring(getexams_text)
  201.  
  202. data=init(tree)
  203. data.update({
  204.     '__EVENTARGUMENT': '',
  205.     '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$BuyCapacity',
  206.     'ctl00$ctl00$MasterScriptManager': 'ctl00$ctl00$MasterScriptManager|ctl00$ctl00$DefaultContent$DefaultContent$BuyCapacity',
  207. })
  208.  
  209. rows=examrows_xpath(tree)
  210. if not len(rows):
  211.     print('No exams found. Listing time: {}'.format(timestamp_l-timestamp_s), file=f)
  212.     open('getexams'+postfix+'.html','w').write(getexams_text)
  213.     exit()
  214.  
  215. for i in rows:
  216.     inp=i[0] # input field, first child of i
  217.     name=inp.attrib['name']
  218.     id=name.split('$')[5] # ctl??
  219.     data[name]=inp.attrib['value']
  220.  
  221.     cnt=num_xpath(i)[0].strip()
  222.     data['ctl00$ctl00$DefaultContent$DefaultContent$CapacityDataList${}$CapacityEditor$NumberToBuy'.format(id)]=cnt
  223.     data['ctl00_ctl00_DefaultContent_DefaultContent_CapacityDataList_{}_CapacityEditor_NumberToBuy_ClientState'.format(id)]='{{"valueAsString":"{}"}}'.format(cnt)
  224.  
  225. # for i in data:
  226. #     if i in ('__EVENTVALIDATION', '__VIEWSTATE'): continue
  227. #     print(i, '--', data[i])
  228.  
  229. r=s.post(url, data=data)
  230. timestamp_f=datetime.now()
  231.  
  232. print('Get exams page size:',len(getexams_text), file=f)
  233. print('Buy exams page size:',len(r.text), file=f)
  234. open('getexams'+postfix+'.html','w').write(getexams_text)
  235. open('buyexams'+postfix+'.html','w').write(r.text)
  236. print('Listing time: {}, Buying time: {}'.format(timestamp_l-timestamp_s, timestamp_f-timestamp_l), file=f)
  237.  
  238. f.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement