Advertisement
Guest User

Untitled

a guest
Dec 25th, 2017
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.23 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. from datetime import time, date, datetime, timedelta
  4. from time import sleep
  5. from json import loads
  6.  
  7. from lxml import etree
  8. from requests import Session, exceptions
  9. import re
  10.  
  11. #usuario e senha, de onde vem?
  12. username='apaguei :)'
  13. password='apaguei :)'
  14.  
  15. #location='Barendrecht (Zwaalweg 1)'
  16. location='' # 'Breda (Heerbaan 4)' # Can be empty
  17. product='' # 'BTH' # Can be empty
  18. start_time="14:00"
  19. end_time='' # Can be empty
  20.  
  21. #periodo de tempo
  22. start=date.today()+timedelta(days=18)
  23. end=start+timedelta(weeks=3)
  24.  
  25. # start=date.today()+timedelta(weeks=4)
  26. # end=start+timedelta(days=5)
  27.  
  28. postfix='{} {}-{}'.format(username, start_time, end_time).replace(':','.')
  29.  
  30. f=open(postfix+'.log','w')
  31.  
  32. # Prepare the data before to save time within the request
  33. buying_data={
  34.     "ctl00$ctl00$DefaultContent$DefaultContent$Divisions$referenceDataCombobox_Divisions": "DIVISIE AFNAME THEORIE",
  35.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamTimeFrom": start_time,
  36.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamTimeUpToInclusive": end_time,
  37.     "ctl00$ctl00$DefaultContent$DefaultContent$Find.x": "33",
  38.     "ctl00$ctl00$DefaultContent$DefaultContent$Find.y": "38",
  39.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeFridays": "on",
  40.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeMondays": "on",
  41.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeSaturdays": "on",
  42.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeThursdays": "on",
  43.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeTuesdays": "on",
  44.     "ctl00$ctl00$DefaultContent$DefaultContent$IncludeWednesdays": "on",
  45.     "ctl00$ctl00$MasterScriptManager": "ctl00$ctl00$MasterScriptManager|ctl00$ctl00$DefaultContent$DefaultContent$Find",
  46.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateFromDatePicker_dateInput_ClientState": '{{"enabled":true,"emptyMessage":"","validationText":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","valueAsString":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00","lastSetTextBoxValue":"{0.day:02d}-{0.month:02d}-{0.year}"}}'.format(start),
  47.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_dateInput_ClientState": '{{"enabled":true,"emptyMessage":"","validationText":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","valueAsString":"{0.year}-{0.month:02d}-{0.day:02d}-00-00-00","minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00","lastSetTextBoxValue":"{0.day:02d}-{0.month:02d}-{0.year}"}}'.format(end),
  48.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateFromDatePicker_ClientState': '{"minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00"}',
  49.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_ClientState': '{"minDateStr":"1000-01-01-00-00-00","maxDateStr":"2099-12-31-00-00-00"}',
  50.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_calendar_AD': '[[2016,7,19],[2099,12,30],[2016,8,1]]',
  51.     'ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_calendar_SD': '[[2016,8,9]]',
  52.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateFromDatePicker": "{0.year}-{0.month:02d}-{0.day:02d}".format(start),
  53.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateFromDatePicker$dateInput": "{0.day:02d}-{0.month:02d}-{0.year}".format(start),
  54.    
  55.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateUpToDatePicker": "{0.year}-{0.month:02d}-{0.day:02d}".format(end),
  56.     "ctl00$ctl00$DefaultContent$DefaultContent$ExamDateUpToDatePicker$dateInput": "{0.day:02d}-{0.month:02d}-{0.year}".format(end),
  57.    
  58.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateFromDatePicker_calendar_AD": "[[{0.year},{0.month},{0.day}],[2099,12,30],[{0.year},{0.month},{0.day}]]".format(start),
  59.     "ctl00_ctl00_DefaultContent_DefaultContent_ExamDateUpToDatePicker_calendar_AD": "[[{0.year},{0.month:02d},{0.day:02d}],[2099,12,30],[{0.year},{0.month:02d},{0.day:02d}]]".format(end),
  60.     '__EVENTARGUMENT': '',
  61.     '__EVENTTARGET': '',
  62. }
  63.  
  64. get_loc_ids=False # Update the location file
  65.  
  66. num_xpath=etree.XPath('table/tr/td[7]/text()') # number of available exams
  67. inputs_xpath=etree.XPath('//form[@name="aspnetForm"]//input')
  68. examrows_xpath=etree.XPath('//div[@class="gridSingleRow"]')
  69.  
  70. inputs_re=re.compile(r'<input (?:{pair})*name={val} (?:{pair})*value={val} (?:{pair})*/>'.format(pair='[a-z]+="(?:[^"]*)" ', val='"([^"]*)"'), re.A)
  71.  
  72. url='https://top.cbr.nl/Top/Reservation/BuyCapacityView.aspx'
  73.  
  74. #obter a pagina de login para fazer o login com sucesso
  75. s=Session()
  76. parser=etree.HTMLParser()
  77. etree.set_default_parser(parser)
  78.  
  79. s.headers.update({
  80.     'User-Agent': 'Mozilla/5.0 (X11, Linux x86_64, rv:47.0) Gecko/20100101 Firefox/47.0',
  81.     'Accept': 'text/html,application/xhtml+xml,application/xml,q=0.9,*/*,q=0.8',
  82.     'Accept-Encoding': 'gzip, deflate, br',
  83. })
  84.  
  85. def init(tree):
  86.     return {i.attrib['name']:i.attrib['value'] for i in inputs_xpath(tree) if 'value' in i.attrib}
  87.  
  88. def init_re(text):
  89.     res={}
  90.     for line in text.splitlines():
  91.         while True:
  92.             m=inputs_re.search(line)
  93.             if not m: break
  94.             g=m.groups()
  95.             res[g[0]]=g[1]
  96.             line=line[m.end():]
  97.     return res
  98.  
  99. def getvals(tree, ctl):
  100.     d=init(tree)
  101.     d.update({
  102.         '__CALLBACKID': ctl,
  103.         '__CALLBACKPARAM': '{"Command":"LOD","ClientState":{"value":"*", "text":"(alles)"}}',
  104.     })
  105.     r=s.post(url, data=d)
  106.  
  107.     j=loads(r.text[r.text.find('['):r.text.find(']')+1])
  108.    
  109.     return {i['text']:i['value'] for i in j}
  110.  
  111. # tosleep=datetime.combine(date.today()+timedelta(days=1), time(6,59,50))-datetime.now()
  112. # tosleep=tosleep.seconds
  113. # print(tosleep)
  114. # sleep(tosleep)
  115.  
  116. #carrega arquivo de locais
  117. loc_ids={}
  118. with open('loc_ids.lst') as fi:
  119.     for i in fi:
  120.         loc,code=i.strip().split('\t')
  121.         loc_ids[loc]=code
  122. print(len(loc_ids),'locations', file=f)
  123.  
  124. #obtem pagina de login
  125. #for _ in range(0):
  126. while True:
  127.     try:
  128.         r=s.get('https://top.cbr.nl/Top/LogOnView.aspx')
  129.     except Exception as e:
  130.         #print('Exception',e)
  131.         continue
  132.    
  133.     #print(r.status_code, len(r.text))
  134.     if len(r.text) > 2000: break
  135.  
  136. timestamp_s=datetime.now()
  137. print('Connected: {}'.format(timestamp_s), file=f)
  138.  
  139. # import timeit
  140. # print(timeit.timeit(lambda: etree.fromstring(r.text), number=1000))
  141. # print(timeit.timeit(lambda: init_re(r.text), number=1000))
  142. # exit()
  143.  
  144. #obtem os hidden input
  145. # data=init(etree.fromstring(r.text))
  146. data=init_re(r.text)
  147.  
  148. data.update({
  149.     '__EVENTTARGET': 'ctl00$ctl00$ctl00$DefaultContent$DefaultContent$DefaultContent$LogOn',
  150.     'ctl00$ctl00$ctl00$DefaultContent$DefaultContent$DefaultContent$LogOnUserName': username,
  151.     'ctl00$ctl00$ctl00$DefaultContent$DefaultContent$DefaultContent$LogOnPassword': password
  152. })
  153.  
  154. #faz o post
  155. r=s.post('https://top.cbr.nl/Top/LogOnView.aspx', data=data)
  156.  
  157. r=s.get(url)
  158. tree=etree.fromstring(r.text)
  159.  
  160. if product:
  161.     pg_ids=getvals(tree, 'ctl00$ctl00$DefaultContent$DefaultContent$ProductGroups$referenceDataCombobox_ProductGroups')
  162.  
  163.     d=init(tree)
  164.     d.update({
  165.         '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$ProductGroups$referenceDataCombobox_ProductGroups',
  166.         'ctl00_ctl00_DefaultContent_DefaultContent_ProductGroups_referenceDataCombobox_ProductGroups_ClientState': '{"value":"'+pg_ids[product]+'"}'
  167.     })
  168.  
  169.     r=s.post(url, data=d)
  170.     tree=etree.fromstring(r.text)
  171.  
  172. if location:
  173.     if get_loc_ids:
  174.         loc_ids=getvals(tree, 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations')
  175.  
  176.         d=init(tree)
  177.         d.update({
  178.             '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations',
  179.             'ctl00_ctl00_DefaultContent_DefaultContent_Locations_referenceDataCombobox_Locations_ClientState': '{"logEntries":[],"value":"__expandcollapse","text":"toon alles","enabled":true,"checkedIndices":[],"checkedItemsTextOverflows":false}'
  180.         })
  181.  
  182.         r=s.post(url, data=d)
  183.         tree=etree.fromstring(r.text)
  184.  
  185.         loc_ids=getvals(tree, 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations')
  186.         with open('loc_ids.lst','w') as fo:
  187.             for i in loc_ids:
  188.                 fo.write(i+'\t'+loc_ids[i]+'\n')
  189.  
  190.         exit()
  191.        
  192.     d=init(tree)
  193.     d.update({
  194.         '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$Locations$referenceDataCombobox_Locations',
  195.         'ctl00_ctl00_DefaultContent_DefaultContent_Locations_referenceDataCombobox_Locations_ClientState': '{"value":"'+loc_ids[location]+'"}'
  196.     })
  197.  
  198.     r=s.post(url, data=d)
  199.     tree=etree.fromstring(r.text)
  200.  
  201. # data=init(tree)
  202. data=init_re(r.text)
  203. data.update(buying_data)
  204.  
  205. r=s.post(url, data=data)
  206. timestamp_l=datetime.now()
  207. getexams_text=r.text
  208. tree=etree.fromstring(getexams_text)
  209.  
  210. data=init(tree)
  211. data.update({
  212.     '__EVENTARGUMENT': '',
  213.     '__EVENTTARGET': 'ctl00$ctl00$DefaultContent$DefaultContent$BuyCapacity',
  214.     'ctl00$ctl00$MasterScriptManager': 'ctl00$ctl00$MasterScriptManager|ctl00$ctl00$DefaultContent$DefaultContent$BuyCapacity',
  215. })
  216.  
  217. rows=examrows_xpath(tree)
  218. if not len(rows):
  219.     print('No exams found. Listing time: {}'.format(timestamp_l-timestamp_s), file=f)
  220.     open('getexams'+postfix+'.html','w').write(getexams_text)
  221.     exit()
  222.  
  223. for i in rows:
  224.     inp=i[0] # input field, first child of i
  225.     name=inp.attrib['name']
  226.     id=name.split('$')[5] # ctl??
  227.     data[name]=inp.attrib['value']
  228.  
  229.     cnt=num_xpath(i)[0].strip()
  230.     data['ctl00$ctl00$DefaultContent$DefaultContent$CapacityDataList${}$CapacityEditor$NumberToBuy'.format(id)]=cnt
  231.     data['ctl00_ctl00_DefaultContent_DefaultContent_CapacityDataList_{}_CapacityEditor_NumberToBuy_ClientState'.format(id)]='{{"valueAsString":"{}"}}'.format(cnt)
  232.  
  233. # for i in data:
  234. #     if i in ('__EVENTVALIDATION', '__VIEWSTATE'): continue
  235. #     print(i, '--', data[i])
  236.  
  237. r=s.post(url, data=data)
  238. timestamp_f=datetime.now()
  239.  
  240. print('Get exams page size:',len(getexams_text), file=f)
  241. print('Buy exams page size:',len(r.text), file=f)
  242. open('getexams'+postfix+'.html','w').write(getexams_text)
  243. open('buyexams'+postfix+'.html','w').write(r.text)
  244. print('Listing time: {}, Buying time: {}'.format(timestamp_l-timestamp_s, timestamp_f-timestamp_l), file=f)
  245.  
  246. f.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement