Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import re, urllib2, requests
- from pyquery import PyQuery as pq
- from ctypes import c_int32
def decode_string(in_str):
    """Return ``in_str`` with its URL percent-escapes (``%XX``) decoded.

    ``unquote`` is looked up in its documented location instead of through
    ``urllib2``, which only re-exports it as an implementation detail on
    Python 2 and does not exist on Python 3.  The import is local so the
    function works no matter which of those modules the file imports.

    :param in_str: percent-encoded text.
    :returns: the decoded text, as produced by the standard library's
        ``unquote`` for the running interpreter.
    """
    try:
        from urllib import unquote  # Python 2
    except ImportError:
        from urllib.parse import unquote  # Python 3
    return unquote(in_str)
def decode_action(d):
    """Percent-decode the ``action`` attribute of the document's first form.

    The attribute is rewritten in place on ``d``; nothing is returned.

    :param d: the PyQuery document the caller built from the page HTML.
    """
    # Re-wrap the selection through ``d`` once and use it for both the read
    # and the write of the attribute.
    first_form = d(d("form").eq(0))
    decoded_action = decode_string(first_form.attr('action'))
    first_form.attr('action', decoded_action)
def submit_form(d):
    """Collect the form's target and fields after decoding selected inputs.

    The ``value`` attributes of the ``<input>`` elements at positions
    1, 2, 5 and 7 (document order) are percent-decoded in place first.
    NOTE(review): the positions are hard-coded; presumably they match the
    layout of the challenge page -- confirm against the served HTML.

    :param d: the PyQuery document holding the form.
    :returns: ``(action, fields)`` where ``action`` is the ``action``
        attribute read from ``d("form")`` and ``fields`` maps each input's
        ``name`` attribute to its ``value`` attribute.
    """
    inputs = d("input")

    for position in (1, 2, 5, 7):
        field = d(inputs[position])
        field.attr("value", decode_string(field.attr("value")))

    # Later inputs win when two of them share a name.
    form_elements = dict(
        (d(node).attr("name"), d(node).attr("value")) for node in inputs
    )
    return (d("form").attr("action"), form_elements)
try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: chr() already returns text


def _candidates(s1, s2, n):
    """Yield the candidate strings of length ``n`` in search order.

    Works like an odometer whose wheels run from ``s1[0]`` to ``s2[0]``
    with the rightmost wheel moving fastest.  The all-``s1`` starting
    combination itself is never yielded: every candidate is produced by
    incrementing first, so ``alphabet ** n - 1`` strings come out in total.
    A wheel that overflows is reset to the whole ``s1`` string, not just
    its first character.
    """
    last = ord(s2[0])
    remaining = ((last - ord(s1[0])) + 1) ** n - 1
    wheels = [s1] * n
    # A plain counter instead of range(): on Python 2 range() would build
    # the complete list of attempts up front.
    while remaining > 0:
        pos = n - 1
        while pos >= 0:
            bumped = ord(wheels[pos][0]) + 1
            if bumped <= last:
                wheels[pos] = _unichr(bumped)
                break
            wheels[pos] = s1  # overflow: reset and carry to the left
            pos -= 1
        yield u"".join(wheels)
        remaining -= 1


def _js_crc(text, table, cache):
    """Return the table-driven checksum (``crc`` in the page) of ``text``.

    ``table`` is one long string holding 8-hex-digit entries spaced 9
    characters apart.  Each entry is reinterpreted as a signed 32-bit
    integer, presumably to mirror the 32-bit bitwise arithmetic of the
    script on the page -- TODO confirm against the served JavaScript.

    ``cache`` maps table index -> parsed entry and is filled lazily, so the
    hex parsing happens once per entry rather than once per character of
    every candidate, while a malformed entry still only fails when used.
    """
    # The running value never leaves the signed 32-bit range: ``>> 8`` only
    # shrinks its magnitude and XOR of two 32-bit signed values is again a
    # 32-bit signed value, so no wrap-around is needed inside the loop.
    crc = -1
    for ch in text:
        index = (crc ^ ord(ch)) & 0x000000FF
        entry = cache.get(index)
        if entry is None:
            offset = index * 9
            entry = c_int32(int(table[offset:offset + 8], 16)).value
            cache[index] = entry
        crc = (crc >> 8) ^ entry
    return abs(crc ^ (-1))


def challenge(html):
    """Solve the brute-force challenge embedded in ``html`` and fill the form.

    The script variables ``table``, ``c``, ``slt``, ``s1``, ``s2`` and ``n``
    plus the value prefix assembled in front of ``chlg`` are pulled out of
    the page with regular expressions.  Candidates are then tried until the
    checksum of ``candidate + slt`` equals ``c``.  The second ``<input>``
    receives ``"<prefix>:<candidate>:<slt>:<checksum>"``, the form action is
    percent-decoded, and the form data is collected.

    If no candidate matches, the last candidate tried and its checksum are
    submitted anyway.

    :param html: text of the challenge page.
    :returns: whatever ``submit_form`` returns for the filled-in document,
        i.e. ``(action, fields)``.
    :raises AttributeError: when one of the expected script variables is
        missing from ``html`` (the regular expression does not match).
    :raises ValueError: when the search space is empty, so no candidate
        could be tried at all.
    """
    d = pq(html)

    def script_value(pattern):
        return re.search(pattern, html).groups()[0]

    table = script_value(r'var table = "([^"]+)"')
    c = int(script_value(r'var c = (.*)\n'))
    slt = script_value(r'var slt = "([^"]+)"')
    s1 = script_value(r"var s1 = '([^']+)'")
    s2 = script_value(r"var s2 = '([^']+)'")
    n = int(script_value(r'var n = (.*)\n'))
    something = script_value(r'.value="([^"]+):" \+ chlg')

    # DEBUG OUTPUT FOLLOWS
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print("########################################################################")
    #print("var table = \"%s\";" % table)
    print("var c = %d" % c)
    print("var slt = \"%s\"" % slt)
    print("var s1 = '%s'" % s1)
    print("var s2 = '%s'" % s2)
    print("var n = %d" % n)
    print("something=%s" % something)

    table_cache = {}
    chlg = u""
    crc = None
    for chlg in _candidates(s1, s2, n):
        crc = _js_crc(chlg + slt, table, table_cache)
        if crc == c:
            break

    if crc is None:
        # Nothing was tried (n == 0 or a single-character alphabet); fail
        # with a clear message instead of an unbound-variable error below.
        raise ValueError(
            "challenge search space is empty (n=%r, s1=%r, s2=%r)" % (n, s1, s2)
        )

    d(d("input").eq(1)).attr("value", u"%s:%s:%s:%d" % (something, chlg, slt, crc))
    decode_action(d)
    return submit_form(d)
# Request the dictionary page, solve the challenge found in the response and
# post the filled-in form back, printing headers of both exchanges.
url = "http://lema.rae.es/drae/srv/search?val=hacer"

headers = {
    "Host" : "lema.rae.es",
    "User-Agent" : "Mozilla/5.0 (X11; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "de,en-US;q=0.7,en;q=0.3",
    "Accept-Encoding": "gzip, deflate",
    "DNT": "1"
}

s = requests.Session()
s.headers.update(headers)

r1 = s.get(url=url)
html = r1.content.decode("utf-8")

# NOTE(review): the form action returned by challenge() is discarded; the
# POST below goes to the original ``url`` instead -- confirm this is intended.
_, formdata = challenge(html)

req = requests.Request('POST', url, data=formdata, headers={ "Referer": url })
prepped = s.prepare_request(req)
r2 = s.send(prepped)

# DEBUG OUTPUT FOLLOWS
print("########################################################################")
print(r1.headers)
# Observed:
# {'content-length': '5484', 'content-type': 'text/html', 'pragma': 'no-cache', 'cache-control': 'no-cache'}
print(r1.request.headers)
# Observed:
# {'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
# 'Accept-Encoding': 'gzip, deflate', 'Connection': 'keep-alive',
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0',
# 'DNT': '1', 'Host': 'lema.rae.es'}
print("########################################################################")
print(formdata)
print("########################################################################")
print(r2.headers)
# Observed:
# This is not what I expected!
# {'content-length': '5799', 'content-type': 'text/html', 'pragma': 'no-cache', 'cache-control': 'no-cache'}
print(r2.request.headers)
# Observed:
# {'Content-Length': '189',
# 'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
# 'Accept-Encoding': 'gzip, deflate', 'Connection': 'keep-alive',
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0',
# 'DNT': '1', 'Host': 'lema.rae.es', 'Referer': 'http://lema.rae.es/drae/srv/search?val=hacer',
# 'Content-Type': 'application/x-www-form-urlencoded'}
print("########################################################################")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement