"""Scrape the IELTS band-score search page for every country into an .xls file.

One POST is issued per entry of ``CountryList`` against ``SEARCH_URL``; all
requests run concurrently on the Twisted reactor.  When every response has
arrived, ``finish`` copies the cells of the fourth ``<table>`` of each page
into the ``BULATS_IA_PARSED`` sheet, and the workbook is saved as
``IALOL.xls`` once the reactor has stopped.
"""
import time

try:
    from urllib import quote, urlencode  # Python 2 location
except ImportError:
    from urllib.parse import quote, urlencode  # Python 3 location

import xlwt
from bs4 import BeautifulSoup
from twisted.internet import reactor, defer
from twisted.web import client

start = time.time()

wb = xlwt.Workbook(encoding='utf-8')
ws = wb.add_sheet("BULATS_IA_PARSED")

# Index of the last spreadsheet row handed out.  finish() increments it
# *before* writing, so the first scraped row lands on row 1 and row 0 stays
# empty.
x = 0

# Regions for the alternative Cambridge ESOL (BULATS) scrape; only used to
# build ``urls`` below.
# NOTE(review): 'Armenia' and 'Brazil' appear twice, and 'Trinadad and Tobago'
# looks like a typo -- confirm against the target site before relying on it.
Countries_List = [
    'Afghanistan', 'Armenia', 'Brazil', 'Argentina', 'Armenia', 'Australia',
    'Austria', 'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belgium', 'Belize',
    'Bolivia', 'Bosnia and Herzegovina', 'Brazil', 'Brunei Darussalam',
    'Bulgaria', 'Cameroon', 'Canada', 'Central African Republic', 'Chile',
    'China', 'Colombia', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus',
    'Czech Republic', 'Denmark', 'Dominican Republic', 'Ecuador', 'Egypt',
    'Eritrea', 'Estonia', 'Ethiopia', 'Faroe Islands', 'Fiji', 'Finland',
    'France', 'French Polynesia', 'Georgia', 'Germany', 'Gibraltar', 'Greece',
    'Grenada', 'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran',
    'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan',
    'Kazakhstan', 'Kenya', 'Kuwait', 'Latvia', 'Lebanon', 'Libya',
    'Liechtenstein', 'Lithuania', 'Luxembourg', 'Macau', 'Macedonia',
    'Malaysia', 'Maldives', 'Malta', 'Mexico', 'Monaco', 'Montenegro',
    'Morocco', 'Mozambique', 'Myanmar (Burma)', 'Nepal', 'Netherlands',
    'New Caledonia', 'New Zealand', 'Nigeria', 'Norway', 'Oman', 'Pakistan',
    'Palestine', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines',
    'Poland', 'Portugal', 'Qatar', 'Romania', 'Russia', 'Saudi Arabia',
    'Serbia', 'Singapore', 'Slovakia', 'Slovenia', 'South Africa',
    'South Korea', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Syria',
    'Taiwan', 'Thailand', 'Trinadad and Tobago', 'Tunisia', 'Turkey',
    'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States',
    'Uruguay', 'Uzbekistan', 'Venezuela', 'Vietnam',
]

# Region names contain spaces and parentheses, so they are percent-encoded
# before being placed in the query string.
urls = [
    "http://www.cambridgeesol.org/institutions/results.php?region=%s&type=&BULATS=on" % quote(Countries)
    for Countries in Countries_List
]


def finish(results):
    """Write the result table of every fetched page into the worksheet.

    For each page body in ``results`` the fourth ``<table>`` is located and
    each of its ``<tr>`` rows is written to the next spreadsheet row, one
    ``<td>`` per column.  Pages with fewer than four tables are reported and
    skipped.  A ``<tr>`` without ``<td>`` cells still consumes a row index,
    leaving a blank spreadsheet row.

    Stopping the reactor is done by the Deferred chain that calls this
    function, so that it also happens when this function raises.

    :param results: iterable of HTML page bodies, one per request.
    """
    global x
    for result in results:
        print('GOT PAGE %d bytes' % len(result))
        soup = BeautifulSoup(result)
        tableau = soup.findAll('table')
        if len(tableau) < 4:
            # The results live in the fourth table; without it there is
            # nothing to copy for this country.
            print("No IA for this country")
            continue
        rows = tableau[3].findAll('tr')
        print("Fetching")
        for tr in rows:
            x = x + 1
            for y, td in enumerate(tr.findAll('td')):
                ws.write(x, y, td.text)


def report_failure(failure):
    """Print why the scrape was aborted (failed request or processing error)."""
    print("Scrape failed: %s" % failure.getErrorMessage())


def stop_reactor(_ignored):
    """Stop the reactor regardless of how the Deferred chain ended."""
    reactor.stop()


# Values posted as DropDownList1, one request each.
# NOTE(review): 'Test_Value' is not a country, and 'Kiribat' is probably a typo
# for 'Kiribati' -- confirm against the option values of the form's dropdown.
CountryList = [
    'Test_Value', 'China ', 'UK - Scotland', 'Spain', 'Poland', 'Pakistan',
    'Ireland', 'Hong Kong', 'Greece', 'Vietnam', 'United States of America',
    'United Arab Emirates', 'UK - England', 'Turkey', 'Thailand', 'Taiwan',
    'Singapore', 'Switzerland', 'Philippines', 'Netherlands', 'New Zealand',
    'Malaysia', 'Italy', 'Finland', 'France', 'India', 'Canada', 'Australia',
    'Germany', 'Republic of Korea', 'Afghanistan', 'Albania', 'Algeria',
    'Argentina', 'Armenia', 'Austria', 'Azerbaijan', 'Bahrain', 'Bangladesh',
    'Belgium', 'Belize', 'Bolivia', 'Bosnia and Herzegovina', 'Brazil',
    'Brunei Darussalam', 'Bulgaria', 'Burma', 'Cambodia', 'Cayman Islands',
    'Chile', 'Colombia', 'Cook Islands', 'Croatia', 'Cuba', 'Cyprus',
    'Czech Republic', 'Denmark', 'Dominican Republic', 'East Timor',
    'Ecuador', 'Egypt', 'El Salvador', 'Eritrea', 'Estonia', 'Ethiopia',
    'Fiji', 'Georgia', 'Ghana', 'Global', 'Haiti', 'Hungary', 'Iceland',
    'Indonesia', 'Iran', 'Iraq', 'Israel',
    'Israel, the Gaza Strip and the West Bank', 'Istanbul', 'Jamaica',
    'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kingdom of Saudi Arabia',
    'Kiribat', 'Kuwait', 'Kyrgyz Republic', 'Laos', 'Latvia', 'Lebanon',
    'Libya', 'Lithuania', 'Luxembourg', 'Macedonia', 'Malta', 'Mardan',
    'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia', 'Morocco',
    'Nauru', 'Nepal', 'New Caledonia', 'Nigeria', 'Niue', 'Norway', 'Oman',
    'Palau', 'Papua New Guinea', 'Peru', 'Portugal', 'Puerto Rico', 'Qatar',
    'Korea', 'Romania', 'Russia', 'Saarland', 'Samoa', 'Saudi Arabia',
    'Serbia', 'Slovakia', 'Slovenia', 'Solomon Islands', 'South Africa',
    'South Korea', 'Sri Lanka', 'St. Kitts', 'St. Vincent and the Grenadines',
    'Sweden', 'Syria', 'Tahiti', 'Taiwan, Republic of China', 'Tajikistan',
    'The Netherlands', 'Tonga', 'Trinidad and Tobago', 'Tristan da Cunha',
    'Tunisia', 'Turkmenistan', 'Uganda', 'UK - Northern Ireland',
    'UK - Wales', 'Ukraine', 'Uruguay', 'Uzbekistan', 'Vanuatu',
    'Various countries', 'Venezuela', 'Viet Nam', 'West Indies', 'Yemen',
]

SEARCH_URL = "http://bandscore.ielts.org/search.aspx"

# Form fields that are identical for every request.  The three "__" tokens are
# opaque values the form expects back verbatim.
# NOTE(review): presumably captured from a browser session; they will need
# refreshing if the server stops accepting them.
# NOTE(review): 'rdoFilter' is kept exactly as in the original dict; if '%25'
# was meant as an already-encoded '%', it should be changed to '%' now that the
# body is urlencoded -- confirm against the live form.
STATIC_FORM_FIELDS = {
    '__VIEWSTATE': (
        '/wEPDwUKMTkwMjE5NDIwNg9kFgICAw9kFgQCAQ8QDxYCHgtfIURhdGFCb3VuZGdkDxaZAQIBAgICAwIEAgUCBgIHAggCCQIKAgsCDAINAg4CDwIQAhECEgITAhQCFQIWAhcCGAIZAhoCGwIcAh0CHgIfAiACIQIiAiMCJAIlAiYCJwIoAikCKgIrAiwCLQIuAi8CMAIxAjICMwI0AjUCNgI3AjgCOQI6AjsCPAI9Aj4CPwJAAkECQgJDAkQCRQJGAkcCSAJJAkoCSwJMAk0CTgJPAlACUQJSAlMCVAJVAlYCVwJYAlkCWgJbAlwCXQJeAl8CYAJhAmICYwJkAmUCZgJnAmgCaQJqAmsCbAJtAm4CbwJwAnECcgJzAnQCdQJ2AncCeAJ5AnoCewJ8An0CfgJ/AoABAoEBAoIBAoMBAoQBAoUBAoYBAocBAogBAokBAooBAosBAowBAo0BAo4BAo8BApABApEBApIBApMBApQBApUBApYBApcBApgBApkBFpkBEAULQWZnaGFuaXN0YW4FC0FmZ2hhbmlzdGFuZxAFB0FsYmFuaWEFB0FsYmFuaWFnEAUHQWxnZXJpYQUHQWxnZXJpYWcQBQlBcmdlbnRpbmEFCUFyZ2VudGluYWcQBQdBcm1lbmlhBQdBcm1lbmlhZxAFCUF1c3RyYWxpYQUJQXVzdHJhbGlhZxAFB0F1c3RyaWEFB0F1c3RyaWFnEAUKQXplcmJhaWphbgUKQXplcmJhaWphbmcQBQdCYWhyYWluBQdCYWhyYWluZxAFCkJhbmdsYWRlc2gFCkJhbmdsYWRlc2hnEAUHQmVsZ2l1bQUHQmVsZ2l1bWcQBQZCZWxpemUFBkJlbGl6ZWcQBQdCb2xpdmlhBQdCb2xpdmlhZxAFFkJvc25pYSBhbmQgSGVyemVnb3ZpbmEFFkJvc25pYSBhbmQgSGVyemVnb3ZpbmFnEAUGQnJhemlsBQZCcmF6aWxnEAURQnJ1bmVpIERhcnVzc2FsYW0FEUJydW5laSBEYXJ1c3NhbGFtZxAFCEJ1bGdhcmlhBQhCdWxnYXJpYWcQBQVCdXJtYQUFQnVybWFnEAUIQ2FtYm9kaWEFCENhbWJvZGlhZxAFBkNhbmFkYQUGQ2FuYWRhZxAFDkNheW1hbiBJc2xhbmRzBQ5DYXltYW4gSXNsYW5kc2cQBQVDaGlsZQUFQ2hpbGVnEAUGQ2hpbmEgBQZDaGluYSBnEAUIQ29sb21iaWEFCENvbG9tYmlhZxAFDENvb2sgSXNsYW5kcwUMQ29vayBJc2xhbmRzZxAFDkPDtHRlIGQnSXZvaXJlBQ5Dw7R0ZSBkJ0l2b2lyZWcQBQdDcm9hdGlhBQdDcm9hdGlhZxAFBEN1YmEFBEN1YmFnEAUGQ3lwcnVzBQZDeXBydXNnEAUOQ3plY2ggUmVwdWJsaWMFDkN6ZWNoIFJlcHVibGljZxAFB0Rlbm1'
        'hcmsFB0Rlbm1hcmtnEAUSRG9taW5pY2FuIFJlcHVibGljBRJEb21pbmljYW4gUmVwdWJsaWNnEAUKRWFzdCBUaW1vcgUKRWFzdCBUaW1vcmcQBQdFY3VhZG9yBQdFY3VhZG9yZxAFBUVneXB0BQVFZ3lwdGcQBQtFbCBTYWx2YWRvcgULRWwgU2FsdmFkb3JnEAUHRXJpdHJlYQUHRXJpdHJlYWcQBQdFc3RvbmlhBQdFc3RvbmlhZxAFCEV0aGlvcGlhBQhFdGhpb3BpYWcQBQRGaWppBQRGaWppZxAFB0ZpbmxhbmQFB0ZpbmxhbmRnEAUGRnJhbmNlBQZGcmFuY2VnEAUHR2VvcmdpYQUHR2VvcmdpYWcQBQdHZXJtYW55BQdHZXJtYW55ZxAFBUdoYW5hBQVHaGFuYWcQBQZHbG9iYWwFBkdsb2JhbGcQBQZHcmVlY2UFBkdyZWVjZWcQBQVIYWl0aQUFSGFpdGlnEAUJSG9uZyBLb25nBQlIb25nIEtvbmdnEAUHSHVuZ2FyeQUHSHVuZ2FyeWcQBQdJY2VsYW5kBQdJY2VsYW5kZxAFBUluZGlhBQVJbmRpYWcQBQlJbmRvbmVzaWEFCUluZG9uZXNpYWcQBQRJcmFuBQRJcmFuZxAFBElyYXEFBElyYXFnEAUHSXJlbGFuZAUHSXJlbGFuZGcQBQZJc3JhZWwFBklzcmFlbGcQBSlJc3JhZWwsIHRoZSBHYXphIFN0cmlwIGFuZCB0aGUgV2VzdCBCYW5rIAUpSXNyYWVsLCB0aGUgR2F6YSBTdHJpcCBhbmQgdGhlIFdlc3QgQmFuayBnEAUISXN0YW5idWwFCElzdGFuYnVsZxAFBUl0YWx5BQVJdGFseWcQBQdKYW1haWNhBQdKYW1haWNhZxAFBUphcGFuBQVKYXBhbmcQBQZKb3JkYW4FBkpvcmRhbmcQBQpLYXpha2hzdGFuBQpLYXpha2hzdGFuZxAFBUtlbnlhBQVLZW55YWcQBRdLaW5nZG9tIG9mIFNhdWRpIEFyYWJpYQUXS2luZ2RvbSBvZiBTYXVkaSBBcmFiaWFnEAUIS2lyaWJhdGkFCEtpcmliYXRpZxAFBUtvcmVhBQVLb3JlYWcQBQZLdXdhaXQFBkt1d2FpdGcQBQ9LeXJneXogUmVwdWJsaWMFD0t5cmd5eiBSZXB1YmxpY2cQBQRMYW9zBQRMYW9zZxAFBkxhdHZpYQUGTGF0dmlhZxAFB0xlYmFub24FB0xlYmFub25nEAUFTGlieWEFBUxpYnlhZxAFCUxpdGh1YW5pYQUJTGl0aHVhbmlhZxAFCkx1eGVtYm91cmcFCkx1eGVtYm91cmdnEAUJTWFjZWRvbmlhBQlNYWNlZG9uaWFnEAUITWFsYXlzaWEFCE1hbGF5c2lhZxAFBU1hbHRhBQVNYWx0YWcQBQZNYXJkYW4FBk1hcmRhbmcQBQlNYXVyaXRpdXMFCU1hdXJpdGl1c2cQBQZNZXhpY28FBk1leGljb2cQBQdNb2xkb3ZhBQdNb2xkb3ZhZxAFBk1vbmFjbwUGTW9uYWNvZxAFCE1vbmdvbGlhBQhNb25nb2xpYWcQBQdNb3JvY2NvBQdNb3JvY2NvZxAFBU5hdXJ1BQVOYXVydWcQBQVOZXBhbAUFTmVwYWxnEAULTmV0aGVybGFuZHMFC05ldGhlcmxhbmRzZxAFDU5ldyBDYWxlZG9uaWEFDU5ldyBDYWxlZG9uaWFnEAULTmV3IFplYWxhbmQFC05ldyBaZWFsYW5kZxAFB05pZ2VyaWEFB05pZ2VyaWFnEAUETml1ZQUETml1ZWcQBQZOb3J3YXkFBk5vcndheWcQBQRPbWFuBQRPbWFuZxAFCFBha2lzdGFuBQhQYWtpc3RhbmcQBQVQYWxhdQUFUGFsYXVnEAUQUGFwdWEgTmV3IEd1aW5lYQUQUGFwdWEgTmV3IEd1aW5lYWcQBQRQZXJ1BQRQZXJ1ZxA'
        'FC1BoaWxpcHBpbmVzBQtQaGlsaXBwaW5lc2cQBQZQb2xhbmQFBlBvbGFuZGcQBQhQb3J0dWdhbAUIUG9ydHVnYWxnEAULUHVlcnRvIFJpY28FC1B1ZXJ0byBSaWNvZxAFBVFhdGFyBQVRYXRhcmcQBRFSZXB1YmxpYyBvZiBLb3JlYQURUmVwdWJsaWMgb2YgS29yZWFnEAUHUm9tYW5pYQUHUm9tYW5pYWcQBQZSdXNzaWEFBlJ1c3NpYWcQBQhTYWFybGFuZAUIU2FhcmxhbmRnEAUFU2Ftb2EFBVNhbW9hZxAFDFNhdWRpIEFyYWJpYQUMU2F1ZGkgQXJhYmlhZxAFBlNlcmJpYQUGU2VyYmlhZxAFCVNpbmdhcG9yZQUJU2luZ2Fwb3JlZxAFCFNsb3Zha2lhBQhTbG92YWtpYWcQBQhTbG92ZW5pYQUIU2xvdmVuaWFnEAUPU29sb21vbiBJc2xhbmRzBQ9Tb2xvbW9uIElzbGFuZHNnEAUMU291dGggQWZyaWNhBQxTb3V0aCBBZnJpY2FnEAULU291dGggS29yZWEFC1NvdXRoIEtvcmVhZxAFBVNwYWluBQVTcGFpbmcQBQlTcmkgTGFua2EFCVNyaSBMYW5rYWcQBQlTdC4gS2l0dHMFCVN0LiBLaXR0c2cQBR5TdC4gVmluY2VudCBhbmQgdGhlIEdyZW5hZGluZXMFHlN0LiBWaW5jZW50IGFuZCB0aGUgR3JlbmFkaW5lc2cQBQZTd2VkZW4FBlN3ZWRlbmcQBQtTd2l0emVybGFuZAULU3dpdHplcmxhbmRnEAUFU3lyaWEFBVN5cmlhZxAFBlRhaGl0aQUGVGFoaXRpZxAFBlRhaXdhbgUGVGFpd2FuZxAFGVRhaXdhbiwgUmVwdWJsaWMgb2YgQ2hpbmEFGVRhaXdhbiwgUmVwdWJsaWMgb2YgQ2hpbmFnEAUKVGFqaWtpc3RhbgUKVGFqaWtpc3RhbmcQBQhUaGFpbGFuZAUIVGhhaWxhbmRnEAUPVGhlIE5ldGhlcmxhbmRzBQ9UaGUgTmV0aGVybGFuZHNnEAUFVG9uZ2EFBVRvbmdhZxAFE1RyaW5pZGFkIGFuZCBUb2JhZ28FE1RyaW5pZGFkIGFuZCBUb2JhZ29nEAUQVHJpc3RhbiBkYSBDdW5oYQUQVHJpc3RhbiBkYSBDdW5oYWcQBQdUdW5pc2lhBQdUdW5pc2lhZxAFBlR1cmtleQUGVHVya2V5ZxAFDFR1cmttZW5pc3RhbgUMVHVya21lbmlzdGFuZxAFBlVnYW5kYQUGVWdhbmRhZxAFDFVLIC0gRW5nbGFuZAUMVUsgLSBFbmdsYW5kZxAFFVVLIC0gTm9ydGhlcm4gSXJlbGFuZAUVVUsgLSBOb3J0aGVybiBJcmVsYW5kZxAFDVVLIC0gU2NvdGxhbmQFDVVLIC0gU2NvdGxhbmRnEAUKVUsgLSBXYWxlcwUKVUsgLSBXYWxlc2cQBQdVa3JhaW5lBQdVa3JhaW5lZxAFFFVuaXRlZCBBcmFiIEVtaXJhdGVzBRRVbml0ZWQgQXJhYiBFbWlyYXRlc2cQBRhVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EFGFVuaXRlZCBTdGF0ZXMgb2YgQW1lcmljYWcQBQdVcnVndWF5BQdVcnVndWF5ZxAFClV6YmVraXN0YW4FClV6YmVraXN0YW5nEAUHVmFudWF0dQUHVmFudWF0dWcQBRFWYXJpb3VzIGNvdW50cmllcwURVmFyaW91cyBjb3VudHJpZXNnEAUJVmVuZXp1ZWxhBQlWZW5lenVlbGFnEAUIVmlldCBOYW0FCFZpZXQgTmFtZxAFB1ZpZXRuYW0FB1ZpZXRuYW1nEAULV2VzdCBJbmRpZXMFC1dlc3QgSW5kaWVzZxAFBVllbWVuBQVZZW1lbmdkZAIPDzwrAA0AZBgCBR5fX0NvbnRyb2xzUmVxdWlyZVBvc3RCYWN'
        'rS2V5X18WAQUKY21kU2VhcmNoeAUQZ2R2U2VhcmNoUmVzdWx0cw9nZLWgL+bo7mQGsIE+VBHwdq0Volr7'
    ),
    '__PREVIOUSPAGE': 'EyxfwBf2A7IOt7bJTFbykEKuB-ERzMDNOrfC9rKUImBkq5iE3PhnD2YwJnA7OB5jxPkbo600qGoBYLeqQeK1fbmQkfs1',
    '__EVENTVALIDATION': '/wEWpgEC3fjr2Q4CneSP5QoC1Y/P1gICyb/KxQIC3O/xsAICgZrbzgYCqvqP0wkCxoeV5w8C+P231w8C/+u85QECrOjQ+w4Co7iw2A0Ck9rpugMChayScQLQyIOGBgK25qDaCgKtsuGNBQLkiqbGDQKfh4rhCQK+yNiFCwK/t76MDgKW/7aMAgLdt+HwCwKY6b+oCwKc6Yf9CgKCh6KdBALhmpqwBQKvl471CgK07vobAoCjhpAPAp++5MgHAsig5rQPAsHS0JkHAvS84PQHAuSL8PoJAoPF37cCAp+eidUNAuS47eIFAt65jOMHAsPJjOIOAu7HtrkBAt+ZpUECwLCt1gwCr7GNrwsCse3CyAkCzLnGkA0CyLH5pQsCgueEQAKinuLFBwLv66P4DQL4vJvSCQK/nvfWCwLMqLaRAQKp8ZOnAQLV8suWAgKRuryUDwKRurj3DQLMqOLsBgLXy/CXBAK7lbjKBQKLnL2iDwLmvaGeCAKyxLvCCwLduZRYAvbL5LMOAsq2hicCzfiCiwMChsiijAUCpuzZqg8CkcqAmAEChtjjugsCkbOh3gkCzu+X1AsC+uTkmggChY6J4AECxKW7qggCvoGtigsC16er7g4CyZmAhg8C3IHG6AECxa6KuA8C18r8vQ4C1974gwgCo/a+jgQCksvzxQgCjfmGggICgILo9wUCysXYxQ0C+YzTmwUC+76EXALz0v6GDwKw66r5CAK7l5X8BQL97o2+AgLei6qRBAK6ycySDwKbuoiUDwKQ+b6xBALSqY7ZDgLix43HAwKd0NODAQLEjdf7CAKnqfbZDgK3ya/0AQKGvf7eCAKA4Jz3CQL10aCmBAK3xpvACwKb/7bjCQK9qoGQAgKg0cjiCgLRwO3/CwKR1KDuDAKQhYeGDQLDs+yDCQL/s+CDCQKA19NDArzO6bgHAtD4zeUPAr++9fUHAvq/m/8FAu6z14cLAr3A+4EGAoSbjpQPAoGT8vwMAonU2LwOAvXDiaYLAvP0x+sHAoGQ6bgPAsOW4ucIAqKqoYoEAtj8hOkDAuj6wqAOAvG9o8oFAt3mpM4LAq361tIMAqjUlP0JAp3+34oLAve+udULApLz6TIC3Lje8w0Cr/KrzQICgtr4sgsC2uPGqwgCpKPKmgQC/sTelw0C77PHww0ChbGH+QsC5bKuswUCx6jx7AQCsIf1zwkCme3AqQ8Cr8rp8wUCgvTE2QsCkd2whQEC0d7vkwwCruWjpgkClMnJ1wMCirCk7g0CsLWlgg8CveqZxQkC4bX2zAkC76fjuQ8C28aX9QcCytnP7wwCwNm36QM3JE3YXgBSLhN/K/0A9f9zFw4oqw==',
    'txtSearchInstitution': '',
    'hdnSearchText': '',
    'rdoFilter': '%25',
    'cmdSearchx.x': '0',
    'cmdSearchx.y': '0',
}


def build_postdata(country):
    """Return the urlencoded POST body that searches for ``country``.

    getPage() writes ``postdata`` to the connection as-is, so the form fields
    have to be serialised to a string here rather than passed as a dict.
    """
    fields = dict(STATIC_FORM_FIELDS)
    fields['DropDownList1'] = country
    return urlencode(fields)


# One concurrent POST per country.  The Content-Type header is required for
# the server to parse the body as form fields.
# To scrape the Cambridge ESOL pages instead, fetch ``urls`` with plain GETs:
#     waiting = [client.getPage(url) for url in urls]
waiting = [
    client.getPage(
        SEARCH_URL,
        method='POST',
        postdata=build_postdata(Country),
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
    )
    for Country in CountryList
]

# gatherResults fails as soon as any request fails; without the errback and
# the final addBoth the reactor would never be stopped and the script would
# hang forever.
all_pages = defer.gatherResults(waiting)
all_pages.addCallback(finish)
all_pages.addErrback(report_failure)
all_pages.addBoth(stop_reactor)

reactor.run()

# Reached once the reactor has stopped; saves whatever rows were written.
wb.save("IALOL.xls")
print("Elapsed Time: %s" % (time.time() - start))