Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/************************************************
* ASP.NET web site scraping script;
* Developed by MishaInTheCloud.com
* Copyright 2009 MishaInTheCloud.com. All rights reserved.
* The use of this script is governed by the CodeProject Open License
* See the following link for full details on use and restrictions.
* http://www.codeproject.com/info/cpol10.aspx
*
* The above copyright notice must be included in any reproductions of this script.
************************************************/
/************************************************
* values used throughout the script
************************************************/
// URLs to call: the login page and the page that requires a logged-in session.
$urlLogin = "urldellogin";
$urlSecuredPage = 'urlavisitar';

// Login form fields: the POST name of each textbox and the value submitted for it.
$nameUsername = 'ctl00$ContentPlaceHolder1$CtrlLogin1$tbNifEmail'; // name of the username textbox on the login form
$namePassword = 'ctl00$ContentPlaceHolder1$CtrlLogin1$tbPassword'; // name of the password textbox on the login form
$valUsername = 'eluser';  // value submitted for the username
$valPassword = 'laclave'; // value submitted for the password

// The login button is submitted as a postback event: the button's control name
// is sent as the value of the __EVENTTARGET field, with an empty __EVENTARGUMENT.
$nameLoginBtn = '__EVENTTARGET';                                  // POST field that carries the event target
$valLoginBtn = 'ctl00$ContentPlaceHolder1$CtrlLogin1$btnValidar'; // control name of the login (submit) button
$evArgName = '__EVENTARGUMENT';
$evArgValue = '';

// Extra hidden field posted with the login form; it is sent empty.
$pubName = 'ctl00$ContentPlaceHolder1$hdnSelectedPubli';
$pubValue = '';

// Path to a file we can read/write; the cookies needed for
// accessing secured pages are stored here.
$cookieFile = './ctemp/cookie.txt';

// Regular expressions that pull the special ASP.NET __VIEWSTATE and
// __EVENTVALIDATION values out of the login page HTML. The capture group is
// non-greedy so it stops at the closing quote of the value attribute instead
// of running on to the last quote on the same line.
$regexViewstate = '/__VIEWSTATE" value="(.*?)"/i';
$regexEventVal = '/__EVENTVALIDATION" value="(.*?)"/i';
/************************************************
* utility function: regexExtract
* use the given regular expression to extract
* a value from the given text and return the
* nthValue capture group as a string
* (0 = the whole match, 1 = first group, ...).
*
* $text     - the text to search
* $regex    - a PCRE pattern, including delimiters
* $regs     - scratch array for the match groups; it is
*             received by value, so the caller's variable
*             is NOT modified
* $nthValue - index of the group to return
*
* returns the requested group, or "" when the
* pattern does not match or the group is absent
************************************************/
function regexExtract($text, $regex, $regs, $nthValue)
{
    // preg_match returns 1 on a match, 0 on no match and false on error;
    // only a real match with the requested group present yields a value.
    if (preg_match($regex, $text, $regs) && isset($regs[$nthValue])) {
        return $regs[$nthValue];
    }

    return "";
}
/************************************************
* helper: run the request configured on $ch and
* return the response body; on a transport failure
* the handle is closed and a RuntimeException
* naming the failed step ($step) is thrown
************************************************/
function curlExecOrFail($ch, $step)
{
    $body = curl_exec($ch);
    if ($body === false) {
        $error = curl_error($ch);
        curl_close($ch);
        throw new RuntimeException($step . ' failed: ' . $error);
    }

    return $body;
}

/************************************************
* initialize a curl handle; we'll use this
* handle throughout the script
************************************************/
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
// NOTE(review): certificate verification is switched off, so the server's
// identity is not checked; enable it unless the target really needs this.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
// Turn the cookie engine on before the first request so cookies issued by
// the initial GET are kept and sent back with the login POST. The empty
// COOKIEFILE name enables cookie handling without loading cookies from an
// earlier run; COOKIEJAR names the file the cookies are saved to when the
// handle is closed.
curl_setopt($ch, CURLOPT_COOKIEFILE, '');
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);

/************************************************
* first, issue a GET call to the ASP.NET login
* page. This is necessary to retrieve the
* __VIEWSTATE and __EVENTVALIDATION values
* that the server issues
************************************************/
curl_setopt($ch, CURLOPT_URL, $urlLogin);
$data = curlExecOrFail($ch, 'GET of the login page');

// from the returned html, parse out the __VIEWSTATE and
// __EVENTVALIDATION values; $regs is only scratch space for
// regexExtract and is initialised so it is defined when passed
$regs = array();
$viewstate = regexExtract($data, $regexViewstate, $regs, 1);
$eventval = regexExtract($data, $regexEventVal, $regs, 1);

/************************************************
* now issue a second call to the Login page;
* this time, it will be a POST; we'll send back
* as post data the __VIEWSTATE and __EVENTVALIDATION
* values the server previously sent us, as well as the
* username/password. The authentication cookie the
* server sends on login lands in the cookie engine
* enabled above.
************************************************/
// http_build_query URL-encodes every name and value, so characters such as
// "&", "=", "+" or "$" in the credentials or field names cannot corrupt the
// request body; the explicit '&' separator overrides any ini setting.
$postData = http_build_query(
    array(
        '__VIEWSTATE' => $viewstate,
        '__EVENTVALIDATION' => $eventval,
        $nameUsername => $valUsername,
        $namePassword => $valPassword,
        $nameLoginBtn => $valLoginBtn,
        $evArgName => $evArgValue,
        $pubName => $pubValue,
    ),
    '',
    '&'
);
curl_setopt($ch, CURLOPT_POST, TRUE);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
curl_setopt($ch, CURLOPT_URL, $urlLogin);
$data = curlExecOrFail($ch, 'POST of the login form');

/************************************************
* with the authentication cookie held by the
* handle, we'll now issue a GET to the secured
* page; the request method is switched back to
* GET explicitly because the handle was last
* used for a POST
************************************************/
curl_setopt($ch, CURLOPT_HTTPGET, TRUE);
curl_setopt($ch, CURLOPT_URL, $urlSecuredPage);
$data = curlExecOrFail($ch, 'GET of the secured page');

// at this point the secured page may be parsed for
// values, or additional POSTS made to submit parameters
// and retrieve data. For this sample, we'll just
// echo the results.
echo $data;

/************************************************
* that's it! Close the curl handle
************************************************/
curl_close($ch);
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement