Advertisement
Guest User

1

a guest
Feb 12th, 2016
294
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 6.01 KB | None | 0 0
  <?php
  /************************************************
  * ASP.NET web site scraping script;
  * Developed by MishaInTheCloud.com
  * Copyright 2009 MishaInTheCloud.com. All rights reserved.
  * The use of this script is governed by the CodeProject Open License
  * See the following link for full details on use and restrictions.
  *   http://www.codeproject.com/info/cpol10.aspx
  *
  * The above copyright notice must be included in any reproductions of this script.
  ************************************************/

  /************************************************
  * values used throughout the script
  ************************************************/
  // URLs to call - the login page and the secured page.
  // Both values are placeholders and must be replaced with real URLs.
  $urlLogin = "urldellogin";
  $urlSecuredPage = 'urlavisitar';

  // POST field names used by the login form
  $nameUsername = 'ctl00$ContentPlaceHolder1$CtrlLogin1$tbNifEmail';  // name of the username textbox
  $namePassword = 'ctl00$ContentPlaceHolder1$CtrlLogin1$tbPassword';  // name of the password textbox

  // The login button is submitted as an ASP.NET postback event:
  // the field named $nameLoginBtn carries the button's control name as its value.
  $nameLoginBtn = '__EVENTTARGET';
  $valLoginBtn = 'ctl00$ContentPlaceHolder1$CtrlLogin1$btnValidar';

  // Credentials to submit (placeholders).
  // NOTE(review): credentials are hard-coded in the script; consider loading
  // them from configuration that is not kept alongside the source.
  $valUsername = 'eluser';   // the value to submit for the username
  $valPassword = 'laclave';  // the value to submit for the password

  // Additional fields posted with the login form; both are sent empty.
  $evArgName = '__EVENTARGUMENT';
  $evArgValue = '';
  $pubName = 'ctl00$ContentPlaceHolder1$hdnSelectedPubli';
  $pubValue = '';

  // the path to a file we can read/write; this will
  // store cookies we need for accessing secured pages
  $cookieFile = './ctemp/cookie.txt';

  // regular expressions to parse out the special ASP.NET
  // values for __VIEWSTATE and __EVENTVALIDATION.
  // The capture stops at the first closing quote ([^"]*) so that only the
  // attribute value is captured even when several hidden inputs share a line.
  $regexViewstate = '/__VIEWSTATE" value="([^"]*)"/i';
  $regexEventVal  = '/__EVENTVALIDATION" value="([^"]*)"/i';


  /************************************************
  * utility function: regexExtract
  *    Apply the regular expression $regex to $text
  *    and return capture group number $nthValue of
  *    the first match as a string (group 0 is the
  *    whole match).  An empty string is returned
  *    when the pattern does not match, when matching
  *    fails, or when the requested group is absent.
  *
  *    $regs is received by value and used only as
  *    scratch space for the match groups; the
  *    caller's variable is not modified.  It is kept
  *    so existing four-argument calls keep working.
  ************************************************/
  function regexExtract($text, $regex, $regs, $nthValue)
  {
      // preg_match() yields 1 on a match, 0 on no match and false on error;
      // only a real match that contains the requested group may be indexed.
      if (preg_match($regex, (string) $text, $regs) === 1 && isset($regs[$nthValue])) {
          return $regs[$nthValue];
      }

      return "";
  }
  /************************************************
  * initialize a curl handle; we'll use this
  *   handle throughout the script
  ************************************************/
  $ch = curl_init();
  if ($ch === false) {
      exit("Unable to initialise a cURL handle\n");
  }


  /************************************************
  * first, issue a GET call to the ASP.NET login
  *   page.  This is necessary to retrieve the
  *   __VIEWSTATE and __EVENTVALIDATION values
  *   that the server issues
  ************************************************/
  curl_setopt($ch, CURLOPT_URL, $urlLogin);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
  // NOTE(review): certificate verification is switched off here, so the
  // credentials posted later are exposed to man-in-the-middle attacks on
  // HTTPS sites.  Kept as in the original; enable it if the target site has
  // a valid certificate.
  curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);

  $data = curl_exec($ch);
  if ($data === false) {
      // Without the login page there are no tokens to post back, so stop here.
      $curlError = curl_error($ch);
      curl_close($ch);
      exit('GET of the login page failed: ' . $curlError . "\n");
  }

  // from the returned html, parse out the __VIEWSTATE and
  // __EVENTVALIDATION values; $regs is only scratch space for
  // regexExtract and is defined first so no undefined variable is passed.
  $regs = array();
  $viewstate = regexExtract($data, $regexViewstate, $regs, 1);
  $eventval = regexExtract($data, $regexEventVal, $regs, 1);


  /************************************************
  * now issue a second call to the Login page;
  *   this time, it will be a POST; we'll send back
  *   as post data the __VIEWSTATE and __EVENTVALIDATION
  *   values the server previously sent us, as well as the
  *   username/password.  We'll also set up a cookie
  *   jar to retrieve the authentication cookie that
  *   the server will generate and send us upon login.
  ************************************************/
  // Name/value pairs in the order they are sent.
  $loginFields = array(
      array('__VIEWSTATE', $viewstate),
      array('__EVENTVALIDATION', $eventval),
      array($nameUsername, $valUsername),
      array($namePassword, $valPassword),
      array($nameLoginBtn, $valLoginBtn),
      array($evArgName, $evArgValue),
      array($pubName, $pubValue),
  );

  // Every name and every value is URL-encoded: the control names contain '$'
  // and a username or password may contain '&', '=', '+' or '%', any of which
  // would otherwise corrupt the form body.
  $encodedPairs = array();
  foreach ($loginFields as $field) {
      $encodedPairs[] = rawurlencode($field[0]) . '=' . rawurlencode($field[1]);
  }
  $postData = implode('&', $encodedPairs);

  curl_setopt($ch, CURLOPT_POST, TRUE);
  curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
  curl_setopt($ch, CURLOPT_URL, $urlLogin);
  curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);

  $data = curl_exec($ch);
  if ($data === false) {
      // A failed login request means the secured page cannot be fetched.
      $curlError = curl_error($ch);
      curl_close($ch);
      exit('POST to the login page failed: ' . $curlError . "\n");
  }

  /************************************************
  * with the authentication cookie in the jar,
  * we'll now issue a GET to the secured page;
  * we set curl's COOKIEFILE option to the same
  * file we used for the jar before to ensure the
  * authentication cookie is sent back to the
  * server
  ************************************************/
  // The handle was last used for a POST; explicitly switch it back to GET.
  curl_setopt($ch, CURLOPT_HTTPGET, TRUE);
  curl_setopt($ch, CURLOPT_URL, $urlSecuredPage);
  curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);

  $data = curl_exec($ch);
  if ($data === false) {
      $curlError = curl_error($ch);
      curl_close($ch);
      exit('GET of the secured page failed: ' . $curlError . "\n");
  }

  // at this point the secured page may be parsed for
  // values, or additional POSTS made to submit parameters
  // and retrieve data.  For this sample, we'll just
  // echo the results.
  echo $data;



  /************************************************
  * that's it! Close the curl handle
  ************************************************/
  curl_close($ch);


  ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement