Guest User

Untitled

a guest
Jan 4th, 2013
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.98 KB | None | 0 0
  1. <?php
  2. //include("../config.inc.php");
  3. //connecting to Database
  4. mysql_connect("localhost", "root") or die(mysql_error());
  5. //echo "Connected to MySQL<br />";
  6. mysql_select_db("project") or die(mysql_error());
  7. echo "Connected to Database";
  8.  
  9. // 1. initialize
  10. $ch = curl_init();
  11.  
  12. // 2. set the options, including the url
  13. $start=1;
  14. $end=100;
  15. $domain=$_GET['subdomain'];
  16. $field_table=$_GET['table'];
  17. //echo $field_table;
  18. $field=rawurlencode($domain);
  19.  
  20. curl_setopt($ch, CURLOPT_URL, "http://academic.research.microsoft.com/Search?query=$field&start=$start&end=$end");
  21. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  22. curl_setopt($ch, CURLOPT_HEADER, 0);
  23.  
  24. $userAgent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)';
  25. curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
  26. // 3. execute and fetch the resulting HTML output
  27. $html = curl_exec($ch);
  28. if (!$html) {
  29.     echo "<br />cURL error number:" .curl_errno($ch);
  30.     echo "<br />cURL error:" . curl_error($ch);
  31.     exit;
  32. }
  33. //echo $html;
  34. $dom = new DOMDocument();
  35. @$dom->loadHTML($html);
  36. $info = curl_getinfo($ch);
  37. echo 'Took ' . $info['total_time'] . ' seconds for url ' . $info['url'];
  38.  
  39. // grab all the on the page
  40. $xpath = new DOMXPath($dom);
  41. //echo $xpath;
  42. $hrefs = $xpath->evaluate("//div[@class='title']//h2/a");
  43. //echo '<br>' . $hrefs->length . '<br>';
  44.  
  45. for ($i = 0; $i < $hrefs->length; $i++) {
  46.     $href = $hrefs->item($i);
  47.     $number = $href->nodeValue  ;
  48.     preg_match("/[0-9]+/",$number,$output);
  49.     $num= $output[0];
  50. //  echo $num;
  51.    
  52. }
  53. if($num==0)
  54. {
  55.     $num=100000;
  56. }
  57.  
  58. while($start<=$num)
  59. {
  60. curl_setopt($ch, CURLOPT_URL, "http://academic.research.microsoft.com/Search?query=$field&SearchDomain=2&start=$start&end=$end");
  61. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  62. curl_setopt($ch, CURLOPT_HEADER, 0);
  63.  
  64. $userAgent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)';
  65. curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
  66. // 3. execute and fetch the resulting HTML output
  67.  script crashing, as it runs fine on my localhost" is a very wrong statement. – Mathieu Imbert 3 mins ago
  68. @datasage, what are the alternatives? – ankesh just now edit
  69. $html = curl_exec($ch);
  70. //echo $html;
  71. $dom = new DOMDocument();
  72. @$dom->loadHTML($html);
  73. $info = curl_getinfo($ch);
  74. //echo 'Took ' . $info['total_time'] . ' seconds for url ' . $info['url'];
  75.  
  76. // grab all the on the page
  77. $xpath = new DOMXPath($dom);
  78. $hrefs = $xpath->evaluate("//div[@class='title-download']//h3/a");
  79. //echo '<br>' . $hrefs->length . '<br>';
  80. for ($i = 0; $i < $hrefs->length; $i++) {
  81.     $href = $hrefs->item($i);
  82.     $name = $href->nodeValue  ;
  83.     $link= $href->getAttribute('href');
  84.     preg_match("/[0-9]+/",$link,$matches);
  85.     $id= $matches[0];
  86.  
  87.     mysql_query("INSERT INTO $field_table VALUES ('$name',$id)");
  88.     //storeLink($url,$target_url);
  89.     //echo "<br />Link stored: $url";
  90. }
  91. $start=$start+100;
  92. $end=$end+100;
  93. }
  94.  
  95. // 4. free up the curl handle
  96. curl_close($ch);
  97. ?>
Advertisement
Add Comment
Please, Sign In to add comment