Advertisement
Guest User

Extract URLs from Webpage

a guest
Jan 15th, 2015
1,371
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 0.89 KB | None | 0 0
  1. <?php
  2.  
  3. // by seidbenseid
  4. // from arabia.io
  5. // algerian HaXoor
  6.  
  /**
   * Download a page and return the distinct href values of its <a> tags.
   *
   * The page is fetched with cURL when the extension is available, otherwise
   * with file_get_contents(). Values are returned exactly as written in the
   * markup, in order of first appearance: no entity decoding and no
   * resolution of relative URLs is performed.
   *
   * @param string $url Address of the page to scan.
   *
   * @return array List of unique, non-empty href strings; empty when the page
   *               could not be fetched or contains no matching anchors.
   */
  function get_all_urls ($url) {
      $data = false;

      if (function_exists ("curl_exec")) {
          $ch = curl_init ($url);

          // curl_init() can fail; only configure and run a valid handle.
          if ($ch !== false) {
              curl_setopt ($ch, CURLOPT_RETURNTRANSFER, true);
              curl_setopt ($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; rv:35.0) Gecko/20100101 Firefox/35.0");
              // NOTE(review): certificate and host verification are disabled, so
              // HTTPS responses are not authenticated. Kept for backward
              // compatibility; enable both if the result is ever trusted.
              curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, false);
              curl_setopt ($ch, CURLOPT_SSL_VERIFYHOST, false);
              curl_setopt ($ch, CURLOPT_HEADER, false);
              curl_setopt ($ch, CURLOPT_ENCODING, false);
              $data = curl_exec ($ch);
          }

          // Drop the handle as soon as the transfer is done.
          unset ($ch);
      }
      else
          $data = file_get_contents ($url);

      $urls = array ();

      // Both fetchers return false on failure; there is nothing to parse then.
      if (!is_string ($data) || $data === '')
          return $urls;

      // "\s+" requires whitespace after the tag name so <abbr>, <area>,
      // <article>... are not mistaken for anchors; "[^>]*" (unlike ".") also
      // spans line breaks, so attributes split over several lines are kept.
      if (!preg_match_all ('#<a\s+([^>]*)>#i', $data, $attrs))
          return $urls;

      // Set of values already collected, for constant-time and exact
      // (string-identity) duplicate detection.
      $seen = array ();

      foreach ($attrs[1] as $attr) {
          // The look-behind rejects attributes that merely end in "href"
          // (e.g. data-href); the back-reference makes the closing quote
          // match the opening one so the other quote type may appear inside.
          if (!preg_match ('#(?<![\w-])href\s*=\s*(["\'])(.*?)\1#is', $attr, $href))
              continue;

          $link = $href[2];

          // Compare against '' explicitly: a plain truthiness test would
          // also discard the valid relative URL "0".
          if ($link === '' || isset ($seen[$link]))
              continue;

          $seen[$link] = true;
          $urls[] = $link;
      }

      return $urls;
  }
  34.  
  35. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement